This is the companion notebook to the election post from earlier this month.
import pandas as pd
import numpy as np

# Read the two county-level CSV files into DataFrames.
twenty_sixteen = pd.read_csv(
    filepath_or_buffer='/media/gates/Data1/data/election/election/2016.csv'
)
eighty_four = pd.read_csv(
    filepath_or_buffer='/media/gates/Data1/data/election/election/county.csv'
)
# `combined` begins as a second name for the 2016 frame (no copy is made).
combined = twenty_sixteen
# Show the STCOU column of the 2016 data.
combined['STCOU']
0        2013
1        2016
2        2020
3        2050
4        2060
5        2068
6        2070
7        2090
8        2100
9        2105
10       2110
11       2122
12       2130
13       2150
14       2164
15       2170
16       2180
17       2185
18       2188
19       2195
20       2198
21       2220
22       2230
23       2240
24       2261
25       2270
26       2275
27       2282
28       2290
29       1001
        ...  
3111    54097
3112    54099
3113    54101
3114    54103
3115    54105
3116    54107
3117    54109
3118    56001
3119    56003
3120    56005
3121    56007
3122    56009
3123    56011
3124    56013
3125    56015
3126    56017
3127    56019
3128    56021
3129    56023
3130    56025
3131    56027
3132    56029
3133    56031
3134    56033
3135    56035
3136    56037
3137    56039
3138    56041
3139    56043
3140    56045
Name: STCOU, dtype: int64
# Join the two tables on the shared STCOU key (pandas' default inner join,
# so only codes present in both tables are kept).
combined = pd.merge(combined, eighty_four, on='STCOU')
combined.head()
votesDem votesGOP total demPer gopPer diff diffPer state county STCOU Areaname 1984 results Democrat 1984 election result Republican 1984 votes other Total cal Total reported
0 93003 130413 246588 0.377159 0.52887 37410 15.17+ACU- AK Alaska 2013 Aleutians East, AK 0 0 0 0 0
1 93003 130413 246588 0.377159 0.52887 37410 15.17+ACU- AK Alaska 2016 Aleutians West, AK 0 0 0 0 0
2 93003 130413 246588 0.377159 0.52887 37410 15.17+ACU- AK Alaska 2020 Anchorage, AK 25403 62049 2702 90154 90154
3 93003 130413 246588 0.377159 0.52887 37410 15.17+ACU- AK Alaska 2050 Bethel, AK 0 0 0 0 0
4 93003 130413 246588 0.377159 0.52887 37410 15.17+ACU- AK Alaska 2060 Bristol Bay, AK 0 0 0 0 0
# Republican-minus-Democrat vote margin per row for each election; a positive
# value means the Republican count is the larger one.
combined['1984 Diff'] = (combined['1984 election result Republican']
                         - combined['1984 results Democrat'])
combined['2016 Diff'] = combined['votesGOP'] - combined['votesDem']
combined_data = combined[['votesDem', 'votesGOP', 'state', 'county', 'STCOU', '1984 results Democrat',
                          '1984 election result Republican', '1984 Diff','2016 Diff']]

# Nationwide totals per party.  Series.sum() performs the reduction inside
# pandas instead of iterating the column element by element with the built-in
# sum().
# NOTE(review): the preview of the merged table shows identical 2016 figures
# on every AK row, so the 2016 totals here presumably count those votes more
# than once -- confirm before relying on them.
sum_1984_Dem = combined['1984 results Democrat'].sum()
sum_1984_Reb = combined['1984 election result Republican'].sum()
sum_2016_Dem = combined['votesDem'].sum()
sum_2016_Reb = combined['votesGOP'].sum()
print('1984 Dem', sum_1984_Dem)
print('1984 Rep', sum_1984_Reb)
print('2016 Dem', sum_2016_Dem)
print('2016 Rep', sum_2016_Reb)
1984 Dem 37537186
1984 Rep 54372639
2016 Dem 63571033
2016 Rep 63979765
sum_1984_Reb - sum_1984_Dem
16835453
# Average Republican-minus-Democrat margin over every row of `combined`.
avg_diff_1984 = combined['1984 Diff'].mean()
avg_diff_2016 = combined['2016 Diff'].mean()
avg_diff_1984
5359.902260426616
avg_diff_2016
130.1279847182426
# Largest Republican-minus-Democrat margin found in any row of `combined`.
max_dif_1984 = combined['1984 Diff'].max()
max_dif_2016 = combined['2016 Diff'].max()
max_dif_1984
428741
max_dif_2016
104444
combined.columns.values
array(['votesDem', 'votesGOP', 'total', 'demPer', 'gopPer', 'diff',
       'diffPer', 'state', 'county', 'STCOU', 'Areaname',
       '1984 results Democrat', '1984 election result Republican',
       '1984 votes other', 'Total cal', 'Total reported', '1984 Diff',
       '2016 Diff'], dtype=object)
# Keep only the columns needed for the 1984-vs-2016 comparison.  The explicit
# .copy() gives combined_data its own data, so overwriting or adding columns
# on it afterwards no longer raises SettingWithCopyWarning.
combined_data = combined[['votesDem', 'votesGOP', '1984 results Democrat', '1984 election result Republican',
                         'STCOU', 'state', 'county', '1984 Diff', '2016 Diff']].copy()

We need to turn STCOU into a five-digit string (left-padded with zeros) so that it can be broken apart accurately.

def add_zero(county, width=5):
    """Left-pad a county code with zeros until it is ``width`` characters long.

    Args:
        county: The code to pad, given as a string or an integer (it is
            converted with ``str`` first).
        width: Minimum length of the returned string. Defaults to 5.

    Returns:
        The code as a string, prefixed with as many ``'0'`` characters as are
        needed to reach ``width``. A code that is already that long or longer
        is returned without any padding.
    """
    # rjust places every fill character at the very front of the string,
    # which is what the previous prepend-one-zero-at-a-time loop did.
    return str(county).rjust(width, '0')
# Convert every STCOU value to its string form so it can be zero-padded.
combined_data['STCOU'] = combined_data['STCOU'].map(str)
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
# Run each STCOU string through add_zero so codes shorter than five
# characters gain leading zeros.
combined_data['STCOU'] = combined_data['STCOU'].map(add_zero)
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
# AFFGEOID is the STCOU string with the fixed prefix '0500000US' in front
# (radd computes prefix + value for every row).
combined_data['AFFGEOID'] = combined_data['STCOU'].radd('0500000US')
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
# Drop every row whose state is 'AK'.
# NOTE(review): presumably because the merged preview shows the same 2016
# figures repeated on each AK row -- confirm.
# The explicit .copy() makes no_AK an independent frame, so adding columns to
# it later does not raise SettingWithCopyWarning.
no_AK = combined_data[combined_data['state'] != 'AK'].copy()
no_AK.head()
votesDem votesGOP 1984 results Democrat 1984 election result Republican STCOU state county 1984 Diff 2016 Diff AFFGEOID
29 5908 18110 3366 8350 01001 AL Autauga County 4984 12202 0500000US01001
30 18409 72780 7272 24964 01003 AL Baldwin County 17692 54371 0500000US01003
31 4848 5431 4591 5459 01005 AL Barbour County 868 583 0500000US01005
32 1874 6733 2167 3487 01007 AL Bibb County 1320 4859 0500000US01007
33 2150 22808 3738 8508 01009 AL Blount County 4770 20658 0500000US01009
# Recompute the per-party totals with the AK rows excluded.  Series.sum()
# performs the reduction inside pandas instead of iterating the column
# element by element with the built-in sum().
sum_1984_Dem = no_AK['1984 results Democrat'].sum()
sum_1984_Reb = no_AK['1984 election result Republican'].sum()
sum_2016_Dem = no_AK['votesDem'].sum()
sum_2016_Reb = no_AK['votesGOP'].sum()
print('1984 Dem', sum_1984_Dem)
print('1984 Rep', sum_1984_Reb)
print('2016 Dem', sum_2016_Dem)
print('2016 Rep', sum_2016_Reb)
1984 Dem 37511783
1984 Rep 54310590
2016 Dem 60873946
2016 Rep 60197788
# Save the AK-free table to disk; the row index is written as well (the
# pandas default, spelled out here).
no_AK.to_csv(
    path_or_buf='/media/gates/Data1/data/election/election/no_AK.csv',
    index=True,
)
no_AK.columns.values
array(['votesDem', 'votesGOP', '1984 results Democrat',
       '1984 election result Republican', 'STCOU', 'state', 'county',
       '1984 Diff', '2016 Diff', 'AFFGEOID'], dtype=object)
# Average Republican-minus-Democrat margin with the AK rows excluded.
Diff_1984 = no_AK['1984 Diff'].mean()
Diff_2016 = no_AK['2016 Diff'].mean()
print(Diff_1984)
print(Diff_2016)
5398.074228791774
-217.27442159383034
# Largest Republican-minus-Democrat margin with the AK rows excluded.
max_1984 = no_AK['1984 Diff'].max()
max_2016 = no_AK['2016 Diff'].max()
print(max_1984)
print(max_2016)
428741
104444
no_AK.head()
votesDem votesGOP 1984 results Democrat 1984 election result Republican STCOU state county 1984 Diff 2016 Diff AFFGEOID
29 5908 18110 3366 8350 01001 AL Autauga County 4984 12202 0500000US01001
30 18409 72780 7272 24964 01003 AL Baldwin County 17692 54371 0500000US01003
31 4848 5431 4591 5459 01005 AL Barbour County 868 583 0500000US01005
32 1874 6733 2167 3487 01007 AL Bibb County 1320 4859 0500000US01007
33 2150 22808 3738 8508 01009 AL Blount County 4770 20658 0500000US01009
# The overall vote totals were left out of the column selection made
# earlier, so pull them from the full merged table (minus the AK rows).
combined_no_AK = combined[combined['state'] != 'AK']
# Work on an explicit copy: no_AK was produced by filtering another frame,
# and adding columns to such a slice raises SettingWithCopyWarning.
no_AK = no_AK.copy()
# Both frames keep the row index of `combined`, so the values line up by
# index label.
no_AK['2016 total'] = combined_no_AK['total']
no_AK['1984 total'] = combined_no_AK['Total cal']
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()

no_AK.head()
votesDem votesGOP 1984 results Democrat 1984 election result Republican STCOU state county 1984 Diff 2016 Diff AFFGEOID 2016 total 1984 total
29 5908 18110 3366 8350 01001 AL Autauga County 4984 12202 0500000US01001 24661 11917
30 18409 72780 7272 24964 01003 AL Baldwin County 17692 54371 0500000US01003 94090 33045
31 4848 5431 4591 5459 01005 AL Barbour County 868 583 0500000US01005 10390 10161
32 1874 6733 2167 3487 01007 AL Bibb County 1320 4859 0500000US01007 8748 5687
33 2150 22808 3738 8508 01009 AL Blount County 4770 20658 0500000US01009 25384 12482
# Total votes cast in each election, AK rows excluded.  Series.sum() performs
# the reduction inside pandas instead of iterating the column element by
# element with the built-in sum().
total_2016 = no_AK['2016 total'].sum()
total_1984 = no_AK['1984 total'].sum()
print(total_2016)
print(total_1984)
127269327
92434275
# Rows where the 2016 Republican-minus-Democrat margin is strictly positive
# (a margin of exactly zero is not included).
Trump_wins = no_AK.loc[no_AK['2016 Diff'].gt(0)]
Trump_wins.head()
votesDem votesGOP 1984 results Democrat 1984 election result Republican STCOU state county 1984 Diff 2016 Diff AFFGEOID 2016 total 1984 total
29 5908 18110 3366 8350 01001 AL Autauga County 4984 12202 0500000US01001 24661 11917
30 18409 72780 7272 24964 01003 AL Baldwin County 17692 54371 0500000US01003 94090 33045
31 4848 5431 4591 5459 01005 AL Barbour County 868 583 0500000US01005 10390 10161
32 1874 6733 2167 3487 01007 AL Bibb County 1320 4859 0500000US01007 8748 5687
33 2150 22808 3738 8508 01009 AL Blount County 4770 20658 0500000US01009 25384 12482
# Rows where the 2016 Republican-minus-Democrat margin is strictly negative
# (a margin of exactly zero is not included).
Trump_loss = no_AK.loc[no_AK['2016 Diff'].lt(0)]
Trump_loss.head()
votesDem votesGOP 1984 results Democrat 1984 election result Republican STCOU state county 1984 Diff 2016 Diff AFFGEOID 2016 total 1984 total
34 3530 1139 3537 1697 01011 AL Bullock County -1840 -2391 0500000US01011 4701 5299
52 12826 5784 10955 9585 01047 AL Dallas County -1370 -7042 0500000US01047 18730 20718
60 4006 838 3675 1361 01063 AL Greene County -2314 -3168 0500000US01063 4862 5209
61 4772 3172 3289 2691 01065 AL Hale County -598 -1600 0500000US01065 8010 6056
65 151581 130614 107506 158362 01073 AL Jefferson County 50856 -20967 0500000US01073 290111 266547
# The margin is Trump's votes minus Clinton's, so a negative mean here is the
# average number of votes by which Clinton led in these rows.
Trump_loss['2016 Diff'].mean()
-37052.85010266941
len(Trump_loss)
487
len(Trump_wins)
2625
# Split the rows by the sign of the 1984 Republican-minus-Democrat margin;
# rows with a margin of exactly zero land in neither frame.
Reagan_wins = no_AK.loc[no_AK['1984 Diff'].gt(0)]
Reagan_loss = no_AK.loc[no_AK['1984 Diff'].lt(0)]
len(Reagan_wins)
2777
len(Reagan_loss)
333