This is the notebook that accompanies the election post from earlier this month.
import pandas as pd
import numpy as np
# Load the two county-level tables. Judging by the merged output, county.csv
# carries the 1984 columns and 2016.csv the 2016 vote columns; both have STCOU.
eighty_four = pd.read_csv('/media/gates/Data1/data/election/election/county.csv')
twenty_sixteen = pd.read_csv('/media/gates/Data1/data/election/election/2016.csv')
# `combined` starts out as a second name for the 2016 frame (no copy is made).
combined = twenty_sixteen
0 2013
1 2016
2 2020
3 2050
4 2060
5 2068
6 2070
7 2090
8 2100
9 2105
10 2110
11 2122
12 2130
13 2150
14 2164
15 2170
16 2180
17 2185
18 2188
19 2195
20 2198
21 2220
22 2230
23 2240
24 2261
25 2270
26 2275
27 2282
28 2290
29 1001
...
3111 54097
3112 54099
3113 54101
3114 54103
3115 54105
3116 54107
3117 54109
3118 56001
3119 56003
3120 56005
3121 56007
3122 56009
3123 56011
3124 56013
3125 56015
3126 56017
3127 56019
3128 56021
3129 56023
3130 56025
3131 56027
3132 56029
3133 56031
3134 56033
3135 56035
3136 56037
3137 56039
3138 56041
3139 56043
3140 56045
Name: STCOU, dtype: int64
# Join the 1984 table onto the 2016 table by county code. merge defaults to an
# inner join, so a row survives only if its STCOU appears in both tables.
combined = pd.merge(combined, eighty_four, on='STCOU')
|
votesDem |
votesGOP |
total |
demPer |
gopPer |
diff |
diffPer |
state |
county |
STCOU |
Areaname |
1984 results Democrat |
1984 election result Republican |
1984 votes other |
Total cal |
Total reported |
| 0 |
93003 |
130413 |
246588 |
0.377159 |
0.52887 |
37410 |
15.17+ACU- |
AK |
Alaska |
2013 |
Aleutians East, AK |
0 |
0 |
0 |
0 |
0 |
| 1 |
93003 |
130413 |
246588 |
0.377159 |
0.52887 |
37410 |
15.17+ACU- |
AK |
Alaska |
2016 |
Aleutians West, AK |
0 |
0 |
0 |
0 |
0 |
| 2 |
93003 |
130413 |
246588 |
0.377159 |
0.52887 |
37410 |
15.17+ACU- |
AK |
Alaska |
2020 |
Anchorage, AK |
25403 |
62049 |
2702 |
90154 |
90154 |
| 3 |
93003 |
130413 |
246588 |
0.377159 |
0.52887 |
37410 |
15.17+ACU- |
AK |
Alaska |
2050 |
Bethel, AK |
0 |
0 |
0 |
0 |
0 |
| 4 |
93003 |
130413 |
246588 |
0.377159 |
0.52887 |
37410 |
15.17+ACU- |
AK |
Alaska |
2060 |
Bristol Bay, AK |
0 |
0 |
0 |
0 |
0 |
# Republican-minus-Democrat vote margin per row: positive means a GOP lead,
# negative a Democratic lead.
combined['1984 Diff'] = combined['1984 election result Republican'].sub(
    combined['1984 results Democrat'])
combined['2016 Diff'] = combined['votesGOP'].sub(combined['votesDem'])

# Keep only the columns used in the comparison.
combined_data = combined[[
    'votesDem',
    'votesGOP',
    'state',
    'county',
    'STCOU',
    '1984 results Democrat',
    '1984 election result Republican',
    '1984 Diff',
    '2016 Diff',
]]
# Party totals over every row of `combined`.
# Series.sum() does the reduction inside pandas instead of iterating the
# column element by element as the builtin sum() does. The two agree on these
# integer columns; note that Series.sum() would skip NaN if any were present.
sum_1984_Dem = combined['1984 results Democrat'].sum()
sum_1984_Reb = combined['1984 election result Republican'].sum()
sum_2016_Dem = combined['votesDem'].sum()
sum_2016_Reb = combined['votesGOP'].sum()
print('1984 Dem', sum_1984_Dem)
print('1984 Rep', sum_1984_Reb)
print('2016 Dem', sum_2016_Dem)
print('2016 Rep', sum_2016_Reb)
1984 Dem 37537186
1984 Rep 54372639
2016 Dem 63571033
2016 Rep 63979765
# Republican lead in the 1984 totals (a bare expression: shown as cell output).
sum_1984_Reb - sum_1984_Dem

# Mean and largest per-row margin for each election, over all of `combined`.
avg_diff_1984 = combined['1984 Diff'].mean()
max_dif_1984 = combined['1984 Diff'].max()
avg_diff_2016 = combined['2016 Diff'].mean()
max_dif_2016 = combined['2016 Diff'].max()
array(['votesDem', 'votesGOP', 'total', 'demPer', 'gopPer', 'diff',
'diffPer', 'state', 'county', 'STCOU', 'Areaname',
'1984 results Democrat', '1984 election result Republican',
'1984 votes other', 'Total cal', 'Total reported', '1984 Diff',
'2016 Diff'], dtype=object)
# Working subset of the columns needed for the comparison.
# .copy() makes this an independent frame: STCOU is rewritten and AFFGEOID is
# added to it further down, and doing that on a plain selection of `combined`
# raises pandas' SettingWithCopyWarning.
combined_data = combined[['votesDem', 'votesGOP', '1984 results Democrat', '1984 election result Republican',
                          'STCOU', 'state', 'county', '1984 Diff', '2016 Diff']].copy()
STCOU needs to be a five-digit, zero-padded string so that it can be split apart accurately.
def add_zero(county, width=5):
    """Left-pad a county code string with zeros.

    Args:
        county: The code as a string, e.g. ``'1001'``.
        width: Minimum length of the result. Defaults to 5, the length the
            STCOU codes are padded to in this notebook.

    Returns:
        ``county`` with ``'0'`` characters prepended until it is ``width``
        characters long. A string that is already at least ``width``
        characters long is returned unchanged.
    """
    # str.rjust pads on the left only when the string is shorter than `width`,
    # which is what the original if/while loop did by hand.
    return county.rjust(width, '0')
# Convert each STCOU value to its string form so it can be zero-padded.
combined_data['STCOU'] = combined_data['STCOU'].map(str)
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
# Zero-pad every code on the left (e.g. '1001' -> '01001') via add_zero.
combined_data['STCOU'] = combined_data['STCOU'].map(add_zero)
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
# Build AFFGEOID by prefixing each padded STCOU string with '0500000US'
# (radd computes prefix + value element-wise).
combined_data['AFFGEOID'] = combined_data['STCOU'].radd('0500000US')
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
# Drop the Alaska rows. In the merged table shown earlier, the Alaska rows all
# repeat the same 2016 figures, so they would be counted many times over.
# .copy() gives an independent frame: columns are added to no_AK later, and
# doing that on a filtered selection raises SettingWithCopyWarning.
no_AK = combined_data[combined_data['state'] != 'AK'].copy()
|
votesDem |
votesGOP |
1984 results Democrat |
1984 election result Republican |
STCOU |
state |
county |
1984 Diff |
2016 Diff |
AFFGEOID |
| 29 |
5908 |
18110 |
3366 |
8350 |
01001 |
AL |
Autauga County |
4984 |
12202 |
0500000US01001 |
| 30 |
18409 |
72780 |
7272 |
24964 |
01003 |
AL |
Baldwin County |
17692 |
54371 |
0500000US01003 |
| 31 |
4848 |
5431 |
4591 |
5459 |
01005 |
AL |
Barbour County |
868 |
583 |
0500000US01005 |
| 32 |
1874 |
6733 |
2167 |
3487 |
01007 |
AL |
Bibb County |
1320 |
4859 |
0500000US01007 |
| 33 |
2150 |
22808 |
3738 |
8508 |
01009 |
AL |
Blount County |
4770 |
20658 |
0500000US01009 |
# Party totals again, this time with the Alaska rows excluded.
# Series.sum() does the reduction inside pandas instead of iterating the
# column element by element as the builtin sum() does. The two agree on these
# integer columns; note that Series.sum() would skip NaN if any were present.
sum_1984_Dem = no_AK['1984 results Democrat'].sum()
sum_1984_Reb = no_AK['1984 election result Republican'].sum()
sum_2016_Dem = no_AK['votesDem'].sum()
sum_2016_Reb = no_AK['votesGOP'].sum()
print('1984 Dem', sum_1984_Dem)
print('1984 Rep', sum_1984_Reb)
print('2016 Dem', sum_2016_Dem)
print('2016 Rep', sum_2016_Reb)
1984 Dem 37511783
1984 Rep 54310590
2016 Dem 60873946
2016 Rep 60197788
# Save the Alaska-free table. With to_csv's defaults the DataFrame index is
# written as the first column.
no_AK.to_csv(path_or_buf='/media/gates/Data1/data/election/election/no_AK.csv')
array(['votesDem', 'votesGOP', '1984 results Democrat',
'1984 election result Republican', 'STCOU', 'state', 'county',
'1984 Diff', '2016 Diff', 'AFFGEOID'], dtype=object)
# Mean Republican-minus-Democrat margin per county, Alaska excluded.
# A negative mean indicates that Democratic leads outweigh Republican ones.
Diff_1984 = no_AK['1984 Diff'].mean()
Diff_2016 = no_AK['2016 Diff'].mean()
print(Diff_1984)
print(Diff_2016)
5398.074228791774
-217.27442159383034
# Largest single-county Republican lead in each election, Alaska excluded.
max_1984 = no_AK['1984 Diff'].max()
max_2016 = no_AK['2016 Diff'].max()
print(max_1984)
print(max_2016)
|
votesDem |
votesGOP |
1984 results Democrat |
1984 election result Republican |
STCOU |
state |
county |
1984 Diff |
2016 Diff |
AFFGEOID |
| 29 |
5908 |
18110 |
3366 |
8350 |
01001 |
AL |
Autauga County |
4984 |
12202 |
0500000US01001 |
| 30 |
18409 |
72780 |
7272 |
24964 |
01003 |
AL |
Baldwin County |
17692 |
54371 |
0500000US01003 |
| 31 |
4848 |
5431 |
4591 |
5459 |
01005 |
AL |
Barbour County |
868 |
583 |
0500000US01005 |
| 32 |
1874 |
6733 |
2167 |
3487 |
01007 |
AL |
Bibb County |
1320 |
4859 |
0500000US01007 |
| 33 |
2150 |
22808 |
3738 |
8508 |
01009 |
AL |
Blount County |
4770 |
20658 |
0500000US01009 |
combined_no_AK = combined[combined['state'] != 'AK']
# The overall vote totals were left out of the earlier column selection, so
# pull them in from the full merged table. Assignment aligns on the row index,
# which no_AK shares with combined_no_AK.
# Copy first so the new columns are added to an independent frame rather than
# to a filtered selection, which raises SettingWithCopyWarning.
no_AK = no_AK.copy()
no_AK['2016 total'] = combined_no_AK['total']
no_AK['1984 total'] = combined_no_AK['Total cal']
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
|
votesDem |
votesGOP |
1984 results Democrat |
1984 election result Republican |
STCOU |
state |
county |
1984 Diff |
2016 Diff |
AFFGEOID |
2016 total |
1984 total |
| 29 |
5908 |
18110 |
3366 |
8350 |
01001 |
AL |
Autauga County |
4984 |
12202 |
0500000US01001 |
24661 |
11917 |
| 30 |
18409 |
72780 |
7272 |
24964 |
01003 |
AL |
Baldwin County |
17692 |
54371 |
0500000US01003 |
94090 |
33045 |
| 31 |
4848 |
5431 |
4591 |
5459 |
01005 |
AL |
Barbour County |
868 |
583 |
0500000US01005 |
10390 |
10161 |
| 32 |
1874 |
6733 |
2167 |
3487 |
01007 |
AL |
Bibb County |
1320 |
4859 |
0500000US01007 |
8748 |
5687 |
| 33 |
2150 |
22808 |
3738 |
8508 |
01009 |
AL |
Blount County |
4770 |
20658 |
0500000US01009 |
25384 |
12482 |
# Total votes per election, Alaska excluded.
# Series.sum() does the reduction inside pandas instead of iterating the
# column element by element as the builtin sum() does.
total_2016 = no_AK['2016 total'].sum()
total_1984 = no_AK['1984 total'].sum()
print(total_2016)
print(total_1984)
# Counties where the 2016 GOP count exceeds the Democratic count (margin > 0).
# Exact ties (margin == 0) are in neither Trump_wins nor Trump_loss.
Trump_wins = no_AK[no_AK['2016 Diff'].gt(0)]
|
votesDem |
votesGOP |
1984 results Democrat |
1984 election result Republican |
STCOU |
state |
county |
1984 Diff |
2016 Diff |
AFFGEOID |
2016 total |
1984 total |
| 29 |
5908 |
18110 |
3366 |
8350 |
01001 |
AL |
Autauga County |
4984 |
12202 |
0500000US01001 |
24661 |
11917 |
| 30 |
18409 |
72780 |
7272 |
24964 |
01003 |
AL |
Baldwin County |
17692 |
54371 |
0500000US01003 |
94090 |
33045 |
| 31 |
4848 |
5431 |
4591 |
5459 |
01005 |
AL |
Barbour County |
868 |
583 |
0500000US01005 |
10390 |
10161 |
| 32 |
1874 |
6733 |
2167 |
3487 |
01007 |
AL |
Bibb County |
1320 |
4859 |
0500000US01007 |
8748 |
5687 |
| 33 |
2150 |
22808 |
3738 |
8508 |
01009 |
AL |
Blount County |
4770 |
20658 |
0500000US01009 |
25384 |
12482 |
# Counties where the 2016 Democratic count exceeds the GOP count (margin < 0).
Trump_loss = no_AK[no_AK['2016 Diff'].lt(0)]
|
votesDem |
votesGOP |
1984 results Democrat |
1984 election result Republican |
STCOU |
state |
county |
1984 Diff |
2016 Diff |
AFFGEOID |
2016 total |
1984 total |
| 34 |
3530 |
1139 |
3537 |
1697 |
01011 |
AL |
Bullock County |
-1840 |
-2391 |
0500000US01011 |
4701 |
5299 |
| 52 |
12826 |
5784 |
10955 |
9585 |
01047 |
AL |
Dallas County |
-1370 |
-7042 |
0500000US01047 |
18730 |
20718 |
| 60 |
4006 |
838 |
3675 |
1361 |
01063 |
AL |
Greene County |
-2314 |
-3168 |
0500000US01063 |
4862 |
5209 |
| 61 |
4772 |
3172 |
3289 |
2691 |
01065 |
AL |
Hale County |
-598 |
-1600 |
0500000US01065 |
8010 |
6056 |
| 65 |
151581 |
130614 |
107506 |
158362 |
01073 |
AL |
Jefferson County |
50856 |
-20967 |
0500000US01073 |
290111 |
266547 |
# '2016 Diff' is votesGOP minus votesDem, so a negative value is a Clinton
# lead. This is therefore her average margin, expressed as a negative number,
# over the counties in Trump_loss.
Trump_loss['2016 Diff'].mean()
# Split the counties by the sign of the 1984 margin: a positive margin is a
# Republican lead, a negative one a Democratic lead. Exact ties fall in neither.
Reagan_loss = no_AK[no_AK['1984 Diff'].lt(0)]
Reagan_wins = no_AK[no_AK['1984 Diff'].gt(0)]