Elections Notebook

This is the companion notebook to the election post from earlier this month.

import pandas as pd
import numpy as np

# Read the two input CSV files; judging by the file and variable names these
# hold the 2016 results and the 1984 county results (TODO confirm).
twenty_sixteen = pd.read_csv(
    '/media/gates/Data1/data/election/election/2016.csv'
)
eighty_four = pd.read_csv(
    '/media/gates/Data1/data/election/election/county.csv'
)

# `combined` starts out as another name for the 2016 frame (no copy is made).
combined = twenty_sixteen

combined['STCOU']

0        2013
1        2016
2        2020
3        2050
4        2060
5        2068
6        2070
7        2090
8        2100
9        2105
10       2110
11       2122
12       2130
13       2150
14       2164
15       2170
16       2180
17       2185
18       2188
19       2195
20       2198
21       2220
22       2230
23       2240
24       2261
25       2270
26       2275
27       2282
28       2290
29       1001
        ...  
3111    54097
3112    54099
3113    54101
3114    54103
3115    54105
3116    54107
3117    54109
3118    56001
3119    56003
3120    56005
3121    56007
3122    56009
3123    56011
3124    56013
3125    56015
3126    56017
3127    56019
3128    56021
3129    56023
3130    56025
3131    56027
3132    56029
3133    56031
3134    56033
3135    56035
3136    56037
3137    56039
3138    56041
3139    56043
3140    56045
Name: STCOU, dtype: int64

# Join the 1984 columns onto the 2016 rows, matching on the STCOU code.
combined = combined.merge(right=eighty_four, on='STCOU')

combined.head()

	votesDem	votesGOP	total	demPer	gopPer	diff	diffPer	state	county	STCOU	Areaname	1984 results Democrat	1984 election result Republican	1984 votes other	Total cal	Total reported
0	93003	130413	246588	0.377159	0.52887	37410	15.17+ACU-	AK	Alaska	2013	Aleutians East, AK	0	0	0	0	0
1	93003	130413	246588	0.377159	0.52887	37410	15.17+ACU-	AK	Alaska	2016	Aleutians West, AK	0	0	0	0	0
2	93003	130413	246588	0.377159	0.52887	37410	15.17+ACU-	AK	Alaska	2020	Anchorage, AK	25403	62049	2702	90154	90154
3	93003	130413	246588	0.377159	0.52887	37410	15.17+ACU-	AK	Alaska	2050	Bethel, AK	0	0	0	0	0
4	93003	130413	246588	0.377159	0.52887	37410	15.17+ACU-	AK	Alaska	2060	Bristol Bay, AK	0	0	0	0	0

# For each election, store the Republican vote count minus the Democratic
# vote count, so a positive value means the Republican candidate was ahead.
for diff_col, rep_col, dem_col in (
    ('1984 Diff', '1984 election result Republican', '1984 results Democrat'),
    ('2016 Diff', 'votesGOP', 'votesDem'),
):
    combined[diff_col] = combined[rep_col] - combined[dem_col]

# Narrow the merged frame down to the vote counts, identifiers and
# difference columns.
selected_columns = [
    'votesDem',
    'votesGOP',
    'state',
    'county',
    'STCOU',
    '1984 results Democrat',
    '1984 election result Republican',
    '1984 Diff',
    '2016 Diff',
]
combined_data = combined[selected_columns]

# Vote totals per party and election over every row of the merged frame.
# Series.sum() adds the column in vectorized code rather than looping over
# the values one at a time with the Python built-in sum(). Note that
# Series.sum() skips missing values by default.
sum_1984_Dem = combined['1984 results Democrat'].sum()
sum_1984_Reb = combined['1984 election result Republican'].sum()
sum_2016_Dem = combined['votesDem'].sum()
sum_2016_Reb = combined['votesGOP'].sum()

# Report each total next to its label, one per line.
for label, votes in (
    ('1984 Dem', sum_1984_Dem),
    ('1984 Rep', sum_1984_Reb),
    ('2016 Dem', sum_2016_Dem),
    ('2016 Rep', sum_2016_Reb),
):
    print(label, votes)

1984 Dem 37537186
1984 Rep 54372639
2016 Dem 63571033
2016 Rep 63979765

sum_1984_Reb - sum_1984_Dem

16835453

# Mean Republican-minus-Democrat vote difference per row, for each election.
avg_diff_1984, avg_diff_2016 = (
    np.mean(combined[column]) for column in ('1984 Diff', '2016 Diff')
)

avg_diff_1984

5359.902260426616

avg_diff_2016

130.1279847182426

# Largest Republican-minus-Democrat vote difference in any row, per election.
max_dif_1984, max_dif_2016 = (
    np.max(combined[column]) for column in ('1984 Diff', '2016 Diff')
)

max_dif_1984

max_dif_2016

combined.columns.values

array(['votesDem', 'votesGOP', 'total', 'demPer', 'gopPer', 'diff',
       'diffPer', 'state', 'county', 'STCOU', 'Areaname',
       '1984 results Democrat', '1984 election result Republican',
       '1984 votes other', 'Total cal', 'Total reported', '1984 Diff',
       '2016 Diff'], dtype=object)

# Keep only the columns needed for the comparison. The explicit copy() makes
# combined_data an independent frame, so the column assignments made on it
# further down no longer raise SettingWithCopyWarning.
combined_data = combined[['votesDem', 'votesGOP', '1984 results Democrat', '1984 election result Republican',
                         'STCOU', 'state', 'county', '1984 Diff', '2016 Diff']].copy()

The STCOU code needs to be a zero-padded five-digit string so that it can be broken apart accurately.

def add_zero(county):
    """Left-pad a county code with zeros to a width of five characters.

    Args:
        county: The code to pad, given as a string or an integer.

    Returns:
        The code as a string, prefixed with as many '0' characters as are
        needed to reach five characters. Codes that are already five or
        more characters long are returned without padding.
    """
    # str() lets integer codes be padded directly and leaves string input
    # unchanged; rjust() only ever pads, it never truncates.
    return str(county).rjust(5, '0')

# Convert the STCOU codes to strings. assign() returns a new frame instead of
# writing into a slice of another frame, which is what made the in-place
# assignment raise SettingWithCopyWarning.
combined_data = combined_data.assign(STCOU=combined_data['STCOU'].apply(str))

/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

# Zero-pad every STCOU string to five characters. assign() returns a new frame
# instead of writing into a slice of another frame, which is what made the
# in-place assignment raise SettingWithCopyWarning.
combined_data = combined_data.assign(STCOU=combined_data['STCOU'].apply(add_zero))

/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

# Build the AFFGEOID column by prefixing '0500000US' to each STCOU string.
# assign() returns a new frame instead of writing into a slice of another
# frame, which is what made the in-place assignment raise
# SettingWithCopyWarning.
combined_data = combined_data.assign(AFFGEOID='0500000US' + combined_data['STCOU'])

/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

# Drop the Alaska rows. copy() gives no_AK its own data, so the columns added
# to it further down no longer raise SettingWithCopyWarning.
no_AK = combined_data[combined_data['state'] != 'AK'].copy()

no_AK.head()

	votesDem	votesGOP	1984 results Democrat	1984 election result Republican	STCOU	state	county	1984 Diff	2016 Diff	AFFGEOID
29	5908	18110	3366	8350	01001	AL	Autauga County	4984	12202	0500000US01001
30	18409	72780	7272	24964	01003	AL	Baldwin County	17692	54371	0500000US01003
31	4848	5431	4591	5459	01005	AL	Barbour County	868	583	0500000US01005
32	1874	6733	2167	3487	01007	AL	Bibb County	1320	4859	0500000US01007
33	2150	22808	3738	8508	01009	AL	Blount County	4770	20658	0500000US01009

# Recompute the vote totals per party and election without the Alaska rows.
# Series.sum() adds the column in vectorized code rather than looping over
# the values one at a time with the Python built-in sum(). Note that
# Series.sum() skips missing values by default.
sum_1984_Dem = no_AK['1984 results Democrat'].sum()
sum_1984_Reb = no_AK['1984 election result Republican'].sum()
sum_2016_Dem = no_AK['votesDem'].sum()
sum_2016_Reb = no_AK['votesGOP'].sum()

# Report each total next to its label, one per line.
for label, votes in (
    ('1984 Dem', sum_1984_Dem),
    ('1984 Rep', sum_1984_Reb),
    ('2016 Dem', sum_2016_Dem),
    ('2016 Rep', sum_2016_Reb),
):
    print(label, votes)

1984 Dem 37511783
1984 Rep 54310590
2016 Dem 60873946
2016 Rep 60197788

# Write the Alaska-free frame to disk as CSV.
no_AK.to_csv(
    path_or_buf='/media/gates/Data1/data/election/election/no_AK.csv'
)

no_AK.columns.values

array(['votesDem', 'votesGOP', '1984 results Democrat',
       '1984 election result Republican', 'STCOU', 'state', 'county',
       '1984 Diff', '2016 Diff', 'AFFGEOID'], dtype=object)

# Mean Republican-minus-Democrat vote difference per row, Alaska excluded.
Diff_1984, Diff_2016 = (
    np.mean(no_AK[column]) for column in ('1984 Diff', '2016 Diff')
)

for average in (Diff_1984, Diff_2016):
    print(average)

5398.074228791774
-217.27442159383034

# Largest Republican-minus-Democrat vote difference in any row, Alaska
# excluded.
max_1984, max_2016 = (
    np.max(no_AK[column]) for column in ('1984 Diff', '2016 Diff')
)
for largest in (max_1984, max_2016):
    print(largest)

428741
104444

no_AK.head()

	votesDem	votesGOP	1984 results Democrat	1984 election result Republican	STCOU	state	county	1984 Diff	2016 Diff	AFFGEOID
29	5908	18110	3366	8350	01001	AL	Autauga County	4984	12202	0500000US01001
30	18409	72780	7272	24964	01003	AL	Baldwin County	17692	54371	0500000US01003
31	4848	5431	4591	5459	01005	AL	Barbour County	868	583	0500000US01005
32	1874	6733	2167	3487	01007	AL	Bibb County	1320	4859	0500000US01007
33	2150	22808	3738	8508	01009	AL	Blount County	4770	20658	0500000US01009

combined_no_AK = combined[combined['state'] != 'AK']

# The overall vote totals for each election were left out of no_AK, so pull
# them in from the full merged frame; rows are matched on the index.
# Copy first so the new columns are written to an independent frame rather
# than to a slice, which raised SettingWithCopyWarning.
no_AK = no_AK.copy()
no_AK['2016 total'] = combined_no_AK['total']
no_AK['1984 total'] = combined_no_AK['Total cal']

/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/home/gates/anaconda3/envs/math/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()

no_AK.head()

	votesDem	votesGOP	1984 results Democrat	1984 election result Republican	STCOU	state	county	1984 Diff	2016 Diff	AFFGEOID	2016 total	1984 total
29	5908	18110	3366	8350	01001	AL	Autauga County	4984	12202	0500000US01001	24661	11917
30	18409	72780	7272	24964	01003	AL	Baldwin County	17692	54371	0500000US01003	94090	33045
31	4848	5431	4591	5459	01005	AL	Barbour County	868	583	0500000US01005	10390	10161
32	1874	6733	2167	3487	01007	AL	Bibb County	1320	4859	0500000US01007	8748	5687
33	2150	22808	3738	8508	01009	AL	Blount County	4770	20658	0500000US01009	25384	12482

# Total votes per election over the Alaska-free rows. Series.sum() adds the
# column in vectorized code rather than looping over the values one at a time
# with the Python built-in sum(); it skips missing values by default.
total_2016 = no_AK['2016 total'].sum()
total_1984 = no_AK['1984 total'].sum()
print(total_2016)
print(total_1984)

127269327
92434275

# Rows where the 2016 Republican count exceeds the Democratic count
# (exact ties are not included).
gop_ahead_2016 = no_AK['2016 Diff'] > 0
Trump_wins = no_AK[gop_ahead_2016]

Trump_wins.head()

	votesDem	votesGOP	1984 results Democrat	1984 election result Republican	STCOU	state	county	1984 Diff	2016 Diff	AFFGEOID	2016 total	1984 total
29	5908	18110	3366	8350	01001	AL	Autauga County	4984	12202	0500000US01001	24661	11917
30	18409	72780	7272	24964	01003	AL	Baldwin County	17692	54371	0500000US01003	94090	33045
31	4848	5431	4591	5459	01005	AL	Barbour County	868	583	0500000US01005	10390	10161
32	1874	6733	2167	3487	01007	AL	Bibb County	1320	4859	0500000US01007	8748	5687
33	2150	22808	3738	8508	01009	AL	Blount County	4770	20658	0500000US01009	25384	12482

# Rows where the 2016 Democratic count exceeds the Republican count
# (exact ties are not included).
dem_ahead_2016 = no_AK['2016 Diff'] < 0
Trump_loss = no_AK[dem_ahead_2016]

Trump_loss.head()

	votesDem	votesGOP	1984 results Democrat	1984 election result Republican	STCOU	state	county	1984 Diff	2016 Diff	AFFGEOID	2016 total	1984 total
34	3530	1139	3537	1697	01011	AL	Bullock County	-1840	-2391	0500000US01011	4701	5299
52	12826	5784	10955	9585	01047	AL	Dallas County	-1370	-7042	0500000US01047	18730	20718
60	4006	838	3675	1361	01063	AL	Greene County	-2314	-3168	0500000US01063	4862	5209
61	4772	3172	3289	2691	01065	AL	Hale County	-598	-1600	0500000US01065	8010	6056
65	151581	130614	107506	158362	01073	AL	Jefferson County	50856	-20967	0500000US01073	290111	266547

# The difference is GOP votes minus Democratic votes, so a negative value is
# the size of Clinton's lead over Trump.
losing_margins = Trump_loss['2016 Diff']
np.mean(losing_margins)

-37052.85010266941

# Row counts for the 2016 loss and win subsets.
len(Trump_loss.index)

len(Trump_wins.index)

# Split the 1984 rows the same way: positive difference means the Republican
# count was higher, negative means the Democratic count was higher (exact
# ties fall into neither subset).
gop_ahead_1984 = no_AK['1984 Diff'] > 0
dem_ahead_1984 = no_AK['1984 Diff'] < 0
Reagan_wins = no_AK[gop_ahead_1984]
Reagan_loss = no_AK[dem_ahead_1984]

len(Reagan_wins.index)

len(Reagan_loss.index)