|
1 |
| -#!/usr/bin/env python |
2 | 1 | """
|
3 |
| -A monstrosity of an election results script. Generates statewide results for races, |
4 |
| -based on county results. |
| 2 | +A monstrosity of an election results script. Calculates total votes for |
| 3 | +races and candidates, and determines if there is a winner in each race. |
5 | 4 |
|
6 |
| -This module bundles together way too much functionality and is near impossible to test, |
7 |
| -beyond eye-balling results. |
| 5 | +This module bundles together way too much functionality and is near impossible |
| 6 | +to test, beyond eye-balling results. |
8 | 7 |
|
9 | 8 | USAGE:
|
10 | 9 |
|
11 | 10 | python election_results.py
|
12 | 11 |
|
13 |
| -
|
14 | 12 | OUTPUT:
|
15 | 13 |
|
16 |
| - Generates summary_results.csv |
17 |
| -
|
| 14 | + summary_results.csv |
18 | 15 |
|
19 | 16 | """
|
20 | 17 | import csv
|
21 |
| -import datetime |
22 | 18 | import urllib
|
23 |
| -from decimal import Decimal, getcontext |
24 | 19 | from operator import itemgetter
|
25 | 20 | from collections import defaultdict
|
26 | 21 | from os.path import abspath, dirname, join
|
27 |
| -from urllib import urlretrieve |
28 | 22 |
|
29 |
| -# Set precision for all Decimals |
30 |
| -getcontext().prec = 2 |
31 | 23 |
|
32 |
| -# Download CSV of fake Virginia election results from GDocs |
| 24 | +# Download CSV of fake Virginia election results to root of project |
33 | 25 | url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
|
34 |
| - |
35 |
| -# Download the file to the root project directory /path/to/refactorin101/ |
36 |
| -# NOTE: This will only download the file if it doesn't already exist |
37 |
| -# This approach is simplified for demo purposes. In a real-life application, |
38 |
| -# you'd likely have a considerable amount of additional code |
39 |
| -# to appropriately handle HTTP timeouts, 404s, and other real-world scenarios. |
40 |
| -# For example, you might retry a request several times after a timeout, and then |
41 |
| -# send an email alert that the site is non-responsive. |
42 | 26 | filename = join(dirname(dirname(abspath(__file__))), 'fake_va_elec_results.csv')
|
43 | 27 | urllib.urlretrieve(url, filename)
|
44 | 28 |
|
45 | 29 | # Create reader for ingesting CSV as array of dicts
|
46 | 30 | reader = csv.DictReader(open(filename, 'rb'))
|
47 | 31 |
|
48 |
| -# Normally, accessing a non-existent dictionary key would raise a KeyError. |
49 | 32 | # Use defaultdict to automatically create non-existent keys with an empty dictionary as the default value.
|
50 | 33 | # See https://pydocs2cn.readthedocs.org/en/latest/library/collections.html#defaultdict-objects
|
51 | 34 | results = defaultdict(dict)
|
|
54 | 37 | for row in reader:
|
55 | 38 | # Parse name into first and last
|
56 | 39 | row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
|
57 |
| - |
58 |
| - # Standardize party abbreviations |
59 |
| - party = row['party'].strip().upper() |
60 |
| - if party.startswith('GOP'): |
61 |
| - party_clean = 'REP' |
62 |
| - elif party.startswith('DEM'): |
63 |
| - party_clean = 'DEM' |
64 |
| - else: |
65 |
| - party_clean = party |
66 |
| - row['party_clean'] = party_clean |
67 |
| - |
68 |
| - # Standardize Office and district |
69 |
| - office = row['office'] |
70 |
| - if 'Rep' in office: |
71 |
| - row['office_clean'] = 'U.S. House of Representatives' |
72 |
| - row['district'] = int(office.split('-')[-1]) |
73 |
| - else: |
74 |
| - row['office_clean'] = office.strip() |
75 |
| - row['district'] = '' |
76 |
| - |
77 | 40 | # Convert total votes to an integer
|
78 | 41 | row['votes'] = int(row['votes'])
|
79 | 42 |
|
80 |
| - # Store county-level results by office/district pair, then by candidate key |
81 |
| - # Create unique candidate key from party and name, in case multiple candidates have the same name |
| 43 | + # Store county-level results by office/district pair, then by candidate party and raw name |
82 | 44 | race_key = (row['office'], row['district'])
|
| 45 | + # Create unique candidate key from party and name, in case multiple candidates have the same name |
83 | 46 | cand_key = (row['party'], row['candidate'])
|
84 | 47 | # Below, defaultdict creates an empty dict for a new race key, and setdefault creates an empty list for a new candidate key.
|
85 | 48 | race = results[race_key]
|
86 | 49 | race.setdefault(cand_key, []).append(row)
|
87 | 50 |
|
88 | 51 |
|
89 |
| -# Create a new set of summary results that includes each candidate's |
90 |
| -# statewide total votes, % of vote, winner flag, margin of victory, tie_race flag |
| 52 | +# Tally votes for races and candidates and assign winners |
91 | 53 | summary = defaultdict(dict)
|
92 | 54 |
|
93 | 55 | for race_key, cand_results in results.items():
|
94 | 56 | all_votes = 0
|
95 |
| - tie_race = '' |
96 |
| - cand_totals = [] |
| 57 | + cands = [] |
97 | 58 | for cand_key, results in cand_results.items():
|
98 | 59 | # Populate a new candidate dict using one set of county results
|
99 | 60 | cand = {
|
100 |
| - 'candidate': results[0]['candidate'], |
101 | 61 | 'first_name': results[0]['first_name'],
|
102 | 62 | 'last_name': results[0]['last_name'],
|
103 | 63 | 'party': results[0]['party'],
|
104 |
| - 'party_clean': results[0]['party_clean'], |
105 | 64 | 'winner': '',
|
106 |
| - 'margin_of_vic': '', |
107 | 65 | }
|
108 | 66 | # Calculate candidate total votes
|
109 |
| - cand_statewide_total= sum([result['votes'] for result in results]) |
110 |
| - cand['votes'] = cand_statewide_total |
111 |
| - cand_totals.append(cand) |
| 67 | + cand_total_votes = sum([result['votes'] for result in results]) |
| 68 | + cand['votes'] = cand_total_votes |
112 | 69 | # Add cand totals to racewide vote count
|
113 |
| - all_votes += cand_statewide_total |
| 70 | + all_votes += cand_total_votes |
| 71 | + # And stash the candidate's data |
| 72 | + cands.append(cand) |
114 | 73 |
|
115 | 74 | # sort cands from highest to lowest vote count
|
116 |
| - sorted_cands = sorted(cand_totals, key=itemgetter('votes'), reverse=True) |
117 |
| - |
118 |
| - # Determine vote pct for each candidate |
119 |
| - for cand in sorted_cands: |
120 |
| - vote_pct = (Decimal(cand['votes']) / Decimal(all_votes)) * 100 |
121 |
| - cand['vote_pct'] = "%s" % vote_pct.to_eng_string() |
| 75 | + sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True) |
122 | 76 |
|
123 |
| - # Determine winner, if any, and assign margin of victory |
| 77 | + # Determine winner, if any |
124 | 78 | first = sorted_cands[0]
|
125 | 79 | second = sorted_cands[1]
|
126 | 80 |
|
127 |
| - if first['votes'] == second['votes']: |
128 |
| - tie_race = 'X' |
129 |
| - else: |
| 81 | + if first['votes'] != second['votes']: |
130 | 82 | first['winner'] = 'X'
|
131 |
| - mov = (Decimal(first['votes'] - second['votes']) / all_votes) * 100 |
132 |
| - first['margin_of_vic'] = "%s" % mov.to_eng_string() |
133 | 83 |
|
134 | 84 | # Get race metadata from one set of results
|
135 | 85 | result = cand_results.values()[0][0]
|
| 86 | + # Add results to output |
136 | 87 | summary[race_key] = {
|
137 |
| - 'all_votes': all_votes, |
138 |
| - 'tie_race': tie_race, |
139 | 88 | 'date': result['date'],
|
140 |
| - 'office': result['office_clean'], |
| 89 | + 'office': result['office'], |
141 | 90 | 'district': result['district'],
|
| 91 | + 'all_votes': all_votes, |
142 | 92 | 'candidates': sorted_cands,
|
143 | 93 | }
|
144 | 94 |
|
145 | 95 |
|
146 |
| -# Output CSV of summary results |
| 96 | +# Write CSV of results |
147 | 97 | outfile = join(dirname(abspath(__file__)), 'summary_results.csv')
|
148 | 98 | with open(outfile, 'wb') as fh:
|
149 | 99 | # We'll limit the output to cleanly parsed, standardized values
|
|
153 | 103 | 'district',
|
154 | 104 | 'last_name',
|
155 | 105 | 'first_name',
|
156 |
| - 'party_clean', |
| 106 | + 'party', |
157 | 107 | 'all_votes',
|
158 | 108 | 'votes',
|
159 |
| - 'vote_pct', |
160 | 109 | 'winner',
|
161 |
| - 'margin_of_vic', |
162 |
| - 'tie_race', |
163 | 110 | ]
|
164 | 111 | writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
|
165 | 112 | writer.writeheader()
|
|
168 | 115 | for cand in cands:
|
169 | 116 | results.update(cand)
|
170 | 117 | writer.writerow(results)
|
| 118 | + |
0 commit comments