Skip to content

Commit c5bcf0d

Browse files
committed
simplify a bit by removing office slug
1 parent 59ff1d3 commit c5bcf0d

File tree

11 files changed

+32
-64
lines changed

11 files changed

+32
-64
lines changed

elex1/election_results.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -65,29 +65,24 @@
6565
party_clean = party
6666
row['party_clean'] = party_clean
6767

68-
# Standardize Office and add slug and district
68+
# Standardize Office and district
6969
office = row['office']
70-
if 'Pres' in office:
71-
row['office_clean'] = office.strip()
72-
row['office_slug'] = 'president'
73-
row['district'] = ''
74-
elif 'Rep' in office:
75-
row['office_clean'] = 'U.S. Representative'
76-
row['office_slug'] = 'us-house'
70+
if 'Rep' in office:
71+
row['office_clean'] = 'U.S. House of Representatives'
7772
row['district'] = int(office.split('-')[-1])
7873
else:
7974
row['office_clean'] = office.strip()
80-
row['office_slug'] = office.strip().replace(' ', '-')
8175
row['district'] = ''
8276

8377
# Convert total votes to an integer
8478
row['votes'] = int(row['votes'])
8579

86-
# Store county-level results by office, then by candidate key
80+
# Store county-level results by office/district pair, then by candidate key
8781
# Create unique candidate key from party and name, in case multiple candidates have the same name
82+
race_key = (row['office'], row['district'])
8883
cand_key = (row['party'], row['candidate'])
8984
# Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
90-
race = results[row['office']]
85+
race = results[race_key]
9186
race.setdefault(cand_key, []).append(row)
9287

9388

@@ -143,7 +138,6 @@
143138
'tie_race': tie_race,
144139
'date': result['date'],
145140
'office': result['office_clean'],
146-
'office_slug': result['office_slug'],
147141
'district': result['district'],
148142
'candidates': sorted_cands,
149143
}
@@ -155,7 +149,7 @@
155149
# We'll limit the output to cleanly parsed, standardized values
156150
fieldnames = [
157151
'date',
158-
'office_slug',
152+
'office',
159153
'district',
160154
'last_name',
161155
'first_name',

elex2/election_results.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,13 @@ def clean_party(party):
5252
return party_clean
5353

5454
def clean_office(office):
55-
if 'Pres' in office:
56-
office_clean = office.strip()
57-
office_slug = 'president'
58-
district = ''
59-
elif 'Rep' in office:
55+
if 'Rep' in office:
6056
office_clean = 'U.S. House of Representatives'
61-
office_slug = 'us-house'
6257
district = int(office.split('-')[-1])
6358
else:
6459
office_clean = office.strip()
65-
office_slug = office.strip().replace('.', '').replace(' ', '-').lower()
6660
district = ''
67-
return office_clean, office_slug, district
61+
return office_clean, district
6862

6963

7064
#### PRIMARY FUNCS ####
@@ -110,14 +104,15 @@ def parse_and_clean(path):
110104
# Perform some data clean-ups and conversions
111105
row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
112106
row['party_clean'] = clean_party(row['party'])
113-
row['office_clean'], row['office_slug'], row['district'] = clean_office(row['office'])
107+
row['office_clean'], row['district'] = clean_office(row['office'])
114108
row['votes'] = int(row['votes'])
115109

116-
# Store county-level results by office, then by candidate key
110+
# Store county-level results by office/district pair, then by candidate key
117111
# Create unique candidate key from party and name, in case multiple candidates have the same name
112+
race_key = (row['office'], row['district'])
118113
cand_key = (row['party'], row['candidate'])
119114
# Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
120-
race = results[row['office']]
115+
race = results[race_key]
121116
race.setdefault(cand_key, []).append(row)
122117

123118
return results
@@ -176,7 +171,6 @@ def summarize(results):
176171
'tie_race': tie_race,
177172
'date': result['date'],
178173
'office': result['office_clean'],
179-
'office_slug': result['office_slug'],
180174
'district': result['district'],
181175
'candidates': sorted_cands,
182176
}
@@ -196,7 +190,7 @@ def write_csv(summary):
196190
# Limit output to cleanly parsed, standardized values
197191
fieldnames = [
198192
'date',
199-
'office_slug',
193+
'office',
200194
'district',
201195
'last_name',
202196
'first_name',

elex2/test_elections.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,11 @@
55

66
class TestDataCleaners(TestCase):
77

8-
def test_clean_office_prez(self):
9-
self.assertEquals(clean_office('President'), ('President', 'president', ''))
10-
118
def test_clean_office_rep(self):
12-
self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 'us-house', 1))
9+
self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 1))
1310

1411
def test_clean_office_other(self):
15-
self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', 'us-senate', ''))
12+
self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', ''))
1613

1714
def test_clean_party_gop(self):
1815
self.assertEquals(clean_party('GOP'), 'REP')

elex3/lib/analysis.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ def summarize(results):
7070
'tie_race': tie_race,
7171
'date': result['date'],
7272
'office': result['office_clean'],
73-
'office_slug': result['office_slug'],
7473
'district': result['district'],
7574
'candidates': sorted_cands,
7675
}

elex3/lib/parser.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ def parse_and_clean(path):
2727
# Perform some data clean-ups and conversions
2828
row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
2929
row['party_clean'] = clean_party(row['party'])
30-
row['office_clean'], row['office_slug'], row['district'] = clean_office(row['office'])
30+
row['office_clean'], row['district'] = clean_office(row['office'])
3131
row['votes'] = int(row['votes'])
3232

33-
# Store county-level results by office, then by candidate key
33+
# Store county-level results by office/district pair, then by candidate key
3434
# Create unique candidate key from party and name, in case multiple candidates have the same name
35+
race_key = (row['office'], row['district'])
3536
cand_key = (row['party'], row['candidate'])
3637
# Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
3738
race = results[row['office']]
@@ -50,17 +51,11 @@ def clean_party(party):
5051
return party_clean
5152

5253
def clean_office(office):
53-
if 'Pres' in office:
54-
office_clean = office.strip()
55-
office_slug = 'president'
56-
district = ''
57-
elif 'Rep' in office:
54+
if 'Rep' in office:
5855
office_clean = 'U.S. House of Representatives'
59-
office_slug = 'us-house'
6056
district = int(office.split('-')[-1])
6157
else:
6258
office_clean = office.strip()
63-
office_slug = office.strip().replace('.', '').replace(' ', '-').lower()
6459
district = ''
65-
return office_clean, office_slug, district
60+
return office_clean, district
6661

elex3/scripts/save_summary_to_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def write_csv(summary):
4242
# Limit output to cleanly parsed, standardized values
4343
fieldnames = [
4444
'date',
45-
'office_slug',
45+
'office',
4646
'district',
4747
'last_name',
4848
'first_name',

elex3/tests/test_parser.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55

66
class TestDataCleaners(TestCase):
77

8-
def test_clean_office_prez(self):
9-
self.assertEquals(clean_office('President'), ('President', 'president', ''))
108

119
def test_clean_office_rep(self):
12-
self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 'us-house', 1))
10+
self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 1))
1311

1412
def test_clean_office_other(self):
15-
self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', 'us-senate', ''))
13+
self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', ''))
1614

1715
def test_clean_party_gop(self):
1816
self.assertEquals(clean_party('GOP'), 'REP')

elex4/lib/analysis.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ def summarize(results):
7070
'tie_race': tie_race,
7171
'date': result['date'],
7272
'office': result['office_clean'],
73-
'office_slug': result['office_slug'],
7473
'district': result['district'],
7574
'candidates': sorted_cands,
7675
}

elex4/lib/parser.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ def parse_and_clean(path):
2727
# Perform some data clean-ups and conversions
2828
row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
2929
row['party_clean'] = clean_party(row['party'])
30-
row['office_clean'], row['office_slug'], row['district'] = clean_office(row['office'])
30+
row['office_clean'], row['district'] = clean_office(row['office'])
3131
row['votes'] = int(row['votes'])
3232

33-
# Store county-level results by office, then by candidate key
33+
# Store county-level results by office/district pair, then by candidate key
3434
# Create unique candidate key from party and name, in case multiple candidates have the same name
35+
race_key = (row['office'], row['district'])
3536
cand_key = (row['party'], row['candidate'])
3637
# Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
3738
race = results[row['office']]
@@ -50,17 +51,11 @@ def clean_party(party):
5051
return party_clean
5152

5253
def clean_office(office):
53-
if 'Pres' in office:
54-
office_clean = office.strip()
55-
office_slug = 'president'
56-
district = ''
57-
elif 'Rep' in office:
54+
if 'Rep' in office:
5855
office_clean = 'U.S. House of Representatives'
59-
office_slug = 'us-house'
6056
district = int(office.split('-')[-1])
6157
else:
6258
office_clean = office.strip()
63-
office_slug = office.strip().replace('.', '').replace(' ', '-').lower()
6459
district = ''
65-
return office_clean, office_slug, district
60+
return office_clean, district
6661

elex4/scripts/save_summary_to_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def write_csv(summary):
4242
# Limit output to cleanly parsed, standardized values
4343
fieldnames = [
4444
'date',
45-
'office_slug',
45+
'office',
4646
'district',
4747
'last_name',
4848
'first_name',

0 commit comments

Comments
 (0)