simplify a bit by removing office slug

zstumgoren · zstumgoren · commit c5bcf0d5f503 · 2014-02-11T21:12:56.000-08:00
diff --git a/elex1/election_results.py b/elex1/election_results.py
@@ -65,29 +65,24 @@
         party_clean = party
     row['party_clean'] = party_clean
 
-    # Standardize Office and add slug and district
+    # Standardize Office and district
     office = row['office']
-    if 'Pres' in office:
-        row['office_clean'] = office.strip()
-        row['office_slug'] = 'president'
-        row['district'] = ''
-    elif 'Rep' in office:
-        row['office_clean'] = 'U.S. Representative'
-        row['office_slug'] = 'us-house'
+    if 'Rep' in office:
+        row['office_clean'] = 'U.S. House of Representatives'
         row['district'] = int(office.split('-')[-1])
     else:
         row['office_clean'] = office.strip()
-        row['office_slug'] = office.strip().replace(' ', '-')
         row['district'] = ''
 
     # Convert total votes to an integer
     row['votes'] = int(row['votes'])
 
-    # Store county-level results by office, then by candidate key
+    # Store county-level results by office/district pair, then by candidate key
     # Create unique candidate key from party and name, in case multiple candidates have the same name
+    race_key = (row['office'], row['district'])
     cand_key = (row['party'], row['candidate'])
     # Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
-    race = results[row['office']]
+    race = results[race_key]
     race.setdefault(cand_key, []).append(row)
 
 
@@ -143,7 +138,6 @@
         'tie_race': tie_race,
         'date': result['date'],
         'office': result['office_clean'],
-        'office_slug': result['office_slug'],
         'district': result['district'],
         'candidates': sorted_cands,
     }
@@ -155,7 +149,7 @@
     # We'll limit the output to cleanly parsed, standardized values
     fieldnames = [
         'date',
-        'office_slug',
+        'office',
         'district',
         'last_name',
         'first_name',
diff --git a/elex2/election_results.py b/elex2/election_results.py
@@ -52,19 +52,13 @@ def clean_party(party):
     return party_clean
 
 def clean_office(office):
-    if 'Pres' in office:
-        office_clean = office.strip()
-        office_slug = 'president'
-        district = ''
-    elif 'Rep' in office:
+    if 'Rep' in office:
         office_clean = 'U.S. House of Representatives'
-        office_slug = 'us-house'
         district = int(office.split('-')[-1])
     else:
         office_clean = office.strip()
-        office_slug = office.strip().replace('.', '').replace(' ', '-').lower()
         district = ''
-    return office_clean, office_slug, district
+    return office_clean, district
 
 
 #### PRIMARY FUNCS ####
@@ -110,14 +104,15 @@ def parse_and_clean(path):
         # Perform some data clean-ups and conversions
         row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
         row['party_clean'] = clean_party(row['party'])
-        row['office_clean'], row['office_slug'], row['district'] = clean_office(row['office'])
+        row['office_clean'], row['district'] = clean_office(row['office'])
         row['votes'] = int(row['votes'])
 
-        # Store county-level results by office, then by candidate key
+        # Store county-level results by office/district pair, then by candidate key
         # Create unique candidate key from party and name, in case multiple candidates have the same name
+        race_key = (row['office'], row['district'])
         cand_key = (row['party'], row['candidate'])
         # Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
-        race = results[row['office']]
+        race = results[race_key]
         race.setdefault(cand_key, []).append(row)
 
     return results
@@ -176,7 +171,6 @@ def summarize(results):
             'tie_race': tie_race,
             'date': result['date'],
             'office': result['office_clean'],
-            'office_slug': result['office_slug'],
             'district': result['district'],
             'candidates': sorted_cands,
         }
@@ -196,7 +190,7 @@ def write_csv(summary):
         # Limit output to cleanly parsed, standardized values
         fieldnames = [
             'date',
-            'office_slug',
+            'office',
             'district',
             'last_name',
             'first_name',
diff --git a/elex2/test_elections.py b/elex2/test_elections.py
@@ -5,14 +5,11 @@
 
 class TestDataCleaners(TestCase):
 
-    def test_clean_office_prez(self):
-        self.assertEquals(clean_office('President'), ('President', 'president', ''))
-
     def test_clean_office_rep(self):
-        self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 'us-house', 1))
+        self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 1))
 
     def test_clean_office_other(self):
-        self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', 'us-senate', ''))
+        self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', ''))
 
     def test_clean_party_gop(self):
         self.assertEquals(clean_party('GOP'), 'REP')
diff --git a/elex3/lib/analysis.py b/elex3/lib/analysis.py
@@ -70,7 +70,6 @@ def summarize(results):
             'tie_race': tie_race,
             'date': result['date'],
             'office': result['office_clean'],
-            'office_slug': result['office_slug'],
             'district': result['district'],
             'candidates': sorted_cands,
         }
diff --git a/elex3/lib/parser.py b/elex3/lib/parser.py
@@ -27,11 +27,12 @@ def parse_and_clean(path):
         # Perform some data clean-ups and conversions
         row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
         row['party_clean'] = clean_party(row['party'])
-        row['office_clean'], row['office_slug'], row['district'] = clean_office(row['office'])
+        row['office_clean'], row['district'] = clean_office(row['office'])
         row['votes'] = int(row['votes'])
 
-        # Store county-level results by office, then by candidate key
+        # Store county-level results by office/district pair, then by candidate key
         # Create unique candidate key from party and name, in case multiple candidates have the same name
+        race_key = (row['office'], row['district'])
         cand_key = (row['party'], row['candidate'])
         # Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
         race = results[row['office']]
@@ -50,17 +51,11 @@ def clean_party(party):
     return party_clean
 
 def clean_office(office):
-    if 'Pres' in office:
-        office_clean = office.strip()
-        office_slug = 'president'
-        district = ''
-    elif 'Rep' in office:
+    if 'Rep' in office:
         office_clean = 'U.S. House of Representatives'
-        office_slug = 'us-house'
         district = int(office.split('-')[-1])
     else:
         office_clean = office.strip()
-        office_slug = office.strip().replace('.', '').replace(' ', '-').lower()
         district = ''
-    return office_clean, office_slug, district
+    return office_clean, district
 
diff --git a/elex3/scripts/save_summary_to_csv.py b/elex3/scripts/save_summary_to_csv.py
@@ -42,7 +42,7 @@ def write_csv(summary):
         # Limit output to cleanly parsed, standardized values
         fieldnames = [
             'date',
-            'office_slug',
+            'office',
             'district',
             'last_name',
             'first_name',
diff --git a/elex3/tests/test_parser.py b/elex3/tests/test_parser.py
@@ -5,14 +5,12 @@
 
 class TestDataCleaners(TestCase):
 
-    def test_clean_office_prez(self):
-        self.assertEquals(clean_office('President'), ('President', 'president', ''))
 
     def test_clean_office_rep(self):
-        self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 'us-house', 1))
+        self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 1))
 
     def test_clean_office_other(self):
-        self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', 'us-senate', ''))
+        self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', ''))
 
     def test_clean_party_gop(self):
         self.assertEquals(clean_party('GOP'), 'REP')
diff --git a/elex4/lib/analysis.py b/elex4/lib/analysis.py
@@ -70,7 +70,6 @@ def summarize(results):
             'tie_race': tie_race,
             'date': result['date'],
             'office': result['office_clean'],
-            'office_slug': result['office_slug'],
             'district': result['district'],
             'candidates': sorted_cands,
         }
diff --git a/elex4/lib/parser.py b/elex4/lib/parser.py
@@ -27,11 +27,12 @@ def parse_and_clean(path):
         # Perform some data clean-ups and conversions
         row['last_name'], row['first_name'] = [name.strip() for name in row['candidate'].split(',')]
         row['party_clean'] = clean_party(row['party'])
-        row['office_clean'], row['office_slug'], row['district'] = clean_office(row['office'])
+        row['office_clean'], row['district'] = clean_office(row['office'])
         row['votes'] = int(row['votes'])
 
-        # Store county-level results by office, then by candidate key
+        # Store county-level results by office/district pair, then by candidate key
         # Create unique candidate key from party and name, in case multiple candidates have the same name
+        race_key = (row['office'], row['district'])
         cand_key = (row['party'], row['candidate'])
         # Below, setdefault initializes empty dict and list for the respective keys if they don't already exist.
         race = results[row['office']]
@@ -50,17 +51,11 @@ def clean_party(party):
     return party_clean
 
 def clean_office(office):
-    if 'Pres' in office:
-        office_clean = office.strip()
-        office_slug = 'president'
-        district = ''
-    elif 'Rep' in office:
+    if 'Rep' in office:
         office_clean = 'U.S. House of Representatives'
-        office_slug = 'us-house'
         district = int(office.split('-')[-1])
     else:
         office_clean = office.strip()
-        office_slug = office.strip().replace('.', '').replace(' ', '-').lower()
         district = ''
-    return office_clean, office_slug, district
+    return office_clean, district
 
diff --git a/elex4/scripts/save_summary_to_csv.py b/elex4/scripts/save_summary_to_csv.py
@@ -42,7 +42,7 @@ def write_csv(summary):
         # Limit output to cleanly parsed, standardized values
         fieldnames = [
             'date',
-            'office_slug',
+            'office',
             'district',
             'last_name',
             'first_name',
diff --git a/elex4/tests/test_parser.py b/elex4/tests/test_parser.py
@@ -5,14 +5,11 @@
 
 class TestDataCleaners(TestCase):
 
-    def test_clean_office_prez(self):
-        self.assertEquals(clean_office('President'), ('President', 'president', ''))
-
     def test_clean_office_rep(self):
-        self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 'us-house', 1))
+        self.assertEquals(clean_office('U.S. Rep - 1'), ('U.S. House of Representatives', 1))
 
     def test_clean_office_other(self):
-        self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', 'us-senate', ''))
+        self.assertEquals(clean_office('U.S. Senate'), ('U.S. Senate', ''))
 
     def test_clean_party_gop(self):
         self.assertEquals(clean_party('GOP'), 'REP')

Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,6 @@ def summarize(results):`
`70`	`70`	`'tie_race': tie_race,`
`71`	`71`	`'date': result['date'],`
`72`	`72`	`'office': result['office_clean'],`
`73`		`- 'office_slug': result['office_slug'],`
`74`	`73`	`'district': result['district'],`
`75`	`74`	`'candidates': sorted_cands,`
`76`	`75`	`}`