qubitron
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎dev-requirements.txt
733 Bytes b/‎dev-requirements.txt
733 Bytes
diff --git a/‎requirements.txt
666 Bytes b/‎requirements.txt
666 Bytes
diff --git a/‎stackoverflow.py
+14-22 b/‎stackoverflow.py
+14-22
diff --git a/‎tests/test_data.py
+16 b/‎tests/test_data.py
+16
@@ -1,4 +1,5 @@
 env/
 .vscode/
+__pycache__
 
 *.csv
@@ -35,30 +35,27 @@
     2011: 'Which languages are you proficient in?',
 }
 
-def extractSurvey(year):
-    zipFilename = "data/survey" + str(year) + ".zip"
-    folder = "survey" + str(year)
-    with zipfile.ZipFile(zipFilename, "r") as zip:
-        zip.extractall(folder)
+def survey_csvname(year):  # name of the local CSV file holding the survey for `year`
+    return 'survey' + str(year) + '.csv'
 
-    print(folder)
-    shutil.move(folder + '/' + filenames[year], "survey" + str(year) + ".csv")
-    shutil.rmtree(folder)
-
-def downloadSurvey(year):
+def download_survey(year):  # download the survey zip for `year` and extract its CSV
+    print("Downloading " + str(year))  # year is an int; str + int would raise TypeError
     request = requests.get(urls[year])
     with open("survey.zip", "wb") as file:
         file.write(request.content) 
 
     with zipfile.ZipFile("survey.zip", "r") as file:
         file.extractall("data")
 
-    shutil.move("data/" + filenames[year], "survey{}.csv".format(year))
+    shutil.move("data/" + filenames[year], survey_csvname(year))
     shutil.rmtree("data", ignore_errors=True)
     os.remove("survey.zip")
 
-def languagesBreakdown(year):
-    data=pd.read_csv('survey{}.csv'.format(year), encoding='latin1')
+def languages_breakdown(year):
+    if not os.path.exists(survey_csvname(year)):
+        download_survey(year)
+    print("Processing " + str(year))
+    data=pd.read_csv(survey_csvname(year), encoding='latin1')
 
     if year >= 2016:
         # Languages are semicolon separated list in a single column
@@ -89,22 +86,17 @@ def languagesBreakdown(year):
 
     # total needs to account for all languages columns
     if year < 2016:
-        notNull = languages.apply(lambda x: pd.notnull(x)).sum(axis=1)
-        total = notNull[notNull > 0].shape[0]
-    
+        notnull = languages.apply(lambda x: pd.notnull(x)).sum(axis=1)
+        total = notnull[notnull > 0].shape[0]
+
     summary['percent'] = summary['count']/total*100
 
     return summary
 
 if __name__ == "__main__":
     totals = {}
     for year in range(2011, 2018):
-        print("Downloading " + str(year))       
-        downloadSurvey(year)
-
-        print("Processing " + str(year))
-        totals[year] = languagesBreakdown(year).to_dict()
-        #os.remove('survey{}.csv'.format(year))
+        totals[year] = languages_breakdown(year).to_dict()  # downloads the CSV itself when it is missing
 
     with open('app/static/data.json', 'w') as file:
         file.write(json.dumps(totals, indent=4, separators=(',', ': ')))
 
@@ -0,0 +1,16 @@
+import unittest
+import stackoverflow
+import os
+
+class TestStackoverflow(unittest.TestCase):
+    """Checks stackoverflow.languages_breakdown against fixed expected values."""
+    def test_language_percents(self):
+        """The 2015 breakdown reports the expected percentages for four languages."""
+        # languages_breakdown fetches the survey CSV itself if it is not on disk
+        languages = stackoverflow.languages_breakdown(2015).to_dict()
+
+        # places=1: each difference, rounded to one decimal place, must be zero
+        self.assertAlmostEqual(languages['percent']['Java'], 37.4, 1)
+        self.assertAlmostEqual(languages['percent']['C#'], 31.6, 1)
+        self.assertAlmostEqual(languages['percent']['Python'], 23.8, 1)
+        self.assertAlmostEqual(languages['percent']['C++'], 20.6, 1)