Skip to content

Commit 607ab87

Browse files
committed
refactoring a bit, adding unit test
1 parent d3d14f7 commit 607ab87

File tree

5 files changed

+31
-22
lines changed

5 files changed

+31
-22
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
env/
22
.vscode/
3+
__pycache__
34

45
*.csv

dev-requirements.txt

733 Bytes
Binary file not shown.

requirements.txt

666 Bytes
Binary file not shown.

stackoverflow.py

+14-22
Original file line number | Diff line number | Diff line change
@@ -35,30 +35,27 @@
3535
2011: 'Which languages are you proficient in?',
3636
}
3737

38-
def extractSurvey(year):
39-
zipFilename = "data/survey" + str(year) + ".zip"
40-
folder = "survey" + str(year)
41-
with zipfile.ZipFile(zipFilename, "r") as zip:
42-
zip.extractall(folder)
38+
def survey_csvname(year):
    """Return the local CSV filename used for the survey of ``year``."""
    template = 'survey{}.csv'
    return template.format(year)
4340

44-
print(folder)
45-
shutil.move(folder + '/' + filenames[year], "survey" + str(year) + ".csv")
46-
shutil.rmtree(folder)
47-
48-
def downloadSurvey(year):
41+
def download_survey(year):
    """Download the survey archive for ``year`` and extract its CSV.

    Fetches ``urls[year]``, saves the response body as ``survey.zip``,
    extracts the archive into ``data/``, moves ``filenames[year]`` out of
    that directory to the path returned by ``survey_csvname(year)``, and
    finally removes the ``data`` directory and the downloaded archive.
    """
    # ``year`` is an int (callers pass values such as 2015 or items of
    # ``range(2011, 2018)``), so it must be converted before concatenation;
    # ``"Downloading " + year`` raises TypeError.
    print("Downloading " + str(year))
    request = requests.get(urls[year])
    with open("survey.zip", "wb") as file:
        file.write(request.content)

    with zipfile.ZipFile("survey.zip", "r") as file:
        file.extractall("data")

    shutil.move("data/" + filenames[year], survey_csvname(year))
    # Clean up the temporary extraction directory and the archive itself.
    shutil.rmtree("data", ignore_errors=True)
    os.remove("survey.zip")
5953

60-
def languagesBreakdown(year):
61-
data=pd.read_csv('survey{}.csv'.format(year), encoding='latin1')
54+
def languages_breakdown(year):
55+
if not os.path.exists(survey_csvname(year)):
56+
download_survey(year)
57+
print("Processing " + str(year))
58+
data=pd.read_csv(survey_csvname(year), encoding='latin1')
6259

6360
if year >= 2016:
6461
# Languages are semicolon separated list in a single column
@@ -89,22 +86,17 @@ def languagesBreakdown(year):
8986

9087
# total needs to account for all languages columns
9188
if year < 2016:
92-
notNull = languages.apply(lambda x: pd.notnull(x)).sum(axis=1)
93-
total = notNull[notNull > 0].shape[0]
94-
89+
notnull = languages.apply(lambda x: pd.notnull(x)).sum(axis=1)
90+
total = notnull[notnull > 0].shape[0]
91+
9592
summary['percent'] = summary['count']/total*100
9693

9794
return summary
9895

9996
if __name__ == "__main__":
    # Collect the language breakdown for every survey year, 2011 through 2017.
    totals = {}
    for survey_year in range(2011, 2018):
        breakdown = languages_breakdown(survey_year)
        totals[survey_year] = breakdown.to_dict()

    # Write the combined results as indented JSON for the app's static data.
    with open('app/static/data.json', 'w') as out:
        serialized = json.dumps(totals, indent=4, separators=(',', ': '))
        out.write(serialized)

tests/test_data.py

+16
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
import unittest
2+
import stackoverflow
3+
import os
4+
5+
class TestStackoverflow(unittest.TestCase):
    """Checks on the per-year language breakdown produced by stackoverflow."""

    def test_language_percents(self):
        """The 2015 breakdown reports the expected share for major languages.

        Calls ``stackoverflow.languages_breakdown(2015)``, which may trigger
        a download when the survey CSV is not already present locally.
        """
        # Percentage of 2015 respondents per language, keyed by language name.
        languages = stackoverflow.languages_breakdown(2015).to_dict()

        # Compare to one decimal place.
        self.assertAlmostEqual(languages['percent']['Java'], 37.4, places=1)
        self.assertAlmostEqual(languages['percent']['C#'], 31.6, places=1)
        self.assertAlmostEqual(languages['percent']['Python'], 23.8, places=1)
        self.assertAlmostEqual(languages['percent']['C++'], 20.6, places=1)
14+
15+
16+

0 commit comments

Comments
 (0)