|
35 | 35 | 2011: 'Which languages are you proficient in?',
|
36 | 36 | }
|
37 | 37 |
|
38 |
| -def extractSurvey(year): |
39 |
| - zipFilename = "data/survey" + str(year) + ".zip" |
40 |
| - folder = "survey" + str(year) |
41 |
| - with zipfile.ZipFile(zipFilename, "r") as zip: |
42 |
| - zip.extractall(folder) |
| 38 | +def survey_csvname(year): |
| 39 | + return 'survey{}.csv'.format(year) |
43 | 40 |
|
44 |
| - print(folder) |
45 |
| - shutil.move(folder + '/' + filenames[year], "survey" + str(year) + ".csv") |
46 |
| - shutil.rmtree(folder) |
47 |
| - |
48 |
| -def downloadSurvey(year): |
| 41 | +def download_survey(year): |
| 42 | + print("Downloading " + year) |
49 | 43 | request = requests.get(urls[year])
|
50 | 44 | with open("survey.zip", "wb") as file:
|
51 | 45 | file.write(request.content)
|
52 | 46 |
|
53 | 47 | with zipfile.ZipFile("survey.zip", "r") as file:
|
54 | 48 | file.extractall("data")
|
55 | 49 |
|
56 |
| - shutil.move("data/" + filenames[year], "survey{}.csv".format(year)) |
| 50 | + shutil.move("data/" + filenames[year], survey_csvname(year)) |
57 | 51 | shutil.rmtree("data", ignore_errors=True)
|
58 | 52 | os.remove("survey.zip")
|
59 | 53 |
|
60 |
| -def languagesBreakdown(year): |
61 |
| - data=pd.read_csv('survey{}.csv'.format(year), encoding='latin1') |
| 54 | +def languages_breakdown(year): |
| 55 | + if not os.path.exists(survey_csvname(year)): |
| 56 | + download_survey(year) |
| 57 | + print("Processing " + str(year)) |
| 58 | + data=pd.read_csv(survey_csvname(year), encoding='latin1') |
62 | 59 |
|
63 | 60 | if year >= 2016:
|
64 | 61 | # Languages are semicolon separated list in a single column
|
@@ -89,22 +86,17 @@ def languagesBreakdown(year):
|
89 | 86 |
|
90 | 87 | # total needs to account for all languages columns
|
91 | 88 | if year < 2016:
|
92 |
| - notNull = languages.apply(lambda x: pd.notnull(x)).sum(axis=1) |
93 |
| - total = notNull[notNull > 0].shape[0] |
94 |
| - |
| 89 | + notnull = languages.apply(lambda x: pd.notnull(x)).sum(axis=1) |
| 90 | + total = notnull[notnull > 0].shape[0] |
| 91 | + |
95 | 92 | summary['percent'] = summary['count']/total*100
|
96 | 93 |
|
97 | 94 | return summary
|
98 | 95 |
|
99 | 96 | if __name__ == "__main__":
|
100 | 97 | totals = {}
|
101 | 98 | for year in range(2011, 2018):
|
102 |
| - print("Downloading " + str(year)) |
103 |
| - downloadSurvey(year) |
104 |
| - |
105 |
| - print("Processing " + str(year)) |
106 |
| - totals[year] = languagesBreakdown(year).to_dict() |
107 |
| - #os.remove('survey{}.csv'.format(year)) |
| 99 | + totals[year] = languages_breakdown(year).to_dict() |
108 | 100 |
|
109 | 101 | with open('app/static/data.json', 'w') as file:
|
110 | 102 | file.write(json.dumps(totals, indent=4, separators=(',', ': ')))
|
|
0 commit comments