|
6 | 6 | from datetime import datetime |
7 | 7 | import math |
8 | 8 |
|
| 9 | +from constants import VIRUSES, GEOS, REGIONS, NATION, LAST_WEEK_OF_YEAR |
| 10 | + |
def abbreviate_virus(full_name, *, mapping=None):
    """Replace known virus names in ``full_name`` with their abbreviations.

    The input is lowercased, then every whole-word occurrence of a key of
    the lookup table is replaced by the value stored under that key. Text
    that matches no key is returned lowercased but otherwise unchanged.

    Args:
        full_name: Text containing zero or more virus names.
        mapping: Optional ``{name: abbreviation}`` table. Keys must be
            lowercase because matching is done on the lowercased input.
            Defaults to the module-level ``VIRUSES`` table.

    Returns:
        The lowercased text with every recognised name abbreviated.
    """
    if mapping is None:
        mapping = VIRUSES
    lowercase = full_name.lower()
    if not mapping:
        # An empty alternation would match the empty string at every word
        # boundary and then fail the table lookup; nothing to replace.
        return lowercase

    # Regex alternation accepts the first alternative that matches, so put
    # longer names first; otherwise a short key could shadow a longer key
    # that starts with the same words.
    keys = sorted(mapping, key=len, reverse=True)
    pattern = re.compile(
        r'\b(' + '|'.join(re.escape(key) for key in keys) + r')\b'
    )
    return pattern.sub(lambda match: mapping[match.group()], lowercase)
34 | 17 |
|
def abbreviate_geo(full_name, *, mapping=None):
    """Replace known geography names in ``full_name`` with abbreviations.

    The input is lowercased, then every whole-word occurrence of a key of
    the lookup table is replaced by the value stored under that key. Text
    that matches no key is returned lowercased but otherwise unchanged.

    Args:
        full_name: Text containing zero or more geography names.
        mapping: Optional ``{name: abbreviation}`` table. Keys must be
            lowercase because matching is done on the lowercased input.
            Defaults to the module-level ``GEOS`` table.

    Returns:
        The lowercased text with every recognised name abbreviated.
    """
    if mapping is None:
        mapping = GEOS
    lowercase = full_name.lower()
    if not mapping:
        # An empty alternation would match the empty string at every word
        # boundary and then fail the table lookup; nothing to replace.
        return lowercase

    # Regex alternation accepts the first alternative that matches, so put
    # longer names first; otherwise a short key could shadow a longer key
    # that starts with the same words.
    keys = sorted(mapping, key=len, reverse=True)
    pattern = re.compile(
        r'\b(' + '|'.join(re.escape(key) for key in keys) + r')\b'
    )
    return pattern.sub(lambda match: mapping[match.group()], lowercase)
77 | 25 |
|
78 | 26 | def create_geo_types(geo,default_geo): |
79 | | - regions = ['atlantic','atl','province of québec','québec','qc','province of ontario','ontario','on', |
80 | | - 'prairies', 'pr', "british columbia", 'bc',"territories",'terr'] |
81 | | - nation = ["canada","can",'ca'] |
82 | | - |
83 | | - if geo in nation: |
| 27 | + if geo in NATION: |
84 | 28 | geo_type="nation" |
85 | | - elif geo in regions: |
| 29 | + elif geo in REGIONS: |
86 | 30 | geo_type="region" |
87 | 31 | else: |
88 | 32 | geo_type = default_geo |
@@ -163,7 +107,7 @@ def get_weekly_data(base_url,start_year): |
163 | 107 | week_string = week_df.iloc[0]['Text'].lower() |
164 | 108 | current_week = int(re.search("week (.+?) ", week_string).group(1)) |
165 | 109 |
|
166 | | - if current_week < 34: |
| 110 | + if current_week < LAST_WEEK_OF_YEAR: |
167 | 111 | current_year = start_year+1 |
168 | 112 | else: |
169 | 113 | current_year = start_year |
|
0 commit comments