Skip to content

Commit 61a199a

Browse files
committed
Change issue and time_value to integers instead of dates, and update tests
1 parent 9fc8343 commit 61a199a

File tree

6 files changed

+1526
-1505
lines changed

6 files changed

+1526
-1505
lines changed

src/acquisition/rvdss/pull_historic.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
ALTERNATIVE_SEASON_BASE_URL, SEASON_BASE_URL, FIRST_WEEK_OF_YEAR, DASHBOARD_BASE_URLS_2023_2024_SEASON
2121
)
2222
from delphi.epidata.acquisition.rvdss.utils import (
23-
abbreviate_geo, create_geo_types, check_date_format,
23+
abbreviate_geo, create_geo_types, check_date_format,convert_date_to_int,
2424
fetch_archived_dashboard_data, preprocess_table_columns, add_flu_prefix
2525
)
2626
#%% Functions
@@ -499,6 +499,17 @@ def fetch_one_season_from_report(url):
499499
del combined_positive_tables
500500
del pos_table
501501

502+
# convert dates to integers
503+
all_respiratory_detection_tables = all_respiratory_detection_tables.reset_index()
504+
all_respiratory_detection_tables['time_value'] = [convert_date_to_int(t) for t in all_respiratory_detection_tables['time_value']]
505+
all_respiratory_detection_tables['issue'] = [convert_date_to_int(t) for t in all_respiratory_detection_tables['issue']]
506+
all_respiratory_detection_tables = all_respiratory_detection_tables.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'],verify_integrity=True)
507+
508+
all_positive_tables = all_positive_tables.reset_index()
509+
all_positive_tables['time_value'] = [convert_date_to_int(t) for t in all_positive_tables['time_value']]
510+
all_positive_tables['issue'] = [convert_date_to_int(t) for t in all_positive_tables['issue']]
511+
all_positive_tables = all_positive_tables.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'],verify_integrity=True)
512+
502513
return {
503514
"respiratory_detection": all_respiratory_detection_tables,
504515
"positive": all_positive_tables
@@ -515,5 +526,4 @@ def fetch_historical_dashboard_data():
515526
# Update the end of the 2023-2024 season with the dashboard data
516527

517528
dict_list = [fetch_archived_dashboard_data(url) for url in DASHBOARD_BASE_URLS_2023_2024_SEASON]
518-
519529
return dict_list

src/acquisition/rvdss/run.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import argparse
99
from datetime import datetime
1010

11-
from delphi.epidata.acquisition.rvdss.utils import create_geo_types, abbreviate_geo, fetch_current_dashboard_data, check_most_recent_update_date,get_dashboard_update_date, combine_tables, duplicate_provincial_detections,expand_detections_columns
11+
from delphi.epidata.acquisition.rvdss.utils import create_geo_types, abbreviate_geo, fetch_current_dashboard_data, check_most_recent_update_date,get_dashboard_update_date, combine_tables, duplicate_provincial_detections,expand_detections_columns
1212
from delphi.epidata.acquisition.rvdss.constants import DASHBOARD_BASE_URL, RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE,UPDATE_DATES_FILE
1313
from delphi.epidata.acquisition.rvdss.pull_historic import fetch_report_data,fetch_historical_dashboard_data
1414
from delphi.epidata.acquisition.rvdss.database import update
@@ -76,6 +76,7 @@ def update_historical_data(logger):
7676
data = combine_tables(hist_dict_list)
7777
data['time_type'] = "week"
7878

79+
7980
#update database
8081
update(data,logger)
8182
logger.info("Finished updating historic data")

src/acquisition/rvdss/utils.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ def check_date_format(date_string):
7373

7474
return(new_date)
7575

76+
def convert_date_to_int(date_string):
    """Convert a ``YYYY-MM-DD`` date into a ``YYYYMMDD`` integer.

    The value is stringified and searched for the first ``YYYY-MM-DD``
    pattern. Only the matched year/month/day digits are used to build the
    integer, so values that carry extra text after the date (for example a
    datetime rendered as ``"2024-01-07 00:00:00"``) convert cleanly instead
    of raising ``ValueError`` from ``int()``.

    Args:
        date_string: Any value; it is passed through ``str()`` before
            matching, so non-string inputs are accepted.

    Returns:
        The date as an ``int`` (e.g. ``20240107``) when a ``YYYY-MM-DD``
        pattern is found; otherwise ``date_string`` itself, unchanged.
    """
    found = re.search(r"([0-9]{4})-([0-9]{2})-([0-9]{2})", str(date_string))
    if found is None:
        # Not date-like (e.g. already an integer, or missing): pass it through.
        return date_string
    # Join only the captured groups; stripping "-" from the whole string
    # would leave any trailing time component behind and break int().
    return int("".join(found.groups()))
82+
7683
def get_dashboard_update_date(base_url,headers):
7784
# Get update date
7885
update_date_url = base_url + "RVD_UpdateDate.csv"
@@ -206,6 +213,11 @@ def get_positive_data(base_url,headers,update_date):
206213
assert all([0 <= val <= 100 or math.isnan(val) for val in df[df.columns[k]]]), "Percentage not from 0-100"
207214

208215
df = df.reset_index()
216+
217+
# convert dates to integers
218+
df['time_value'] = [convert_date_to_int(t) for t in df['time_value']]
219+
df['issue'] = [convert_date_to_int(t) for t in df['issue']]
220+
209221
return(df.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'],verify_integrity=True))
210222

211223
def get_detections_data(base_url,headers,update_date):
@@ -243,6 +255,10 @@ def get_detections_data(base_url,headers,update_date):
243255
df_detections=df_detections.rename(columns={'reportinglaboratory':"geo_value",'date':"time_value"})
244256
df_detections['geo_value'] = [abbreviate_geo(g) for g in df_detections['geo_value']]
245257
df_detections['geo_type'] = [create_geo_types(g,"lab") for g in df_detections['geo_value']]
258+
259+
# convert dates to integers
260+
df_detections['time_value'] = [convert_date_to_int(t) for t in df_detections['time_value']]
261+
df_detections['issue'] = [convert_date_to_int(t) for t in df_detections['issue']]
246262

247263
return(df_detections.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'],verify_integrity=True))
248264

@@ -336,13 +352,13 @@ def combine_tables(data_dict):
336352
detections=data_dict["respiratory_detection"]
337353

338354
positive["epiweek"] = pd.to_numeric(positive["epiweek"],downcast="integer")
339-
positive["time_value"] = pd.to_datetime(positive["time_value"])
340-
positive["issue"] = pd.to_datetime(positive["issue"])
355+
#positive["time_value"] = pd.to_datetime(positive["time_value"])
356+
#positive["issue"] = pd.to_datetime(positive["issue"])
341357
positive['geo_type'] = [create_geo_types(g,'lab') for g in positive['geo_value']]
342358

343359
detections["epiweek"] = pd.to_numeric(detections["epiweek"],downcast="integer")
344-
detections["time_value"] = pd.to_datetime(detections["time_value"])
345-
detections["issue"] = pd.to_datetime(detections["issue"])
360+
#detections["time_value"] = pd.to_datetime(detections["time_value"])
361+
#detections["issue"] = pd.to_datetime(detections["issue"])
346362
detections['geo_type'] = [create_geo_types(g,'lab') for g in detections['geo_value']]
347363

348364
detections = expand_detections_columns(detections)

0 commit comments

Comments
 (0)