Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions src/acquisition/mobility/Apple_Mobility_US.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import os
import urllib.request
import json
import pandas as pd
from delphi.epidata.acquisition.mobility.database import database

# Host serving Apple's mobility files; get_mobility_link() appends the paths
# read from the JSON index to this prefix.
site_url = "https://covid19-static.cdn-apple.com"
# JSON index that get_mobility_link() fetches to locate the current CSV.
index_url = "https://covid19-static.cdn-apple.com/covid19-mobility-data/current/v3/index.json"
# Local folder (relative path) where the downloaded CSV is stored and read from.
directory = 'Apple_Mobility'


def get_mobility_link():
    """Return the download URL of the current Apple Mobility CSV.

    Fetches the JSON index at ``index_url`` and concatenates ``site_url``
    with the index's ``basePath`` and the ``en-us`` region's ``csvPath``.
    """
    with urllib.request.urlopen(index_url) as response:
        index = json.loads(response.read().decode())

    # Pieces are looked up in the same order they appear in the final URL.
    pieces = [
        site_url,
        index['basePath'],
        index['regions']['en-us']['csvPath'],
    ]
    return "".join(pieces)


def get_mobility_data():
    """Download the Apple Mobility CSV into ``directory``.

    The target folder is created when missing and any files left over from
    a previous run are removed, so the folder only ever holds the latest
    download. The file is saved as ``apple_mobility_data_US.csv``.

    When ``directory`` is an empty string the file is written to the
    current working directory and nothing is created or deleted.
    """
    if directory:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(directory, exist_ok=True)
        for name in os.listdir(directory):
            stale_path = os.path.join(directory, name)
            # os.remove() cannot delete directories, so only plain files
            # (and symlinks) are cleaned up; sub-folders are left alone.
            if os.path.isfile(stale_path) or os.path.islink(stale_path):
                os.remove(stale_path)

    target = os.path.join(directory, "apple_mobility_data_US.csv")
    urllib.request.urlretrieve(get_mobility_link(), target)


def build_report():
    """Build the cleaned, county-level Apple mobility table for the US.

    Reads ``apple_mobility_data_US.csv`` from ``directory``, keeps the
    United States rows, reshapes the per-date columns into one row per
    (state, county, date) and attaches FIPS codes looked up in
    ``data/county_fipscode.csv`` by "county,state" name.

    Returns:
        pandas.DataFrame containing ``state``, ``county``, ``date``,
        ``driving``, ``transit``, ``walking`` and ``fips code`` (zero-padded
        to five characters), plus any further columns the FIPS file carries.
        Values are the source readings minus 100; missing readings are 0.
    """
    csv_path = os.path.join(directory, "apple_mobility_data_US.csv")
    mobility = pd.read_csv(csv_path, low_memory=False)
    mobility = mobility.drop(columns=['alternative_name'])

    # Country-level rows name the country in 'region'; copy it into 'country'
    # so every row can be filtered on one column. Vectorised instead of a
    # row-wise apply, which is slow and misbehaves on an empty frame.
    is_country_row = mobility['geo_type'] == 'country/region'
    mobility['country'] = mobility['region'].where(
        is_country_row, mobility['country'])

    mobility = mobility[mobility['country'] == "United States"].drop(
        columns=['country'])

    # Rows without a sub-region fall back to their region name; the national
    # row ("United States") is labelled "Total".
    mobility['sub-region'] = mobility['sub-region'].fillna(
        mobility['region']).replace({"United States": "Total"})
    # Only city/county rows keep their region name; everything else is "Total".
    is_local = mobility['geo_type'].isin(['city', 'county'])
    mobility['region'] = mobility['region'].where(is_local, 'Total')
    mobility = mobility.rename(
        columns={'sub-region': 'state', 'region': 'county'})

    # Wide (one column per date) -> long (one row per date).
    mobility = mobility.melt(
        id_vars=['geo_type', 'state', 'county', 'transportation_type'],
        var_name='date')
    mobility['value'] = mobility['value'] - 100

    # One column per transportation type.
    mobility = mobility.pivot_table(
        index=['geo_type', 'state', 'county', 'date'],
        columns='transportation_type').reset_index()
    # Flatten the two-level columns: index columns have an empty second
    # level, value columns have "value" as first level.
    mobility.columns = [
        second + (first if first != "value" else "")
        for first, second in mobility.columns
    ]

    mobility = mobility.loc[:, ['state', 'county', 'geo_type',
                                'date', 'driving', 'transit', 'walking']]
    mobility = mobility.sort_values(
        by=['state', 'county', 'geo_type', 'date']).reset_index(drop=True)
    # fillna() returns a new frame, so the column assignment below does not
    # write into a slice of the sorted frame.
    mobility = mobility[mobility['geo_type'] == "county"].fillna(0)

    fips = pd.read_csv("data/county_fipscode.csv")
    fips['f_county_country'] = (
        fips['county name'].astype(str) + ',' + fips['state name'].astype(str))
    mobility['m_county_country'] = (
        mobility['county'].astype(str) + ',' + mobility['state'].astype(str))

    merged = pd.merge(mobility, fips,
                      left_on=['m_county_country'],
                      right_on=['f_county_country'],
                      how='left', sort=False)
    merged = merged.drop(columns=['m_county_country', 'state name',
                                  'county name', 'f_county_country',
                                  'geo_type'])

    # Strip a trailing ".0" left by float conversion. Raw string: "\." is an
    # invalid escape in a normal literal; "$" keeps the match at the end.
    # NOTE(review): rows with no FIPS match appear to become the text "nan"
    # here (padded to "00nan") -- confirm whether they should be dropped.
    merged['fips code'] = merged['fips code'].astype(str).replace(
        r'\.0$', '', regex=True)
    merged['fips code'] = merged['fips code'].str.rjust(5, '0')
    merged.fillna(0, inplace=True)
    return merged


def split_df(df, size):
    """Insert ``df`` into the database in consecutive batches.

    Args:
        df: DataFrame whose rows are passed to ``insert_apple_data``.
        size: maximum number of rows per batch; must be at least 1.

    Raises:
        ValueError: if ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")

    db = database.Database()
    row_count = df.shape[0]
    for start in range(0, row_count, size):
        # iloc's stop is exclusive and clipped at the end of the frame, so
        # start + size yields exactly `size` rows without skipping any.
        db.insert_apple_data(df.iloc[start:start + size])


if __name__ == '__main__':
    # Step 1: fetch the latest Apple CSV.
    get_mobility_data()
    # Steps 2 and 3: clean the data, then load it in batches of 10000 rows.
    split_df(build_report(), 10000)
74 changes: 74 additions & 0 deletions src/acquisition/mobility/Google_Mobility_US.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import urllib.request
import pandas as pd
from delphi.epidata.acquisition.mobility.database import database

# Location of Google's global mobility report CSV, fetched by get_mobility_data().
url = "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv"
# Local folder (relative path) where the downloaded CSV is stored and read from.
directory = 'Google_Mobility'


def get_mobility_data():
    """Download the Google Mobility CSV into ``directory``.

    The target folder is created when missing and any files left over from
    a previous run are removed, so the folder only ever holds the latest
    download. The file is saved as ``google_mobility_data_US.csv``.

    When ``directory`` is an empty string the file is written to the
    current working directory and nothing is created or deleted.
    """
    if directory:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(directory, exist_ok=True)
        for name in os.listdir(directory):
            stale_path = os.path.join(directory, name)
            # os.remove() cannot delete directories, so only plain files
            # (and symlinks) are cleaned up; sub-folders are left alone.
            if os.path.isfile(stale_path) or os.path.islink(stale_path):
                os.remove(stale_path)

    target = os.path.join(directory, "google_mobility_data_US.csv")
    urllib.request.urlretrieve(url, target)


def build_report():
    """Build the cleaned, county-level Google mobility table for the US.

    Reads ``google_mobility_data_US.csv`` from ``directory``, shortens the
    column names, keeps United States rows that have a county, and
    normalises the FIPS code to a five-character, zero-padded string.

    Returns:
        pandas.DataFrame with columns ``state``, ``county``, ``fips code``,
        ``date``, ``retail and recreation``, ``grocery and pharmacy``,
        ``parks``, ``transit stations``, ``workplaces`` and ``residential``.
        Missing values are replaced by 0.
    """
    csv_path = os.path.join(directory, "google_mobility_data_US.csv")
    report = pd.read_csv(csv_path, low_memory=False)

    # Both patterns are plain text; regex=False pins that meaning regardless
    # of the pandas version's default.
    report.columns = report.columns.str.replace(
        '_percent_change_from_baseline', '', regex=False)
    report.columns = report.columns.str.replace('_', ' ', regex=False)

    report = report.rename(columns={'country region': 'country'})
    report = report[report['country'] == "United States"]
    report = report.rename(
        columns={
            'sub region 1': 'state',
            'sub region 2': 'county',
            'census fips code': 'fips code',
        })
    report = report.loc[:, [
        'state',
        'county',
        'fips code',
        'date',
        'retail and recreation',
        'grocery and pharmacy',
        'parks',
        'transit stations',
        'workplaces',
        'residential',
    ]]

    # Keep county-level rows only, then zero-fill the remaining gaps.
    # Non-inplace calls avoid mutating a slice of the filtered frame.
    report = report.dropna(subset=['county'], how='all').fillna(0)

    # Strip a trailing ".0" left by float conversion. Raw string: "\." is an
    # invalid escape in a normal literal; "$" keeps the match at the end.
    report['fips code'] = report['fips code'].astype(str).replace(
        r'\.0$', '', regex=True)
    report['fips code'] = report['fips code'].str.rjust(5, '0')
    return report


def split_df(df, size):
    """Insert ``df`` into the database in consecutive batches.

    Args:
        df: DataFrame whose rows are passed to ``insert_google_data``.
        size: maximum number of rows per batch; must be at least 1.

    Raises:
        ValueError: if ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")

    db = database.Database()
    row_count = df.shape[0]
    for start in range(0, row_count, size):
        # iloc's stop is exclusive and clipped at the end of the frame, so
        # start + size yields exactly `size` rows without skipping any.
        db.insert_google_data(df.iloc[start:start + size])


if __name__ == '__main__':
    # Step 1: fetch the latest Google CSV.
    get_mobility_data()
    # Steps 2 and 3: clean the data, then load it in batches of 100000 rows.
    split_df(build_report(), 100000)
17 changes: 17 additions & 0 deletions src/acquisition/mobility/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# COVID-19 Mobility data

- Data source (Apple):
https://covid19.apple.com/mobility
- Data source (Google):
https://www.google.com/covid19/mobility/
- Type: mobility data type (Apple,Google)
- Country & State: location for which mobility trends have been reported
- Month & Year: month and year of the data to be retrieved.

Sample Query 1 : https://api.covidcast.cmu.edu/epidata/api.php?source=mobility&type=apple&country=us&state=florida&month=9&year=2020

The above query retrieves Apple mobility data for the state of Florida for September 2020.

Sample Query 2 : https://api.covidcast.cmu.edu/epidata/api.php?source=mobility&type=google&country=us&state=florida&month=9&year=2020

The above query retrieves Google mobility data for the state of Florida for September 2020.
Loading