Skip to content

Commit 38a1258

Browse files
author
jpaten
committed
feat: added datetime handling to analytics sheets package (#4353)
1 parent dbc8e19 commit 38a1258

File tree

2 files changed

+67
-14
lines changed

2 files changed

+67
-14
lines changed

analytics/analytics_package/analytics/sheets_api.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class COLUMN_FORMAT_OPTIONS(Enum):
2727
DEFAULT = 1
2828
PERCENT_UNCOLORED = 2
2929
PERCENT_COLORED = 3
30+
YEAR_MONTH_DATE = 4
3031

3132

3233
class CHART_TYPES(Enum):
@@ -41,6 +42,10 @@ class CHART_TYPES(Enum):
4142
"extra_columns_width": 50,
4243
}
4344

45+
DEFAULT_GSPREAD_UPDATE_ARGS = {
46+
"value_input_option": gspread.utils.ValueInputOption.user_entered,
47+
}
48+
4449
def extract_credentials(authentication_response):
4550
"""Extracts the credentials from the tuple from api.authenticate"""
4651
return authentication_response[3]
@@ -173,7 +178,8 @@ def fill_worksheet_with_df(
173178
worksheet_name,
174179
overlapBehavior,
175180
sheet_formatting_options={},
176-
column_formatting_options={}
181+
column_formatting_options={},
182+
**gspread_update_args
177183
):
178184
"""
179185
Fill a worksheet with the contents of a DataFrame.
@@ -210,7 +216,10 @@ def fill_worksheet_with_df(
210216
axis=1
211217
)
212218
# Add data to worksheet
213-
worksheet.update([df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist())
219+
worksheet.update(
220+
[df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(),
221+
**{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args}
222+
)
214223

215224
# Format worksheet
216225
# Justify Column Widths
@@ -287,6 +296,13 @@ def fill_worksheet_with_df(
287296
column_range,
288297
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
289298
)
299+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
300+
# Apply date format rule
301+
gspread_formatting.format_cell_range(
302+
worksheet,
303+
column_range,
304+
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
305+
)
290306

291307
# Apply base formatting options
292308
gspread_formatting.format_cell_range(
@@ -299,7 +315,7 @@ def fill_worksheet_with_df(
299315
if "Sheet1" in [i.title for i in sheet.worksheets()]:
300316
sheet.del_worksheet(sheet.worksheet("Sheet1"))
301317

302-
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
318+
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args):
303319
"""
304320
Fill a sheet with the contents of a dictionary of DataFrames.
305321
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
@@ -326,7 +342,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
326342
fill_worksheet_with_df(
327343
sheet, df, worksheet_name, overlapBehavior,
328344
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
329-
column_formatting_options=column_formatting_options.get(worksheet_name, {})
345+
column_formatting_options=column_formatting_options.get(worksheet_name, {}),
346+
**gspread_update_args
330347
)
331348

332349
def update_sheet_raw(sheets_authentication_response, sheet, *updates):

analytics/analytics_package/analytics/sheets_elements.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -232,17 +232,47 @@ class ADDITIONAL_DATA_BEHAVIOR(Enum):
232232
ADD = "add"
233233
REPLACE = "replace"
234234

235+
def get_page_views_over_time_df(analytics_params, additional_data_path=None, additional_data_behavior=None):
236+
"""
237+
Get a DataFrame with pageviews and total active users over time from the Analytics API.
238+
:param analytics_params: the parameters for the Analytics API, including service params, start dates, and end dates
239+
:param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None
240+
:param additional_data_behavior: the behavior to use when adding the additional data, defaults to None
241+
"""
242+
return get_change_over_time_df(
243+
["Users", "Total Pageviews"],
244+
["activeUsers", "screenPageViews"],
245+
["Month"],
246+
"yearMonth",
247+
additional_data_path=additional_data_path,
248+
additional_data_behavior=additional_data_behavior,
249+
**analytics_params
250+
)
251+
235252
def get_change_over_time_df(
236-
analytics_params, include_changes=True, additional_data_path=None, additional_data_behavior=None
253+
metric_titles, metrics, time_title, time_dimension, include_changes=True, change_title_suffix = " Change", additional_data_path=None, additional_data_behavior=None, strftime_format="%Y-%m", **other_params
237254
):
255+
"""
256+
Get a DataFrame with the change over time for the given metrics, renamed to match metric_titles
257+
:param metric_titles: the titles of the metrics to be displayed
258+
:param metrics: the metrics to be displayed
259+
:param time_title: the title to be displayed for the time dimension
260+
:param time_dimension: the time dimension to be displayed
261+
:param include_changes: whether to include the percent change columns, defaults to True
262+
:param change_title_suffix: the suffix to be added to the change columns, defaults to " Change"
263+
:param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None
264+
:param additional_data_behavior: the behavior to use when adding the additional data, defaults to None
265+
:param strftime_format: the format to use for the time dimension, defaults to "%Y-%m". None means a datetime will be returned
266+
:param other_params: any other parameters to be passed to the get_df_over_time function, including service params
267+
"""
238268
df_api = get_df_over_time(
239-
["Users", "Total Pageviews"],
240-
["activeUsers", "screenPageViews"],
241-
"yearMonth",
242-
sort_results=["yearMonth"],
269+
metric_titles,
270+
metrics,
271+
time_dimension,
272+
sort_results=[time_dimension],
243273
df_processor=(lambda df: df.set_index(df.index + "01")[-2::-1]),
244274
format_table=False,
245-
**analytics_params
275+
**other_params
246276
)
247277

248278
df_combined = pd.DataFrame()
@@ -259,7 +289,13 @@ def get_change_over_time_df(
259289
df_combined = df_api
260290

261291
if include_changes:
262-
df_combined["Users Change"] = df_combined["Users"].pct_change()
263-
df_combined["Total Pageviews Change"] = df_combined["Total Pageviews"].pct_change()
264-
265-
return df_combined
292+
assert change_title_suffix is not None
293+
df_combined[
294+
[f"{title}{change_title_suffix}" for title in metric_titles]
295+
] = df_combined[metric_titles].pct_change(periods=-1).replace({np.inf: np.nan})
296+
297+
if strftime_format is not None:
298+
df_combined.index = pd.to_datetime(df_combined.index).strftime(strftime_format)
299+
300+
return df_combined.reset_index(names=time_title)
301+

0 commit comments

Comments
 (0)