From c5a7133662ab29b2c73c98131a321996cbfd5b98 Mon Sep 17 00:00:00 2001 From: jpaten Date: Tue, 14 Jan 2025 19:52:27 -0800 Subject: [PATCH] feat: added pageviews analytics functions (#4336) --- .../analytics_package/analytics/fields.py | 1 + .../analytics/sheets_elements.py | 59 ++++++++++++++++++- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/analytics/analytics_package/analytics/fields.py b/analytics/analytics_package/analytics/fields.py index ec328bc80..8498e6600 100644 --- a/analytics/analytics_package/analytics/fields.py +++ b/analytics/analytics_package/analytics/fields.py @@ -1,6 +1,7 @@ # Metric names METRIC_EVENT_COUNT = 'eventCount' METRIC_TOTAL_USERS = 'totalUsers' +METRIC_PAGE_VIEW = 'screenPageViews' # DIMENSIONS DIMENSION_PAGE_PATH = { diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index 828d9303c..6db186514 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -2,8 +2,9 @@ from .charts import get_data_df from .fields import * from urllib.parse import urlparse +import datetime as dt -def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None): +def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params): """ Get a df from the Analytics API with a flat structure (no multiindex). @@ -22,7 +23,7 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None) df = get_data_df( metrics, [dimension["id"] for dimension in dimensions], - **analytics_params, + **other_params, ) if remove_matches is not None: for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]): @@ -95,4 +96,56 @@ def get_outbound_sheets_df(analytics_params): "hostname": "Hostname", } )[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users"]] - return df_all_links.copy().reset_index(drop=True) \ No newline at end of file + return df_all_links.copy().reset_index(drop=True) + +def get_page_views_df(analytics_params): + """ + Get a DF with page views from the Analytics API. + + :param analytics_params: the parameters for the Analytics API, including authentication and property ids + :return: a DataFrame with the page views from the Analytics API + """ + df_response = get_flat_data_df( + [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW], + [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME], + dimension_filter="eventName==page_view", + **analytics_params, + ).rename( + columns={ + DIMENSION_PAGE_PATH["alias"]: "Page Path", + METRIC_PAGE_VIEW: "Total Views", + METRIC_TOTAL_USERS: "Total Users", + } + )[["Page Path", "Total Views", "Total Users"]].copy() + return df_response + +def get_page_views_change_df(analytics_params, start_current, end_current, start_previous, end_previous): + """ + Get a DF with page views from the Analytics API and a comparison for the prior month + :param analytics_params: the parameters for the Analytics API, including authentication and property ids + :param start_current: the start date for the current month in the format "YYYY-MM-DD" + :param end_current: the end date for the current month + :param start_previous: the start date for the previous month + :param end_previous: the end date for the previous month + """ + analytics_params_current = { + **analytics_params, + "start_date": start_current, + "end_date": end_current, + } + analytics_params_previous = { + **analytics_params, + "start_date": start_previous, + "end_date": end_previous, + } + current_length = float((dt.datetime.fromisoformat(end_current) - dt.datetime.fromisoformat(start_current)).days + 1) + previous_length = float((dt.datetime.fromisoformat(end_previous) - dt.datetime.fromisoformat(start_previous)).days + 1) + df_current = get_page_views_df(analytics_params_current).set_index("Page Path") + df_previous = get_page_views_df(analytics_params_previous).set_index("Page Path") * current_length / previous_length + combined_index = df_current.index.union(df_previous.index) + df_current_reindexed = df_current.reindex(combined_index).fillna(0) + df_previous_reindexed = df_previous.reindex(combined_index) + df_current_reindexed["Total Views Percent Change"] = (df_current_reindexed["Total Views"] / df_previous_reindexed["Total Views"]) - 1 + df_current_reindexed["Total Users Percent Change"] = (df_current_reindexed["Total Users"] / df_previous_reindexed["Total Users"]) - 1 + return df_current_reindexed.sort_values(["Total Views", "Total Users"], ascending=False, kind="stable").reset_index() + \ No newline at end of file