Skip to content

Commit 6d65fae

Browse files
author
Jonah Paten
authored
feat: added pageviews analytics functions (#4336) (#4339)
* feat: added pageviews analytics functions (#4336)
* fix: updated outbound sheets link to breaking change in get_flat_data_df (#4336)
* chore: made analytics percent change calculation consistent in sheets-elements (#4336)
* chore: bumped ga package version (#4336)
* fix: no longer double adjusting for time period length (#4336)
* fix: corrected formula for changes (#4336)
* chore: remove logging (#4336)
1 parent eef1cee commit 6d65fae

File tree

3 files changed

+116
-11
lines changed

3 files changed

+116
-11
lines changed

analytics/analytics_package/analytics/fields.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Metric names
22
METRIC_EVENT_COUNT = 'eventCount'
33
METRIC_TOTAL_USERS = 'totalUsers'
4+
METRIC_PAGE_VIEW = 'screenPageViews'
45

56
# DIMENSIONS
67
DIMENSION_PAGE_PATH = {

analytics/analytics_package/analytics/sheets_elements.py

+114-10
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import numpy as np
12
import pandas as pd
23
from .charts import get_data_df
34
from .fields import *
45
from urllib.parse import urlparse
6+
import datetime as dt
57

6-
def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None):
8+
def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params):
79
"""
810
Get a df from the Analytics API with a flat structure (no multiindex).
911
@@ -22,7 +24,7 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
2224
df = get_data_df(
2325
metrics,
2426
[dimension["id"] for dimension in dimensions],
25-
**analytics_params,
27+
**other_params,
2628
)
2729
if remove_matches is not None:
2830
for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]):
@@ -46,20 +48,20 @@ def get_outbound_links_df(analytics_params):
4648
pd.set_option('future.no_silent_downcasting', True)
4749
# Get the builtin "Click" event
4850
df_builtin_links = get_flat_data_df(
49-
analytics_params,
5051
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
5152
[DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME],
52-
remove_matches=[None, r"\s*", None]
53+
remove_matches=[None, r"\s*", None],
54+
**analytics_params,
5355
).groupby(
5456
[DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]]
5557
).sum().reset_index()
5658

5759
# Get the custom "outbound_link_click" event
5860
df_custom_links = get_flat_data_df(
59-
analytics_params,
6061
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
6162
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
6263
remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None],
64+
**analytics_params,
6365
).groupby(
6466
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
6567
).sum().reset_index()
@@ -117,17 +119,119 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
117119
"start_date": start_previous,
118120
"end_date": end_previous,
119121
}
120-
print(analytics_params_month_2)
121122
df_current = get_outbound_links_df(analytics_params_month_1).set_index(
122123
["Page Path", "Outbound Link", "Hostname"]
123124
)
124125
df_previous = get_outbound_links_df(analytics_params_month_2).set_index(
125126
["Page Path", "Outbound Link", "Hostname"]
126127
)
128+
total_clicks_percent_change = get_change(
129+
df_current["Total Clicks"],
130+
df_previous["Total Clicks"],
131+
start_current,
132+
end_current,
133+
start_previous,
134+
end_previous
135+
)
136+
total_users_percent_change = get_change(
137+
df_current["Total Users"],
138+
df_previous["Total Users"],
139+
start_current,
140+
end_current,
141+
start_previous,
142+
end_previous
143+
)
144+
df_reindexed = df_current.reindex(total_clicks_percent_change.index).fillna(0)
145+
df_reindexed["Total Clicks Percent Change"] = total_clicks_percent_change
146+
df_reindexed["Total Users Percent Change"] = total_users_percent_change
147+
return df_reindexed.sort_values(["Total Clicks", "Total Users"], ascending=False, kind="stable").reset_index()
148+
149+
def get_page_views_df(analytics_params):
    """
    Fetch page-view data from the Analytics API as a three-column DataFrame.

    The request is restricted to ``page_view`` events via a dimension filter,
    and the result is trimmed to the page path, view count and user count.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :return: a DataFrame with the columns "Page Path", "Total Views" and "Total Users"
    """
    requested_metrics = [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW]
    requested_dimensions = [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME]
    # Map the API column names onto the human-readable names used in the sheets
    column_names = {
        DIMENSION_PAGE_PATH["alias"]: "Page Path",
        METRIC_PAGE_VIEW: "Total Views",
        METRIC_TOTAL_USERS: "Total Users",
    }
    df_raw = get_flat_data_df(
        requested_metrics,
        requested_dimensions,
        dimension_filter="eventName==page_view",
        **analytics_params,
    )
    df_renamed = df_raw.rename(columns=column_names)
    # Keep only the reported columns; copy so the result is independent of the response frame
    return df_renamed[["Page Path", "Total Views", "Total Users"]].copy()
169+
170+
def get_page_views_change(analytics_params, start_current, end_current, start_previous, end_previous):
171+
"""
172+
Get a DF with page views from the Analytics API and a comparison for the prior month
173+
:param analytics_params: the parameters for the Analytics API, including authentication and property ids
174+
:param start_current: the start date for the current month in the format "YYYY-MM-DD"
175+
:param end_current: the end date for the current month
176+
:param start_previous: the start date for the previous month
177+
:param end_previous: the end date for the previous month
178+
"""
179+
analytics_params_current = {
180+
**analytics_params,
181+
"start_date": start_current,
182+
"end_date": end_current,
183+
}
184+
analytics_params_previous = {
185+
**analytics_params,
186+
"start_date": start_previous,
187+
"end_date": end_previous,
188+
}
189+
df_current = get_page_views_df(analytics_params_current).set_index("Page Path")
190+
df_previous = get_page_views_df(analytics_params_previous).set_index("Page Path")
127191
combined_index = df_current.index.union(df_previous.index)
128192
df_current_reindexed = df_current.reindex(combined_index).fillna(0)
129193
df_previous_reindexed = df_previous.reindex(combined_index)
130-
df_current_reindexed["Total Clicks Percent Change"] = (df_current_reindexed["Total Clicks"] / df_previous_reindexed["Total Clicks"]) - 1
131-
df_current_reindexed["Total Users Percent Change"] = (df_current_reindexed["Total Users"] / df_previous_reindexed["Total Users"]) - 1
132-
return df_current_reindexed.sort_values(["Total Clicks", "Total Users"], ascending=False, kind="stable").reset_index()
133-
194+
views_percent_change = get_change(
195+
df_current_reindexed["Total Views"],
196+
df_previous_reindexed["Total Views"],
197+
start_current,
198+
end_current,
199+
start_previous,
200+
end_previous,
201+
)
202+
users_percent_change = get_change(
203+
df_current_reindexed["Total Users"],
204+
df_previous_reindexed["Total Users"],
205+
start_current,
206+
end_current,
207+
start_previous,
208+
end_previous,
209+
)
210+
df_reindexed = df_current.reindex(views_percent_change.index).fillna(0)
211+
df_reindexed["Total Views Percent Change"] = views_percent_change
212+
df_reindexed["Total Users Percent Change"] = users_percent_change
213+
return df_reindexed.sort_values(["Total Views", "Total Users"], ascending=False, kind="stable").reset_index()
214+
215+
def get_change(series_current, series_previous, start_current, end_current, start_previous, end_previous, combined_index=None):
    """
    Get the percent change between two series, accounting for different numbers of days in each period.

    The previous series is scaled by ``current_length / previous_length`` before the
    comparison, so periods of unequal length (e.g. a 31-day and a 28-day month) are
    compared on an equal footing. The result is ``current / scaled_previous - 1``.

    :param series_current: the series representing the current period
    :param series_previous: the series representing the prior period
    :param start_current: the start date for the current period in ISO format ("YYYY-MM-DD")
    :param end_current: the end date (inclusive) for the current period
    :param start_previous: the start date for the prior period
    :param end_previous: the end date (inclusive) for the prior period
    :param combined_index: optional index to align both series on; defaults to the union of the two indexes
    :return: a Series with the relative change; entries are NaN where the previous value is
        missing or zero (no meaningful baseline)
    :raises ValueError: if the index names of the two series differ, or if either period has
        a non-positive length (end date before start date)
    """
    # Both series must be indexed by the same levels, otherwise alignment is meaningless
    if series_current.index.names != series_previous.index.names:
        raise ValueError(
            f"Index names differ: {list(series_current.index.names)} != {list(series_previous.index.names)}"
        )

    # Period lengths in days; the end date is inclusive, hence the +1
    current_length = (dt.datetime.fromisoformat(end_current) - dt.datetime.fromisoformat(start_current)).days + 1
    previous_length = (dt.datetime.fromisoformat(end_previous) - dt.datetime.fromisoformat(start_previous)).days + 1
    if current_length <= 0 or previous_length <= 0:
        raise ValueError("Each period must end on or after its start date")

    # Align both series on a shared index (caller-supplied, or the union of both)
    if combined_index is None:
        combined_index = series_current.index.union(series_previous.index)

    # Entries absent from the current period count as zero activity
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # Entries absent from the prior period stay NaN: there is no baseline to compare against.
    # Scale the prior values to the length of the current period.
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length

    change = (series_current_reindexed / series_previous_reindexed) - 1
    # A zero baseline yields an infinite ratio; report it as "no value" instead
    return change.replace([np.inf, -np.inf], np.nan)

analytics/analytics_package/setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name="analytics",
5-
version="3.2.0",
5+
version="3.3.0",
66
packages=["analytics"],
77
install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
88
)

0 commit comments

Comments
 (0)