Skip to content

Commit 6c12a65

Browse files
author
jpaten
committed
chore: made analytics percent change calculation consistent in sheets-elements (#4336)
1 parent bb7b5e2 commit 6c12a65

File tree

1 file changed

+60
-8
lines changed

1 file changed

+60
-8
lines changed

analytics/analytics_package/analytics/sheets_elements.py

+60-8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pandas as pd
23
from .charts import get_data_df
34
from .fields import *
@@ -57,10 +58,10 @@ def get_outbound_links_df(analytics_params):
5758

5859
# Get the custom "outbound_link_click" event
5960
df_custom_links = get_flat_data_df(
60-
analytics_params,
6161
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
6262
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
6363
remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None],
64+
**analytics_params,
6465
).groupby(
6566
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
6667
).sum().reset_index()
@@ -99,7 +100,7 @@ def get_outbound_links_df(analytics_params):
99100

100101
return df_all_links.copy().reset_index(drop=True)
101102

102-
def get_outbound_links_change(analytics_params, start_current, end_current, start_previous, end_previous):
103+
def get_outbound_links_change_df(analytics_params, start_current, end_current, start_previous, end_previous):
103104
"""
104105
Get a DF with outbound links from the Analytics API and a comparison for the prior month
105106
:param analytics_params: the parameters for the Analytics API, including authentication and property ids
@@ -118,7 +119,6 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
118119
"start_date": start_previous,
119120
"end_date": end_previous,
120121
}
121-
print(analytics_params_month_2)
122122
df_current = get_outbound_links_df(analytics_params_month_1).set_index(
123123
["Page Path", "Outbound Link", "Hostname"]
124124
)
@@ -128,8 +128,22 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
128128
combined_index = df_current.index.union(df_previous.index)
129129
df_current_reindexed = df_current.reindex(combined_index).fillna(0)
130130
df_previous_reindexed = df_previous.reindex(combined_index)
131-
df_current_reindexed["Total Clicks Percent Change"] = (df_current_reindexed["Total Clicks"] / df_previous_reindexed["Total Clicks"]) - 1
132-
df_current_reindexed["Total Users Percent Change"] = (df_current_reindexed["Total Users"] / df_previous_reindexed["Total Users"]) - 1
131+
df_current_reindexed["Total Clicks Percent Change"] = get_change(
132+
df_current_reindexed["Total Clicks"],
133+
df_previous_reindexed["Total Clicks"],
134+
start_current,
135+
end_current,
136+
start_previous,
137+
end_previous
138+
)
139+
df_current_reindexed["Total Users Percent Change"] = get_change(
140+
df_current_reindexed["Total Users"],
141+
df_previous_reindexed["Total Users"],
142+
start_current,
143+
end_current,
144+
start_previous,
145+
end_previous
146+
)
133147
return df_current_reindexed.sort_values(["Total Clicks", "Total Users"], ascending=False, kind="stable").reset_index()
134148

135149

@@ -181,7 +195,45 @@ def get_page_views_change_df(analytics_params, start_current, end_current, start
181195
combined_index = df_current.index.union(df_previous.index)
182196
df_current_reindexed = df_current.reindex(combined_index).fillna(0)
183197
df_previous_reindexed = df_previous.reindex(combined_index)
184-
df_current_reindexed["Total Views Percent Change"] = (df_current_reindexed["Total Views"] / df_previous_reindexed["Total Views"]) - 1
185-
df_current_reindexed["Total Users Percent Change"] = (df_current_reindexed["Total Users"] / df_previous_reindexed["Total Users"]) - 1
198+
df_current_reindexed["Total Views Percent Change"] = get_change(
199+
df_current_reindexed["Total Views"],
200+
df_previous_reindexed["Total Views"],
201+
start_current,
202+
end_current,
203+
start_previous,
204+
end_previous,
205+
)
206+
df_current_reindexed["Total Users Percent Change"] = get_change(
207+
df_current_reindexed["Total Users"],
208+
df_previous_reindexed["Total Users"],
209+
start_current,
210+
end_current,
211+
start_previous,
212+
end_previous,
213+
)
186214
return df_current_reindexed.sort_values(["Total Views", "Total Users"], ascending=False, kind="stable").reset_index()
187-
215+
216+
def get_change(series_current, series_previous, start_current, end_current, start_previous, end_previous):
    """
    Get the relative change between two series, accounting for different numbers of days in each period.

    The prior-period values are first rescaled to the length of the current period, and the
    result is ``current / rescaled_previous - 1``. The value is a fraction, not a percentage:
    ``1.0`` means the metric doubled and ``-0.5`` means it halved.

    :param series_current: the series representing the current month
    :param series_previous: the series representing the prior month
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month in the format "YYYY-MM-DD"
    :param start_previous: the start date for the prior month in the format "YYYY-MM-DD"
    :param end_previous: the end date for the prior month in the format "YYYY-MM-DD"
    :return: a Series indexed by the union of both indexes with the change between the two series.
        Entries are NaN where the prior value is missing or zero, and -1 where the entry is
        present in the prior series but missing from the current one
    :raises AssertionError: if the index names differ or a period has a length of zero days
    :raises ValueError: if any of the dates is not a valid ISO-format date string
    """
    # Imported locally so this function does not depend on a module-level datetime alias
    import datetime

    def period_days(start, end):
        # Number of days in the period, counting both the start and the end date
        parse = datetime.datetime.fromisoformat
        return float((parse(end) - parse(start)).days + 1)

    # Check that both series have the same index names
    assert series_current.index.names == series_previous.index.names
    # Reindex both series to have the same index
    combined_index = series_current.index.union(series_previous.index)
    current_length = period_days(start_current, end_current)
    previous_length = period_days(start_previous, end_previous)
    assert current_length != 0
    assert previous_length != 0
    # An entry missing from the current period counts as zero activity
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # Adjust the values from the prior series to account for the different number of days in the month.
    # Entries missing from the prior period stay NaN, so their change is NaN rather than infinite.
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length
    # Change is measured relative to the prior period: current / previous - 1.
    # A zero prior value gives inf, which is reported as NaN (no meaningful baseline).
    change = ((series_current_reindexed / series_previous_reindexed) - 1).replace({np.inf: np.nan})
    return change

0 commit comments

Comments
 (0)