Skip to content

Commit 372145c

Browse files
author
jpaten
committed
fix: switched ga result filtering from results to dimension filters (#4351)
1 parent 1aec654 commit 372145c

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

analytics/analytics_package/analytics/fields.py

+5-4
Original file line number | Diff line number | Diff line change
@@ -3,24 +3,25 @@
33
METRIC_TOTAL_USERS = 'totalUsers'
44
METRIC_PAGE_VIEW = 'screenPageViews'
55

6+
# Event Names
7+
EVENT_BUILTIN_CLICK = "click"
8+
EVENT_CUSTOM_CLICK = "outbound_link_clicked"
9+
EVENT_PAGE_VIEW = "page_view"
10+
611
# DIMENSIONS
712
DIMENSION_PAGE_PATH = {
813
'id': 'pagePath',
914
'alias': 'page_path',
10-
'remove_matches': None,
1115
}
1216
DIMENSION_BUILTIN_URL = {
1317
'id': 'linkUrl',
1418
'alias': 'builtin_url',
15-
'remove_matches': r"\s*",
1619
}
1720
DIMENSION_EVENT_NAME = {
1821
'id': 'eventName',
1922
'alias': 'event_name',
20-
'remove_matches': None,
2123
}
2224
DIMENSION_CUSTOM_URL = {
2325
'id': 'customEvent:click_url',
2426
'alias': 'outbound_url',
25-
'remove_matches': r"\(not set\)",
2627
}

analytics/analytics_package/analytics/sheets_elements.py

+8-12
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from urllib.parse import urlparse
66
import datetime as dt
77

8-
def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params):
8+
def get_flat_data_df(metrics, dimensions, **other_params):
99
"""
1010
Get a df from the Analytics API with a flat structure (no multiindex).
1111
@@ -18,18 +18,11 @@ def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params):
1818
1919
:return: a DataFrame with the data from the Analytics API
2020
"""
21-
if remove_matches is not None:
22-
assert len(remove_matches) == len(dimensions)
23-
2421
df = get_data_df(
2522
metrics,
2623
[dimension["id"] for dimension in dimensions],
2724
**other_params,
2825
)
29-
if remove_matches is not None:
30-
for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]):
31-
if match is not None:
32-
df = df.loc[~df.index.get_level_values(i).str.fullmatch(match)]
3326
return df.reset_index().rename(columns=get_rename_dict(dimensions)).copy()
3427

3528
def get_rename_dict(dimensions):
@@ -41,30 +34,33 @@ def get_rename_dict(dimensions):
4134
def get_outbound_links_df(analytics_params):
4235
"""
4336
Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.
37+
analytics_params cannot currently include a dimension_filter
4438
4539
:param analytics_params: the parameters for the Analytics API, including authentication and property ids
4640
:return: a DataFrame with the outbound links from the Analytics API
4741
"""
4842
pd.set_option('future.no_silent_downcasting', True)
43+
assert "dimension_filter" not in analytics_params
4944
# Get the builtin "Click" event
5045
df_builtin_links = get_flat_data_df(
5146
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
5247
[DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME],
53-
remove_matches=[None, r"\s*", None],
48+
dimension_filter=f"eventName=={EVENT_BUILTIN_CLICK}",
5449
**analytics_params,
5550
).groupby(
5651
[DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]]
5752
).sum().reset_index()
58-
53+
df_builtin_links.sort_values(METRIC_EVENT_COUNT, ascending=False).to_csv("test_builtin_links.csv")
5954
# Get the custom "outbound_link_click" event
6055
df_custom_links = get_flat_data_df(
6156
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
6257
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
63-
remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None],
58+
dimension_filter=f"eventName=={EVENT_CUSTOM_CLICK}",
6459
**analytics_params,
6560
).groupby(
6661
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
6762
).sum().reset_index()
63+
df_custom_links.sort_values(METRIC_EVENT_COUNT, ascending=False).to_csv("test_custom_links.csv")
6864
# Concatenate the two dataframes, avoiding duplicates
6965
# Keep the link from the builtin event, unless the link contains a #fragment, in which case keep the link from the custom event
7066
df_builtin_links["builtin"] = True
@@ -156,7 +152,7 @@ def get_page_views_df(analytics_params):
156152
df_response = get_flat_data_df(
157153
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW],
158154
[DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME],
159-
dimension_filter="eventName==page_view",
155+
dimension_filter=f"eventName=={EVENT_PAGE_VIEW}",
160156
**analytics_params,
161157
).rename(
162158
columns={

0 commit comments

Comments
 (0)