5
5
from urllib .parse import urlparse
6
6
import datetime as dt
7
7
8
- def get_flat_data_df (metrics , dimensions , remove_matches = None , ** other_params ):
8
+ def get_flat_data_df (metrics , dimensions , ** other_params ):
9
9
"""
10
10
Get a df from the Analytics API with a flat structure (no multiindex).
11
11
@@ -18,18 +18,11 @@ def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params):
18
18
19
19
:return: a DataFrame with the data from the Analytics API
20
20
"""
21
- if remove_matches is not None :
22
- assert len (remove_matches ) == len (dimensions )
23
-
24
21
df = get_data_df (
25
22
metrics ,
26
23
[dimension ["id" ] for dimension in dimensions ],
27
24
** other_params ,
28
25
)
29
- if remove_matches is not None :
30
- for i , match in enumerate ([dimension ["remove_matches" ] for dimension in dimensions ]):
31
- if match is not None :
32
- df = df .loc [~ df .index .get_level_values (i ).str .fullmatch (match )]
33
26
return df .reset_index ().rename (columns = get_rename_dict (dimensions )).copy ()
34
27
35
28
def get_rename_dict (dimensions ):
@@ -41,30 +34,33 @@ def get_rename_dict(dimensions):
41
34
def get_outbound_links_df (analytics_params ):
42
35
"""
43
36
Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.
37
+ analytics_params must not include a dimension_filter, because this function passes its own dimension_filter for each event query
44
38
45
39
:param analytics_params: the parameters for the Analytics API, including authentication and property ids
46
40
:return: a DataFrame with the outbound links from the Analytics API
47
41
"""
48
42
pd .set_option ('future.no_silent_downcasting' , True )
43
+ assert "dimension_filter" not in analytics_params
49
44
# Get the builtin "Click" event
50
45
df_builtin_links = get_flat_data_df (
51
46
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
52
47
[DIMENSION_PAGE_PATH , DIMENSION_BUILTIN_URL , DIMENSION_EVENT_NAME ],
53
- remove_matches = [ None , r"\s*" , None ] ,
48
+ dimension_filter = f"eventName== { EVENT_BUILTIN_CLICK } " ,
54
49
** analytics_params ,
55
50
).groupby (
56
51
[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_BUILTIN_URL ["alias" ]]
57
52
).sum ().reset_index ()
58
-
53
+ df_builtin_links . sort_values ( METRIC_EVENT_COUNT , ascending = False ). to_csv ( "test_builtin_links.csv" )
59
54
# Get the custom "outbound_link_click" event
60
55
df_custom_links = get_flat_data_df (
61
56
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
62
57
[DIMENSION_EVENT_NAME , DIMENSION_CUSTOM_URL , DIMENSION_PAGE_PATH ],
63
- remove_matches = [ DIMENSION_EVENT_NAME [ "remove_matches" ], r"\(not set\)" , None ] ,
58
+ dimension_filter = f"eventName== { EVENT_CUSTOM_CLICK } " ,
64
59
** analytics_params ,
65
60
).groupby (
66
61
[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_CUSTOM_URL ["alias" ]]
67
62
).sum ().reset_index ()
63
+ df_custom_links .sort_values (METRIC_EVENT_COUNT , ascending = False ).to_csv ("test_custom_links.csv" )
68
64
# Concatenate the two dataframes, avoiding duplicates
69
65
# Keep the link from the builtin event, unless the link contains a #fragment, in which case keep the link from the custom event
70
66
df_builtin_links ["builtin" ] = True
@@ -156,7 +152,7 @@ def get_page_views_df(analytics_params):
156
152
df_response = get_flat_data_df (
157
153
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS , METRIC_PAGE_VIEW ],
158
154
[DIMENSION_PAGE_PATH , DIMENSION_EVENT_NAME ],
159
- dimension_filter = "eventName==page_view " ,
155
+ dimension_filter = f "eventName=={ EVENT_PAGE_VIEW } " ,
160
156
** analytics_params ,
161
157
).rename (
162
158
columns = {
0 commit comments