1
1
import pandas as pd
2
2
from .charts import get_data_df
3
+ from .fields import *
3
4
from urllib .parse import urlparse
4
5
5
6
def get_flat_data_df (analytics_params , metrics , dimensions , remove_matches = None ):
@@ -20,15 +21,20 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
20
21
21
22
df = get_data_df (
22
23
metrics ,
23
- dimensions ,
24
+ [ dimension [ "id" ] for dimension in dimensions ] ,
24
25
** analytics_params ,
25
26
)
26
27
if remove_matches is not None :
27
- for i , match in enumerate (remove_matches ):
28
+ for i , match in enumerate ([ dimension [ " remove_matches" ] for dimension in dimensions ] ):
28
29
if match is not None :
29
30
df = df .loc [~ df .index .get_level_values (i ).str .fullmatch (match )]
30
- return df .reset_index ().copy ()
31
+ return df .reset_index ().rename ( columns = get_rename_dict ( dimensions )). copy ()
31
32
33
def get_rename_dict(dimensions):
    """Get a dictionary to rename the columns of a DataFrame.

    Args:
        dimensions: Iterable of dimension mappings, each providing an
            ``"id"`` key and an ``"alias"`` key.

    Returns:
        A dict mapping each dimension's ``"id"`` to its ``"alias"``. If the
        same id occurs more than once, the alias of the last occurrence wins.

    Raises:
        KeyError: If a dimension lacks the ``"id"`` or ``"alias"`` key.
    """
    # One pass over the dimensions (instead of two parallel lists zipped
    # together) so that one-shot iterables such as generators also work.
    return {dimension["id"]: dimension["alias"] for dimension in dimensions}
32
38
33
39
def get_outbound_sheets_df (analytics_params ):
34
40
"""
@@ -41,42 +47,38 @@ def get_outbound_sheets_df(analytics_params):
41
47
# Get the builtin "Click" event
42
48
df_builtin_links = get_flat_data_df (
43
49
analytics_params ,
44
- ["eventCount" , "totalUsers" ],
45
- ["pagePath" , "linkUrl" , "eventName" ],
50
+ [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
51
+ [DIMENSION_PAGE_PATH , DIMENSION_BUILTIN_URL , DIMENSION_EVENT_NAME ],
46
52
remove_matches = [None , r"\s*" , None ]
47
53
).groupby (
48
- ["pagePath" , "linkUrl" ]
49
- ).sum ().reset_index ().rename (
50
- columns = {"linkUrl" : "builtin_url" }
51
- )
54
+ [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_BUILTIN_URL ["alias" ]]
55
+ ).sum ().reset_index ()
52
56
53
57
# Get the custom "outbound_link_click" event
54
58
df_custom_links = get_flat_data_df (
55
59
analytics_params ,
56
- ["eventCount" , "totalUsers" ],
57
- ["pagePath" , "customEvent:click_url" , "eventName" ],
58
- remove_matches = [None , r"\(not set\)" , None ],
60
+ [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
61
+ [DIMENSION_EVENT_NAME , DIMENSION_CUSTOM_URL , DIMENSION_PAGE_PATH ],
62
+ remove_matches = [DIMENSION_EVENT_NAME [ "remove_matches" ] , r"\(not set\)" , None ],
59
63
).groupby (
60
- ["pagePath" , "customEvent:click_url" ]
61
- ).sum ().reset_index ().rename (
62
- columns = {"customEvent:click_url" : "outbound_url" }
63
- )
64
+ [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_CUSTOM_URL ["alias" ]]
65
+ ).sum ().reset_index ()
64
66
# Concatenate the two dataframes, avoiding duplicates
65
67
# Keep the link from the builtin event, unless the link contains a #fragment, in which case keep the link from the custom event
66
68
df_builtin_links ["builtin" ] = True
67
- df_builtin_links ["truncated_url" ] = df_builtin_links ["builtin_url" ]
68
- df_custom_links ["truncated_url" ] = df_custom_links ["outbound_url" ].str .replace (r"#.*" , "" , regex = True )
69
- df_outbound_links_fragments = df_custom_links .loc [df_custom_links ["outbound_url" ] .str .contains ("#" )]
69
+ df_builtin_links ["truncated_url" ] = df_builtin_links [DIMENSION_BUILTIN_URL [ "alias" ] ]
70
+ df_custom_links ["truncated_url" ] = df_custom_links [DIMENSION_CUSTOM_URL [ "alias" ] ].str .replace (r"#.*" , "" , regex = True )
71
+ df_outbound_links_fragments = df_custom_links .loc [df_custom_links [DIMENSION_CUSTOM_URL [ "alias" ]] .str .contains ("#" )]. copy ()
70
72
df_outbound_links_fragments ["is_fragment" ] = True
71
73
df_all_links = pd .concat (
72
74
[df_builtin_links , df_outbound_links_fragments ], ignore_index = True
73
75
)
76
+ # Use the builtin link, unless the link is not in the custom links, in which case use the custom link
74
77
df_all_links = df_all_links .loc [
75
78
~ (df_all_links ["truncated_url" ].isin (df_outbound_links_fragments ["truncated_url" ]) & df_all_links ["builtin" ])
76
- ].sort_values ("eventCount" , ascending = False )
77
- # Determine whther a link is a fragment or an outbound link
78
- df_all_links ["outbound" ] = df_all_links ["truncated_url" ].isin (df_custom_links ["truncated_url" ])
79
+ ].sort_values (METRIC_EVENT_COUNT , ascending = False )
79
80
df_all_links ["is_fragment" ] = df_all_links ["is_fragment" ].fillna (False ).astype (bool )
81
+ # Use the builtin link, unless the link is a fragment, in which case use the custom link
80
82
df_all_links ["complete_url" ] = df_all_links ["builtin_url" ].where (
81
83
~ df_all_links ["is_fragment" ],
82
84
df_all_links ["outbound_url" ]
@@ -86,12 +88,11 @@ def get_outbound_sheets_df(analytics_params):
86
88
columns = ["builtin_url" , "outbound_url" , "builtin" , "is_fragment" ]
87
89
).rename (
88
90
columns = {
89
- "pagePath" : "Page Path" ,
91
+ DIMENSION_PAGE_PATH [ "alias" ] : "Page Path" ,
90
92
"complete_url" : "Outbound Link" ,
91
- "eventCount" : "Total Clicks" ,
92
- "totalUsers" : "Total Users" ,
93
- "outbound" : "Is Outbound" ,
93
+ METRIC_EVENT_COUNT : "Total Clicks" ,
94
+ METRIC_TOTAL_USERS : "Total Users" ,
94
95
"hostname" : "Hostname" ,
95
96
}
96
- )[["Page Path" , "Hostname" , "Outbound Link" , "Total Clicks" , "Total Users" , "Is Outbound" ]]
97
+ )[["Page Path" , "Hostname" , "Outbound Link" , "Total Clicks" , "Total Users" ]]
97
98
return df_all_links .copy ().reset_index (drop = True )
0 commit comments