1
+ import numpy as np
1
2
import pandas as pd
2
3
from .charts import get_data_df
3
4
from .fields import *
4
5
from urllib .parse import urlparse
6
+ import datetime as dt
5
7
6
- def get_flat_data_df (analytics_params , metrics , dimensions , remove_matches = None ):
8
+ def get_flat_data_df (metrics , dimensions , remove_matches = None , ** other_params ):
7
9
"""
8
10
Get a df from the Analytics API with a flat structure (no multiindex).
9
11
@@ -22,7 +24,7 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
22
24
df = get_data_df (
23
25
metrics ,
24
26
[dimension ["id" ] for dimension in dimensions ],
25
- ** analytics_params ,
27
+ ** other_params ,
26
28
)
27
29
if remove_matches is not None :
28
30
for i , match in enumerate ([dimension ["remove_matches" ] for dimension in dimensions ]):
@@ -46,20 +48,20 @@ def get_outbound_links_df(analytics_params):
46
48
pd .set_option ('future.no_silent_downcasting' , True )
47
49
# Get the builtin "Click" event
48
50
df_builtin_links = get_flat_data_df (
49
- analytics_params ,
50
51
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
51
52
[DIMENSION_PAGE_PATH , DIMENSION_BUILTIN_URL , DIMENSION_EVENT_NAME ],
52
- remove_matches = [None , r"\s*" , None ]
53
+ remove_matches = [None , r"\s*" , None ],
54
+ ** analytics_params ,
53
55
).groupby (
54
56
[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_BUILTIN_URL ["alias" ]]
55
57
).sum ().reset_index ()
56
58
57
59
# Get the custom "outbound_link_click" event
58
60
df_custom_links = get_flat_data_df (
59
- analytics_params ,
60
61
[METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
61
62
[DIMENSION_EVENT_NAME , DIMENSION_CUSTOM_URL , DIMENSION_PAGE_PATH ],
62
63
remove_matches = [DIMENSION_EVENT_NAME ["remove_matches" ], r"\(not set\)" , None ],
64
+ ** analytics_params ,
63
65
).groupby (
64
66
[DIMENSION_PAGE_PATH ["alias" ], DIMENSION_CUSTOM_URL ["alias" ]]
65
67
).sum ().reset_index ()
@@ -117,17 +119,119 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
117
119
"start_date" : start_previous ,
118
120
"end_date" : end_previous ,
119
121
}
120
- print (analytics_params_month_2 )
121
122
df_current = get_outbound_links_df (analytics_params_month_1 ).set_index (
122
123
["Page Path" , "Outbound Link" , "Hostname" ]
123
124
)
124
125
df_previous = get_outbound_links_df (analytics_params_month_2 ).set_index (
125
126
["Page Path" , "Outbound Link" , "Hostname" ]
126
127
)
128
+ total_clicks_percent_change = get_change (
129
+ df_current ["Total Clicks" ],
130
+ df_previous ["Total Clicks" ],
131
+ start_current ,
132
+ end_current ,
133
+ start_previous ,
134
+ end_previous
135
+ )
136
+ total_users_percent_change = get_change (
137
+ df_current ["Total Users" ],
138
+ df_previous ["Total Users" ],
139
+ start_current ,
140
+ end_current ,
141
+ start_previous ,
142
+ end_previous
143
+ )
144
+ df_reindexed = df_current .reindex (total_clicks_percent_change .index ).fillna (0 )
145
+ df_reindexed ["Total Clicks Percent Change" ] = total_clicks_percent_change
146
+ df_reindexed ["Total Users Percent Change" ] = total_users_percent_change
147
+ return df_reindexed .sort_values (["Total Clicks" , "Total Users" ], ascending = False , kind = "stable" ).reset_index ()
148
+
149
def get_page_views_df(analytics_params):
    """
    Fetch per-page view and user totals from the Analytics API.

    The request is made through get_flat_data_df with the "eventName==page_view"
    dimension filter, and the response columns are renamed to display names.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :return: a DataFrame with the columns "Page Path", "Total Views" and "Total Users"
    """
    requested_metrics = [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW]
    requested_dimensions = [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME]
    df_raw = get_flat_data_df(
        requested_metrics,
        requested_dimensions,
        dimension_filter="eventName==page_view",
        **analytics_params,
    )
    # Map the API column names onto the display names used by the callers.
    display_names = {
        DIMENSION_PAGE_PATH["alias"]: "Page Path",
        METRIC_PAGE_VIEW: "Total Views",
        METRIC_TOTAL_USERS: "Total Users",
    }
    df_renamed = df_raw.rename(columns=display_names)
    # Keep only the reporting columns; copy so the result is independent of the response frame.
    df_page_views = df_renamed[["Page Path", "Total Views", "Total Users"]].copy()
    return df_page_views
169
+
170
def get_page_views_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with page views for the current period plus the change relative to the prior period.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the previous month
    :param end_previous: the end date for the previous month
    :return: a DataFrame with one row per page path found in either period, holding the
        current "Total Views" and "Total Users" (0 where the page has no current data) and
        the "Total Views Percent Change" / "Total Users Percent Change" columns computed by
        get_change, sorted by views and then users in descending order
    """
    def fetch_period(start_date, end_date):
        # Overlay the date range on the shared API parameters and index the result by page path.
        period_params = {
            **analytics_params,
            "start_date": start_date,
            "end_date": end_date,
        }
        return get_page_views_df(period_params).set_index("Page Path")

    df_current = fetch_period(start_current, end_current)
    df_previous = fetch_period(start_previous, end_previous)

    # Align both periods on every page path seen in either one. Missing current
    # values become 0; missing previous values stay NaN.
    all_paths = df_current.index.union(df_previous.index)
    current_aligned = df_current.reindex(all_paths).fillna(0)
    previous_aligned = df_previous.reindex(all_paths)

    period_bounds = (start_current, end_current, start_previous, end_previous)
    metric_columns = (
        ("Total Views", "Total Views Percent Change"),
        ("Total Users", "Total Users Percent Change"),
    )
    changes = [
        (change_column, get_change(current_aligned[metric], previous_aligned[metric], *period_bounds))
        for metric, change_column in metric_columns
    ]

    # The first change series (views) defines the row index of the result.
    df_result = df_current.reindex(changes[0][1].index).fillna(0)
    for change_column, change_series in changes:
        df_result[change_column] = change_series

    df_sorted = df_result.sort_values(["Total Views", "Total Users"], ascending=False, kind="stable")
    return df_sorted.reset_index()
214
+
215
def get_change(series_current, series_previous, start_current, end_current, start_previous, end_previous, combined_index=None):
    """
    Get the percent change between two series, accounting for different numbers of days in each period.

    The previous series is scaled by ``current_length / previous_length`` (period lengths in
    days, inclusive of both end points) before the comparison, so the result is
    ``current / scaled_previous - 1``.

    :param series_current: the series representing the current month
    :param series_previous: the series representing the prior month
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the prior month
    :param end_previous: the end date for the prior month
    :param combined_index: optional index to align both series to; when None (the default),
        the union of the two series' indexes is used
    :return: a Series over the combined index with the fractional change between the two
        series; NaN where the prior value is missing or the ratio is not finite
    :raises ValueError: if the series' index names differ or either period spans zero days
    """
    # Explicit raises instead of asserts so the checks survive `python -O`.
    if series_current.index.names != series_previous.index.names:
        raise ValueError(
            f"Index names differ: {list(series_current.index.names)} != {list(series_previous.index.names)}"
        )

    def period_length(start, end):
        # Inclusive day count between two ISO-formatted dates.
        return float((dt.datetime.fromisoformat(end) - dt.datetime.fromisoformat(start)).days + 1)

    current_length = period_length(start_current, end_current)
    previous_length = period_length(start_previous, end_previous)
    if current_length == 0 or previous_length == 0:
        raise ValueError("Both periods must span a non-zero number of days")

    # Honor a caller-supplied index; otherwise align on every label seen in either series.
    if combined_index is None:
        combined_index = series_current.index.union(series_previous.index)

    # Labels absent from the current period count as 0; labels absent from the
    # prior period stay NaN so their change is reported as NaN.
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # Adjust the values from the prior series to account for the different number of days in the month
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length

    change = (series_current_reindexed / series_previous_reindexed) - 1
    # A zero baseline yields +/-inf; report those as NaN rather than an infinite change.
    return change.replace([np.inf, -np.inf], np.nan)
0 commit comments