Skip to content

Commit 2b97b4c

Browse files
author
jpaten
committed
chore: finished refactor (#4305)
1 parent b072606 commit 2b97b4c

File tree

3 files changed

+45
-31
lines changed

3 files changed

+45
-31
lines changed

analytics/analytics_package/analytics/api.py

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
lambda service, params: service.reports().query(**params).execute()
2525
)
2626

27+
drive_service_params = (
28+
["https://www.googleapis.com/auth/drive", "https://www.googleapis.com/auth/spreadsheets"],
29+
"drive", "v3",
30+
{},
31+
)
32+
2733
next_port = None
2834
default_service_system = None
2935

analytics/analytics_package/analytics/sheets_api.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
1616

1717
FONT_SIZE_PTS = 10
1818
PTS_PIXELS_RATIO = 4/3
19-
DEFAULT_BUFFER_CHARS = 4
19+
DEFAULT_BUFFER_CHARS = 2
2020

2121
def extract_credentials(authentication_response):
2222
"""Extracts the credentials from the tuple from api.authenticate"""
@@ -189,8 +189,13 @@ def fill_worksheet_with_df(
189189
lambda column_name: df[column_name].astype(str).str.len().max()
190190
)
191191
header_widths = df.columns.str.len()
192+
buffer_chars = (
193+
DEFAULT_BUFFER_CHARS
194+
if ("column_widths" not in options or "buffer_chars" not in options["column_widths"])
195+
else options["column_widths"]["buffer_chars"]
196+
)
192197
column_widths = [
193-
round((max(len_tuple) + options["column_widths"]["buffer_chars"]) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
198+
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
194199
for len_tuple in zip(text_widths, header_widths)
195200
]
196201
column_positions = [
@@ -217,7 +222,7 @@ def fill_worksheet_with_df(
217222
if "Sheet1" in [i.title for i in sheet.worksheets()]:
218223
sheet.del_worksheet(sheet.worksheet("Sheet1"))
219224

220-
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior):
225+
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
221226
"""
222227
Fill a sheet with the contents of a dictionary of DataFrames.
223228
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
@@ -226,6 +231,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior):
226231
:param sheet: the gspread.Spreadsheet object
227232
:param df_dict: the dictionary of DataFrames to fill the worksheets with
228233
:param overlapBehavior: the behavior to take if any of the worksheets already exist
234+
:param options: the formatting options for the worksheets.
235+
Optional; a dictionary that may contain the keys "bold_header", "center_header", "freeze_header", and "column_widths".
229236
"""
230237
if overlapBehavior == WORKSHEET_OVERRIDE_BEHAVIORS.EXIT:
231238
for worksheet_name in df_dict.keys():
@@ -235,5 +242,5 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior):
235242
except gspread.exceptions.WorksheetNotFound:
236243
pass
237244
for worksheet_name, df in df_dict.items():
238-
fill_worksheet_with_df(sheet, df, worksheet_name, overlapBehavior)
245+
fill_worksheet_with_df(sheet, df, worksheet_name, overlapBehavior, options=options)
239246

Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pandas as pd
22
from .charts import get_data_df
3+
from .fields import *
34
from urllib.parse import urlparse
45

56
def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None):
@@ -20,15 +21,20 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
2021

2122
df = get_data_df(
2223
metrics,
23-
dimensions,
24+
[dimension["id"] for dimension in dimensions],
2425
**analytics_params,
2526
)
2627
if remove_matches is not None:
27-
for i, match in enumerate(remove_matches):
28+
for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]):
2829
if match is not None:
2930
df = df.loc[~df.index.get_level_values(i).str.fullmatch(match)]
30-
return df.reset_index().copy()
31+
return df.reset_index().rename(columns=get_rename_dict(dimensions)).copy()
3132

33+
def get_rename_dict(dimensions):
34+
"""Get a dictionary to rename the columns of a DataFrame."""
35+
return dict(
36+
zip([dimension["id"] for dimension in dimensions], [dimension["alias"] for dimension in dimensions])
37+
)
3238

3339
def get_outbound_sheets_df(analytics_params):
3440
"""
@@ -41,42 +47,38 @@ def get_outbound_sheets_df(analytics_params):
4147
# Get the builtin "Click" event
4248
df_builtin_links = get_flat_data_df(
4349
analytics_params,
44-
["eventCount", "totalUsers"],
45-
["pagePath", "linkUrl", "eventName"],
50+
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
51+
[DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME],
4652
remove_matches=[None, r"\s*", None]
4753
).groupby(
48-
["pagePath", "linkUrl"]
49-
).sum().reset_index().rename(
50-
columns={"linkUrl": "builtin_url"}
51-
)
54+
[DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]]
55+
).sum().reset_index()
5256

5357
# Get the custom "outbound_link_click" event
5458
df_custom_links = get_flat_data_df(
5559
analytics_params,
56-
["eventCount", "totalUsers"],
57-
["pagePath", "customEvent:click_url", "eventName"],
58-
remove_matches=[None, r"\(not set\)", None],
60+
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
61+
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
62+
remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None],
5963
).groupby(
60-
["pagePath", "customEvent:click_url"]
61-
).sum().reset_index().rename(
62-
columns={"customEvent:click_url": "outbound_url"}
63-
)
64+
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
65+
).sum().reset_index()
6466
# Concatenate the two dataframes, avoiding duplicates
6567
# Keep the link from the builtin event, unless the link contains a #fragment, in which case keep the link from the custom event
6668
df_builtin_links["builtin"] = True
67-
df_builtin_links["truncated_url"] = df_builtin_links["builtin_url"]
68-
df_custom_links["truncated_url"] = df_custom_links["outbound_url"].str.replace(r"#.*", "", regex=True)
69-
df_outbound_links_fragments = df_custom_links.loc[df_custom_links["outbound_url"].str.contains("#")]
69+
df_builtin_links["truncated_url"] = df_builtin_links[DIMENSION_BUILTIN_URL["alias"]]
70+
df_custom_links["truncated_url"] = df_custom_links[DIMENSION_CUSTOM_URL["alias"]].str.replace(r"#.*", "", regex=True)
71+
df_outbound_links_fragments = df_custom_links.loc[df_custom_links[DIMENSION_CUSTOM_URL["alias"]].str.contains("#")].copy()
7072
df_outbound_links_fragments["is_fragment"] = True
7173
df_all_links = pd.concat(
7274
[df_builtin_links, df_outbound_links_fragments], ignore_index=True
7375
)
76+
# Keep the builtin link, unless a custom fragment link shares its truncated URL, in which case drop the builtin row and keep the custom link
7477
df_all_links = df_all_links.loc[
7578
~(df_all_links["truncated_url"].isin(df_outbound_links_fragments["truncated_url"]) & df_all_links["builtin"])
76-
].sort_values("eventCount", ascending=False)
77-
# Determine whther a link is a fragment or an outbound link
78-
df_all_links["outbound"] = df_all_links["truncated_url"].isin(df_custom_links["truncated_url"])
79+
].sort_values(METRIC_EVENT_COUNT, ascending=False)
7980
df_all_links["is_fragment"] = df_all_links["is_fragment"].fillna(False).astype(bool)
81+
# Use the builtin link, unless the link is a fragment, in which case use the custom link
8082
df_all_links["complete_url"] = df_all_links["builtin_url"].where(
8183
~df_all_links["is_fragment"],
8284
df_all_links["outbound_url"]
@@ -86,12 +88,11 @@ def get_outbound_sheets_df(analytics_params):
8688
columns=["builtin_url", "outbound_url", "builtin", "is_fragment"]
8789
).rename(
8890
columns={
89-
"pagePath": "Page Path",
91+
DIMENSION_PAGE_PATH["alias"]: "Page Path",
9092
"complete_url": "Outbound Link",
91-
"eventCount": "Total Clicks",
92-
"totalUsers": "Total Users",
93-
"outbound": "Is Outbound",
93+
METRIC_EVENT_COUNT: "Total Clicks",
94+
METRIC_TOTAL_USERS: "Total Users",
9495
"hostname": "Hostname",
9596
}
96-
)[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users", "Is Outbound"]]
97+
)[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users"]]
9798
return df_all_links.copy().reset_index(drop=True)

0 commit comments

Comments
 (0)