Skip to content

Commit eef1cee

Browse files
author
Jonah Paten
authored
feat: add percent change to analytics package (#4335)
* feat: added percent change column and formatting (#4331) * chore: updated ga package version (#4331) * fix: made argument names consistent (#4331)
1 parent d2ac666 commit eef1cee

File tree

3 files changed

+131
-29
lines changed

3 files changed

+131
-29
lines changed

analytics/analytics_package/analytics/sheets_api.py

Lines changed: 93 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
import gspread_formatting
33
from enum import Enum
44
from googleapiclient.discovery import build
5+
import numpy as np
6+
7+
FONT_SIZE_PTS = 10
8+
PTS_PIXELS_RATIO = 4/3
9+
DEFAULT_BUFFER_CHARS = 2
10+
GREEN_COLOR = "#00FF00"
11+
RED_COLOR = "#FF0000"
12+
513

614
class FILE_OVERRIDE_BEHAVIORS(Enum):
715
OVERRIDE_IF_IN_SAME_PLACE = 1
@@ -12,9 +20,17 @@ class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
1220
OVERRIDE = 1
1321
EXIT = 2
1422

15-
FONT_SIZE_PTS = 10
16-
PTS_PIXELS_RATIO = 4/3
17-
DEFAULT_BUFFER_CHARS = 2
23+
class COLUMN_FORMAT_OPTIONS(Enum):
    """Per-column display formats selectable when filling a worksheet."""

    # Leave the column untouched; no column-specific formatting is applied.
    DEFAULT = 1
    # Render the column with a percent number format, without any coloring.
    PERCENT_UNCOLORED = 2
    # Render the column as a percent and add green/red conditional text colors.
    PERCENT_COLORED = 3
27+
28+
# Sheet-level formatting used when a caller does not supply its own options.
DEFAULT_SHEET_FORMATTING_OPTIONS = {
    # Header row styling toggles.
    "bold_header": True,
    "center_header": True,
    "freeze_header": True,
    # Column width fitting: whether to justify, and how many extra
    # characters of padding to add to each column.
    "column_widths": {
        "justify": True,
        "buffer_chars": DEFAULT_BUFFER_CHARS,
    },
}
1834

1935
def extract_credentials(authentication_response):
2036
"""Extracts the credentials from the tuple from api.authenticate"""
@@ -141,17 +157,14 @@ def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_na
141157
# Open new file
142158
return gc.open_by_key(spread_id)
143159

160+
144161
def fill_worksheet_with_df(
145162
sheet,
146163
df,
147164
worksheet_name,
148165
overlapBehavior,
149-
options={
150-
"bold_header": True,
151-
"center_header": True,
152-
"freeze_header": True,
153-
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
154-
}
166+
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
167+
column_formatting_options={}
155168
):
156169
"""
157170
Fill a worksheet with the contents of a DataFrame.
@@ -162,8 +175,10 @@ def fill_worksheet_with_df(
162175
:param df: the DataFrame to fill the worksheet with
163176
:param worksheet_name: the name of the worksheet to fill. Cannot be "Sheet1"
164177
:param overlapBehavior: the behavior to take if the worksheet already exists.
165-
:param options: the formatting options for the worksheet.
178+
:param sheet_formatting_options: the formatting options for the worksheet.
166179
Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional
180+
:param column_formatting_options: the column formatting options for the worksheet.
181+
Should be a dictionary with dataframe columns as keys and instances of COLUMN_FORMAT_OPTIONS as values, optional
167182
"""
168183
# Sheet1 is special since it's created by default, so it's not allowed
169184
assert worksheet_name != "Sheet1"
@@ -179,19 +194,19 @@ def fill_worksheet_with_df(
179194
)
180195

181196
# Add data to worksheet
182-
worksheet.update([df.columns.values.tolist()] + df.values.tolist())
197+
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())
183198

184199
# Format worksheet
185200
# Justify Column Widths
186-
if "column_widths" not in options or options["column_widths"]["justify"]:
201+
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
187202
text_widths = df.astype(str).columns.map(
188203
lambda column_name: df[column_name].astype(str).str.len().max()
189204
)
190205
header_widths = df.columns.str.len()
191206
buffer_chars = (
192207
DEFAULT_BUFFER_CHARS
193-
if ("column_widths" not in options or "buffer_chars" not in options["column_widths"])
194-
else options["column_widths"]["buffer_chars"]
208+
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
209+
else sheet_formatting_options["column_widths"]["buffer_chars"]
195210
)
196211
column_widths = [
197212
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
@@ -202,26 +217,71 @@ def fill_worksheet_with_df(
202217
]
203218
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
204219
# Freeze Header
205-
if "freeze_header" not in options or options["freeze_header"]:
220+
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
206221
gspread_formatting.set_frozen(worksheet, rows=1)
207-
format_options = gspread_formatting.CellFormat()
222+
base_format_options = gspread_formatting.CellFormat()
208223
# Bold Header
209-
if "bold_header" not in options or options["bold_header"]:
210-
format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
224+
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
225+
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
211226
# Center Header
212-
if "center_header" not in options or options["center_header"]:
213-
format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
227+
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
228+
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
229+
# Handle column specific formatting
230+
for column in column_formatting_options:
231+
if column not in df.columns:
232+
raise KeyError("Formatting column is not in the dataframe")
233+
# Skip if the column is set to default
234+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.DEFAULT:
235+
continue
236+
# Get the column position
237+
column_position_numeric = df.columns.get_loc(column) + 1
238+
column_range_top = gspread.utils.rowcol_to_a1(1, column_position_numeric)
239+
column_range_bottom = gspread.utils.rowcol_to_a1(df.index.size + 1, column_position_numeric)
240+
column_range = f"{column_range_top}:{column_range_bottom}"
241+
column_worksheet_range = gspread_formatting.GridRange.from_a1_range(column_range, worksheet)
242+
# Get conditional formatting rules
243+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.PERCENT_COLORED:
244+
green_rule = gspread_formatting.ConditionalFormatRule(
245+
ranges=[column_worksheet_range],
246+
booleanRule=gspread_formatting.BooleanRule(
247+
condition=gspread_formatting.BooleanCondition('NUMBER_GREATER_THAN_EQ', ['0']),
248+
format=gspread_formatting.CellFormat(
249+
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(0,1,0)))
250+
)
251+
)
252+
red_rule = gspread_formatting.ConditionalFormatRule(
253+
ranges=[column_worksheet_range],
254+
booleanRule=gspread_formatting.BooleanRule(
255+
condition=gspread_formatting.BooleanCondition('NUMBER_LESS_THAN_EQ', ['0']),
256+
format=gspread_formatting.CellFormat(
257+
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(1,0,0)))
258+
)
259+
)
260+
# Apply conditional formatting rules
261+
conditional_formatting_rules = gspread_formatting.get_conditional_format_rules(worksheet)
262+
conditional_formatting_rules.append(green_rule)
263+
conditional_formatting_rules.append(red_rule)
264+
conditional_formatting_rules.save()
265+
if column_formatting_options[column] in (COLUMN_FORMAT_OPTIONS.PERCENT_COLORED, COLUMN_FORMAT_OPTIONS.PERCENT_UNCOLORED):
266+
# Apply percent format rule
267+
gspread_formatting.format_cell_range(
268+
worksheet,
269+
column_range,
270+
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
271+
)
272+
273+
# Apply base formatting options
214274
gspread_formatting.format_cell_range(
215275
worksheet,
216276
f"A1:{gspread.utils.rowcol_to_a1(1, len(df.columns))}",
217-
format_options
277+
base_format_options
218278
)
219279

220280
# Delete Sheet1 if it has been created by default
221281
if "Sheet1" in [i.title for i in sheet.worksheets()]:
222282
sheet.del_worksheet(sheet.worksheet("Sheet1"))
223283

224-
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
284+
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
225285
"""
226286
Fill a sheet with the contents of a dictionary of DataFrames.
227287
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
@@ -230,8 +290,12 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
230290
:param sheet: the gspread.Spreadsheet object
231291
:param df_dict: the dictionary of DataFrames to fill the worksheets with
232292
:param overlapBehavior: the behavior to take if any of the worksheets already exist
233-
:param options: the formatting options for the worksheets.
234-
Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional
293+
:param sheet_formatting_options: the formatting options for the worksheets.
294+
Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being some of
295+
"bold_header", "center_header", "freeze_header", and "column_widths", optional
296+
:param column_formatting_options: the column formatting options for the worksheets.
297+
Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being column names.
298+
The inner values should be instances of COLUMN_FORMAT_OPTIONS, optional
235299
"""
236300
if overlapBehavior == WORKSHEET_OVERRIDE_BEHAVIORS.EXIT:
237301
for worksheet_name in df_dict.keys():
@@ -241,5 +305,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
241305
except gspread.exceptions.WorksheetNotFound:
242306
pass
243307
for worksheet_name, df in df_dict.items():
244-
fill_worksheet_with_df(sheet, df, worksheet_name, overlapBehavior, options=options)
245-
308+
fill_worksheet_with_df(
309+
sheet, df, worksheet_name, overlapBehavior,
310+
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
311+
column_formatting_options=column_formatting_options.get(worksheet_name, {})
312+
)

analytics/analytics_package/analytics/sheets_elements.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def get_rename_dict(dimensions):
3636
zip([dimension["id"] for dimension in dimensions], [dimension["alias"] for dimension in dimensions])
3737
)
3838

39-
def get_outbound_sheets_df(analytics_params):
39+
def get_outbound_links_df(analytics_params):
4040
"""
4141
Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.
4242
@@ -95,4 +95,39 @@ def get_outbound_sheets_df(analytics_params):
9595
"hostname": "Hostname",
9696
}
9797
)[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users"]]
98-
return df_all_links.copy().reset_index(drop=True)
98+
99+
return df_all_links.copy().reset_index(drop=True)
100+
101+
def get_outbound_links_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with outbound links from the Analytics API and a comparison against a previous period.

    Outbound links are fetched once per period and aligned on
    ("Page Path", "Outbound Link", "Hostname"). Links that appear in only one
    of the two periods are kept: a link missing from the current period gets 0
    for its current values, while a link missing from the previous period has
    no baseline, so its percent change is NaN.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the previous month
    :param end_previous: the end date for the previous month
    :return: a DataFrame of the current period's values with added
        "Total Clicks Percent Change" and "Total Users Percent Change" columns
        (fractional change, e.g. 0.5 for +50%), sorted by "Total Clicks" then
        "Total Users" in descending order
    """
    index_columns = ["Page Path", "Outbound Link", "Hostname"]
    params_current = {
        **analytics_params,
        "start_date": start_current,
        "end_date": end_current,
    }
    params_previous = {
        **analytics_params,
        "start_date": start_previous,
        "end_date": end_previous,
    }
    # NOTE: the parameters are intentionally not printed or logged here; they
    # carry the authentication details and should not end up on stdout.
    df_current = get_outbound_links_df(params_current).set_index(index_columns)
    df_previous = get_outbound_links_df(params_previous).set_index(index_columns)

    # Align both periods on the union of their links so rows line up for division.
    combined_index = df_current.index.union(df_previous.index)
    # Only the current period is zero-filled; the previous period keeps NaN so
    # that links with no baseline yield NaN rather than a misleading number.
    df_current_reindexed = df_current.reindex(combined_index).fillna(0)
    df_previous_reindexed = df_previous.reindex(combined_index)

    for metric in ("Total Clicks", "Total Users"):
        df_current_reindexed[f"{metric} Percent Change"] = (
            df_current_reindexed[metric] / df_previous_reindexed[metric]
        ) - 1

    return df_current_reindexed.sort_values(
        ["Total Clicks", "Total Users"], ascending=False, kind="stable"
    ).reset_index()
133+

analytics/analytics_package/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name="analytics",
5-
version="3.1.0",
5+
version="3.2.0",
66
packages=["analytics"],
77
install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
88
)

0 commit comments

Comments
 (0)