Skip to content

Commit

Permalink
feat: analytics package total views users graphs #4353 (#4360)
Browse files Browse the repository at this point in the history
* feat: basic functions to add charts to google sheets in analytics api (#4353)

* feat: added change over time sheets to analytics package (#4353)

* feat: added datetime handling to analytics sheets package (#4353)

* chore: added missing docstrings in analytics package (#4353)

* chore: bumped setup.py (#4353)
  • Loading branch information
jpaten authored Feb 3, 2025
1 parent 60a7a6e commit 8845dba
Show file tree
Hide file tree
Showing 5 changed files with 291 additions and 30 deletions.
7 changes: 6 additions & 1 deletion analytics/analytics_package/analytics/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
{},
)

sheets_service_params = (
["https://www.googleapis.com/auth/spreadsheets"],
"sheets", "v4",
{}
)

next_port = None
default_service_system = None

Expand Down Expand Up @@ -291,7 +297,6 @@ def build_params(source, subs):


def results_to_df(results):

df = pd.DataFrame()
for result in results:
# Collect column nmes
Expand Down
9 changes: 7 additions & 2 deletions analytics/analytics_package/analytics/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ def show_plot(df, title, fontsize=16, **other_params):
fig.suptitle(title, fontsize=fontsize)
plt.show()

def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
titles, xlabels, metrics = strings_to_lists(titles, xlabels, metrics)
def get_df_over_time(xlabels, metrics, dimensions, df_filter=None, **other_params):
xlabels, metrics = strings_to_lists(xlabels, metrics)

df = get_data_df(metrics, dimensions, **other_params)

Expand All @@ -430,6 +430,11 @@ def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_t
# Rename for display
df.rename(columns={name: xlabels[i] for i, name in enumerate(df.columns)}, inplace=True)

return df

def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
df = get_df_over_time(xlabels, metrics, dimensions, df_filter=df_filter, **other_params)

if (not pre_plot_df_processor is None):
df = pre_plot_df_processor(df)

Expand Down
226 changes: 202 additions & 24 deletions analytics/analytics_package/analytics/sheets_api.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from dataclasses import dataclass
import typing
import gspread
import gspread_formatting
from enum import Enum
from googleapiclient.discovery import build
import numpy as np
import pandas as pd

FONT_SIZE_PTS = 10
PTS_PIXELS_RATIO = 4/3
Expand All @@ -16,20 +17,33 @@ class FILE_OVERRIDE_BEHAVIORS(Enum):
EXIT_IF_IN_SAME_PLACE = 2
EXIT_ANYWHERE = 3


class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
OVERRIDE = 1
EXIT = 2


class COLUMN_FORMAT_OPTIONS(Enum):
DEFAULT = 1
PERCENT_UNCOLORED = 2
PERCENT_COLORED = 3
YEAR_MONTH_DATE = 4


class CHART_TYPES(Enum):
LINE = "LINE"

DEFAULT_SHEET_FORMATTING_OPTIONS = {
"bold_header": True,
"center_header": True,
"freeze_header": True,
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS},
"extra_columns": 0,
"extra_columns_width": 50,
}

DEFAULT_GSPREAD_UPDATE_ARGS = {
"value_input_option": gspread.utils.ValueInputOption.user_entered,
}

def extract_credentials(authentication_response):
Expand All @@ -41,7 +55,7 @@ def authenticate_gspread(authentication_response):
gc = gspread.authorize(extract_credentials(authentication_response))
return gc

def authenticate_drive_api(authentication_response):
def authenticate_google_api(authentication_response):
"""Authenticates the Drive API using the response from api.authenticate"""
return authentication_response[0]

Expand Down Expand Up @@ -107,21 +121,21 @@ def search_for_folder_id(drive_api, folder_name, allow_trashed = False, allow_du
return [file["id"] for file in files_exact_match]


def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE):
def create_sheet_in_folder(drive_authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE):
"""
Create a new sheet in the project with the given name and parent folder.
Returns the new sheet.
:param authentication_response: the service parameters tuple
:param drive_authentication_response: the service parameters tuple
:param sheet_name: the name of the new sheet
:param parent_folder_name: the name of the parent folder for the new sheet
:param override_behavior: the behavior to take if the sheet already exists
:returns: the gspread.Spreadsheet object of the new sheet
:rtype: gspread.Spreadsheet
"""
# Build Drive API
gc = authenticate_gspread(authentication_response)
drive_api = authenticate_drive_api(authentication_response)
gc = authenticate_gspread(drive_authentication_response)
drive_api = authenticate_google_api(drive_authentication_response)
parent_folder_id = None if parent_folder_name is None else search_for_folder_id(drive_api, parent_folder_name)[0]

# Check if sheet already exists and handle based on input
Expand Down Expand Up @@ -163,8 +177,9 @@ def fill_worksheet_with_df(
df,
worksheet_name,
overlapBehavior,
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
column_formatting_options={}
sheet_formatting_options={},
column_formatting_options={},
**gspread_update_args
):
"""
Fill a worksheet with the contents of a DataFrame.
Expand Down Expand Up @@ -193,38 +208,50 @@ def fill_worksheet_with_df(
title=worksheet_name, rows=df.shape[0], cols=df.shape[1]
)

sheet_formatting_options_filled = {**DEFAULT_SHEET_FORMATTING_OPTIONS, **sheet_formatting_options}

# Add extra blank columns to the right of the worksheet
df_to_insert = pd.concat(
[df] + [pd.Series(" ", index=df.index, name="")] * sheet_formatting_options_filled["extra_columns"],
axis=1
)
# Add data to worksheet
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())
worksheet.update(
[df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(),
**{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args}
)

# Format worksheet
# Justify Column Widths
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
text_widths = df.astype(str).columns.map(
lambda column_name: df[column_name].astype(str).str.len().max()
)
header_widths = df.columns.str.len()
buffer_chars = (
DEFAULT_BUFFER_CHARS
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
else sheet_formatting_options["column_widths"]["buffer_chars"]
if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"])
else sheet_formatting_options_filled["column_widths"]["buffer_chars"]
)
column_widths = [
data_column_widths = [
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
for len_tuple in zip(text_widths, header_widths)
]
extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"]
combined_column_widths = data_column_widths + extra_column_widths
column_positions = [
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(column_widths)
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths)
]
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
gspread_formatting.set_column_widths(worksheet, zip(column_positions, combined_column_widths))
# Freeze Header
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]:
gspread_formatting.set_frozen(worksheet, rows=1)
base_format_options = gspread_formatting.CellFormat()
# Bold Header
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]:
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
# Center Header
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]:
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
# Handle column specific formatting
for column in column_formatting_options:
Expand Down Expand Up @@ -269,6 +296,13 @@ def fill_worksheet_with_df(
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
)
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
# Apply date format rule
gspread_formatting.format_cell_range(
worksheet,
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
)

# Apply base formatting options
gspread_formatting.format_cell_range(
Expand All @@ -281,7 +315,7 @@ def fill_worksheet_with_df(
if "Sheet1" in [i.title for i in sheet.worksheets()]:
sheet.del_worksheet(sheet.worksheet("Sheet1"))

def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args):
"""
Fill a sheet with the contents of a dictionary of DataFrames.
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
Expand All @@ -307,6 +341,150 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
for worksheet_name, df in df_dict.items():
fill_worksheet_with_df(
sheet, df, worksheet_name, overlapBehavior,
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
column_formatting_options=column_formatting_options.get(worksheet_name, {})
)
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
column_formatting_options=column_formatting_options.get(worksheet_name, {}),
**gspread_update_args
)

def update_sheet_raw(sheets_authentication_response, sheet, *updates):
"""
Directly call the Google Sheets api to update the specified sheet with the optional arguments.
"""
assert len(updates) > 0
sheets_api = authenticate_google_api(sheets_authentication_response)
sheet_id = sheet.id
body = {"requests": list(updates)}
response = (
sheets_api.spreadsheets()
.batchUpdate(spreadsheetId=sheet_id, body=body)
.execute()
)
return response

REQUIRED_CHART_ARGS = []

DEFAULT_CHART_ARGS = {
"title": "",
"x_axis_title": "",
"y_axis_title": "",
"invert_x_axis": False,
"chart_position": None, # None means it will be created in a new sheet
"chart_position_offset_x": 0,
"chart_position_offset_y": 0,
"chart_width": 600,
"chart_height": 371,
}

@dataclass
class WorksheetRange:
"""
A dataclass to represent a range of cells in a worksheet in the one-sided interval [top_left, bottom_right).
:param worksheet: the gspread.worksheet.Worksheet object
:param top_left: the top left cell of the range. This cell will be included in the range
:param bottom_right: the bottom right cell of the range. This cell will not be included in the range
"""
worksheet: gspread.worksheet.Worksheet
top_left: gspread.cell.Cell
bottom_right: gspread.cell.Cell

@property
def range_dict(self):
"""The range as a dictionary for the sources field in the Google Sheets api"""
return {
"sheetId": self.worksheet.id,
"startRowIndex": self.top_left.row - 1,
"endRowIndex": self.bottom_right.row - 1,
"startColumnIndex": self.top_left.col - 1,
"endColumnIndex": self.bottom_right.col - 1,
}

def _cell_to_grid_coordinate(cell, worksheet):
return {
"sheetId": worksheet.id,
"rowIndex": cell.row - 1,
"columnIndex": cell.col - 1,
}

def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args):
"""
Add a chart to a specified workshet
:param sheets_authentication_response: the response from ga.authenticate. Must be for the sheets api v4
:param sheet: the gspread.Spreadsheet object
:param worksheet: the gspread.Worksheet object
:param chart_type: the type of chart to add
:param domain: the domain of the chart as a WorksheetRange. Must contain either one row or one column
:param series: the series of the chart as a WorksheetRange. Must contain either one row or one column
:param chart_args: other arguments to create the chart. See DEFAULT_CHART_ARGS
"""
complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args}
if complete_chart_args["chart_position"] is not None:
position_dict = {
"overlayPosition": {
"anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet),
"offsetXPixels": complete_chart_args["chart_position_offset_x"],
"offsetYPixels": complete_chart_args["chart_position_offset_y"],
"widthPixels": complete_chart_args["chart_width"],
"heightPixels": complete_chart_args["chart_height"],
}
}
else:
position_dict = {"newSheet": True}
formatted_domains = [
{
"domain": {
"sourceRange": {
"sources": [
domain.range_dict
],
},
},
"reversed": complete_chart_args["invert_x_axis"],
},
]

formatted_series = [
{
"series": {
"sourceRange": {
"sources": [
series_source.range_dict
],
},
},
"targetAxis": "LEFT_AXIS",
}
for series_source in series
]
formatted_axis = []
if complete_chart_args["x_axis_title"]:
formatted_axis.append({
"title": complete_chart_args["x_axis_title"],
"position": "BOTTOM_AXIS",
})
if complete_chart_args["y_axis_title"]:
formatted_axis.append({
"title": complete_chart_args["y_axis_title"],
"position": "LEFT_AXIS",
})
request = {
"addChart": {
"chart": {
"spec": {
"title": complete_chart_args["title"],
#TODO: insert legend position
#TODO: insert axis positions
"basicChart": {
"axis": formatted_axis,
"chartType": chart_type.value,
"domains": formatted_domains,
"headerCount": 1, #TODO: not sure what this means
"series": formatted_series,
},
},
"position": position_dict
},
},
}

response = update_sheet_raw(sheets_authentication_response, sheet, request)
return response
Loading

0 comments on commit 8845dba

Please sign in to comment.