Commit f6e7ebb
Authored May 7, 2024

[COST-3617] - Fix GCP end of month crossover summary (#5080)

1 parent 2dd7915 · commit f6e7ebb

File tree: 6 files changed, +30, -15 lines

koku/api/utils.py

+8

@@ -448,6 +448,14 @@ def gcp_find_invoice_months_in_date_range(self, start, end):
             invoice_months.append(invoice_month)
         return invoice_months
 
+    def get_year_month_list_from_start_end(self, start, end):
+        if isinstance(start, datetime.date):
+            start = datetime.datetime(start.year, start.month, start.day, tzinfo=settings.UTC)
+        if isinstance(end, datetime.date):
+            end = datetime.datetime(end.year, end.month, end.day, tzinfo=settings.UTC)
+        dates = self.list_months(start, end)
+        return [{"year": date.strftime("%Y"), "month": date.strftime("%m")} for date in dates]
+
 
 def materialized_view_month_start(dh=DateHelper()):
     """Datetime of midnight on the first of the month where materialized summary starts."""

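The new DateHelper method turns an arbitrary start/end window into the list of year/month partitions it touches. A minimal standalone sketch of the same idea (the real method delegates to DateHelper.list_months, which is not shown in this diff) makes the crossover behaviour concrete:

import datetime

def year_month_list(start, end):
    # Rough stand-in for DateHelper.get_year_month_list_from_start_end:
    # walk from the first day of start's month through end, one month at a time.
    current = datetime.date(start.year, start.month, 1)
    result = []
    while current <= end:
        result.append({"year": current.strftime("%Y"), "month": current.strftime("%m")})
        # jump to the first day of the next month
        current = (current.replace(day=28) + datetime.timedelta(days=4)).replace(day=1)
    return result

# a summary window that crosses from March into April touches two partitions
print(year_month_list(datetime.date(2024, 3, 25), datetime.date(2024, 4, 5)))
# [{'year': '2024', 'month': '03'}, {'year': '2024', 'month': '04'}]
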
koku/masu/database/gcp_report_db_accessor.py

+7, -6

@@ -21,6 +21,7 @@
 
 from api.common import log_json
 from api.provider.models import Provider
+from api.utils import DateHelper
 from koku.database import SQLScriptAtomicExecutorMixin
 from masu.database import GCP_REPORT_TABLE_MAP
 from masu.database import OCP_REPORT_TABLE_MAP

@@ -107,6 +108,7 @@ def populate_line_item_daily_summary_table_trino(
             (None)
 
         """
+        date_dicts = DateHelper().get_year_month_list_from_start_end(start_date, end_date)
         last_month_end = datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
         if end_date == last_month_end:
 
@@ -134,15 +136,14 @@
             "schema": self.schema,
             "table": TRINO_LINE_ITEM_TABLE,
             "source_uuid": source_uuid,
-            "year": invoice_month_date.strftime("%Y"),
-            "month": invoice_month_date.strftime("%m"),
             "markup": markup_value or 0,
             "bill_id": bill_id,
         }
-
-        self._execute_trino_raw_sql_query(
-            sql, sql_params=sql_params, log_ref="reporting_gcpcostentrylineitem_daily_summary.sql"
-        )
+        for date_dict in date_dicts:
+            sql_params = sql_params | {"year": date_dict["year"], "month": date_dict["month"]}
+            self._execute_trino_raw_sql_query(
+                sql, sql_params=sql_params, log_ref="reporting_gcpcostentrylineitem_daily_summary.sql"
+            )
 
     def populate_tags_summary_table(self, bill_ids, start_date, end_date):
         """Populate the line item aggregated totals data table."""

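Instead of deriving a single year/month pair from invoice_month_date, the summary SQL now runs once per year/month returned by the helper above, with the partition keys overlaid onto the shared parameters via the dict union operator (Python 3.9+). A rough sketch of the resulting call pattern, where execute_query stands in for self._execute_trino_raw_sql_query and the parameter values are made up:

date_dicts = [{"year": "2024", "month": "03"}, {"year": "2024", "month": "04"}]
base_params = {
    "schema": "org1234567",  # illustrative values, not taken from the commit
    "source_uuid": "00000000-0000-0000-0000-000000000000",
    "markup": 0,
    "bill_id": 42,
}

def execute_query(sql_params):
    # placeholder for self._execute_trino_raw_sql_query(sql, sql_params=..., log_ref=...)
    print("summarizing partition", sql_params["year"], sql_params["month"])

for date_dict in date_dicts:
    # dict union leaves base_params untouched and overlays the partition keys
    sql_params = base_params | {"year": date_dict["year"], "month": date_dict["month"]}
    execute_query(sql_params)
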
koku/masu/database/trino_sql/reporting_gcpcostentrylineitem_daily_summary.sql

-1

@@ -63,7 +63,6 @@ CROSS JOIN
     WHERE source = '{{source_uuid | sqlsafe}}'
         AND year = '{{year | sqlsafe}}'
         AND month = '{{month | sqlsafe}}'
-        AND invoice_month = '{{year | sqlsafe}}{{month | sqlsafe}}'
         AND usage_start_time >= TIMESTAMP '{{start_date | sqlsafe}}'
        AND usage_start_time < date_add('day', 1, TIMESTAMP '{{end_date | sqlsafe}}')
     GROUP BY billing_account_id,
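Crossover usage is stored under the year/month partition of its usage date but carries the next month's invoice_month, so the removed predicate silently excluded those rows from the summary. A small Python illustration of the old versus new filter, using made-up rows rather than real Trino data:

rows = [
    # late-March usage billed on the March invoice
    {"year": "2024", "month": "03", "invoice_month": "202403", "cost": 10.0},
    # crossover: late-March usage billed on the April invoice
    {"year": "2024", "month": "03", "invoice_month": "202404", "cost": 5.0},
]

def old_filter(row, year, month):
    return row["year"] == year and row["month"] == month and row["invoice_month"] == f"{year}{month}"

def new_filter(row, year, month):
    return row["year"] == year and row["month"] == month

print(sum(r["cost"] for r in rows if old_filter(r, "2024", "03")))  # 10.0 -- crossover row dropped
print(sum(r["cost"] for r in rows if new_filter(r, "2024", "03")))  # 15.0 -- crossover row included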

koku/masu/external/downloader/gcp/gcp_report_downloader.py

+7, -5

@@ -91,12 +91,14 @@ def create_daily_archives(
     data_frame = pd_read_csv(local_file_path)
     data_frame = add_label_columns(data_frame)
     # putting it in for loop handles crossover data, when we have distinct invoice_month
+    unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
+    days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
+    date_range = {"start": min(days), "end": max(days)}
     for invoice_month in data_frame["invoice.month"].unique():
         invoice_filter = data_frame["invoice.month"] == invoice_month
         invoice_month_data = data_frame[invoice_filter]
-        unique_usage_days = pd.to_datetime(invoice_month_data["usage_start_time"]).dt.date.unique()
-        days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
-        date_range = {"start": min(days), "end": max(days), "invoice_month": str(invoice_month)}
+        # We may be able to completely remove invoice month in the future
+        date_range["invoice_month"] = str(invoice_month)
         partition_dates = invoice_month_data.partition_date.unique()
         for partition_date in partition_dates:
             partition_date_filter = invoice_month_data["partition_date"] == partition_date

@@ -129,8 +131,8 @@ def create_daily_archives(
                         tracing_id, s3_csv_path, day_filepath, day_file, manifest_id, context
                     )
                     daily_file_names.append(day_filepath)
-    except Exception:
-        msg = f"unable to create daily archives from: {local_file_paths}"
+    except Exception as e:
+        msg = f"unable to create daily archives from: {local_file_paths}. reason: {e}"
         LOG.info(log_json(tracing_id, msg=msg, context=context))
         raise CreateDailyArchivesError(msg)
     return daily_file_names, date_range
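Because the start and end of date_range are now computed from every usage day in the file before the invoice-month loop (here and in the gcp_local downloader below), a file containing crossover data reports one range spanning both months rather than whichever invoice month happened to be processed last. A small pandas sketch of that computation on a toy crossover frame:

import pandas as pd

# toy crossover file: usage at the end of March plus the start of April
data_frame = pd.DataFrame(
    {
        "usage_start_time": ["2024-03-30", "2024-03-31", "2024-04-01"],
        "invoice.month": ["202403", "202404", "202404"],
    }
)

unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
date_range = {"start": min(days), "end": max(days)}
print(date_range)  # {'start': '2024-03-30', 'end': '2024-04-01'}

# previously this was computed inside the invoice-month loop, so the returned
# range only covered the days belonging to the last invoice_month processed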

koku/masu/external/downloader/gcp_local/gcp_local_report_downloader.py

+5, -3

@@ -54,12 +54,14 @@ def create_daily_archives(tracing_id, account, provider_uuid, filename, filepath
         LOG.error(f"File {filepath} could not be parsed. Reason: {str(error)}")
         raise GCPReportDownloaderError(error)
     # putting it in for loop handles crossover data, when we have distinct invoice_month
+    unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
+    days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
+    date_range = {"start": min(days), "end": max(days)}
     for invoice_month in data_frame["invoice.month"].unique():
         invoice_filter = data_frame["invoice.month"] == invoice_month
         invoice_month_data = data_frame[invoice_filter]
-        unique_usage_days = pd.to_datetime(invoice_month_data["usage_start_time"]).dt.date.unique()
-        days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
-        date_range = {"start": min(days), "end": max(days), "invoice_month": str(invoice_month)}
+        # We may be able to completely remove invoice month in the future
+        date_range["invoice_month"] = str(invoice_month)
         partition_dates = invoice_month_data.partition_date.unique()
         for partition_date in partition_dates:
             partition_date_filter = invoice_month_data["partition_date"] == partition_date

koku/masu/processor/parquet/parquet_report_processor.py

+3

@@ -476,6 +476,9 @@ def convert_to_parquet(self): # noqa: C901
             daily_data_frames.extend(daily_frame)
             if self.provider_type not in (Provider.PROVIDER_AZURE):
                 self.create_daily_parquet(parquet_base_filename, daily_frame)
+            if self.provider_type in [Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL]:
+                # Sync partitions on each file to create partitions that cross month boundaries
+                self.create_parquet_table(parquet_base_filename)
         if not success:
             msg = "failed to convert files to parquet"
             LOG.warning(
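Calling create_parquet_table for each converted GCP file matters because a single crossover file can now emit parquet data under two month partitions, and both must be registered before either month is summarized. A rough sketch of how the partition keys fall out of a crossover file (the actual Hive/S3 partition layout used by koku is an assumption here):

import pandas as pd

data_frame = pd.DataFrame({"usage_start_time": ["2024-03-31", "2024-04-01"]})

usage = pd.to_datetime(data_frame["usage_start_time"])
partitions = sorted({(ts.strftime("%Y"), ts.strftime("%m")) for ts in usage})
print(partitions)  # [('2024', '03'), ('2024', '04')]

# both (year, month) partitions need to exist in the Trino table, hence the
# per-file partition sync for GCP and GCP-local providers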
