@@ -91,12 +91,14 @@ def create_daily_archives(
         data_frame = pd_read_csv(local_file_path)
         data_frame = add_label_columns(data_frame)
         # putting it in for loop handles crossover data, when we have distinct invoice_month
+        unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
+        days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
+        date_range = {"start": min(days), "end": max(days)}
         for invoice_month in data_frame["invoice.month"].unique():
             invoice_filter = data_frame["invoice.month"] == invoice_month
             invoice_month_data = data_frame[invoice_filter]
-            unique_usage_days = pd.to_datetime(invoice_month_data["usage_start_time"]).dt.date.unique()
-            days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
-            date_range = {"start": min(days), "end": max(days), "invoice_month": str(invoice_month)}
+            # We may be able to completely remove invoice month in the future
+            date_range["invoice_month"] = str(invoice_month)
             partition_dates = invoice_month_data.partition_date.unique()
             for partition_date in partition_dates:
                 partition_date_filter = invoice_month_data["partition_date"] == partition_date
@@ -129,8 +131,8 @@ def create_daily_archives(
                     tracing_id, s3_csv_path, day_filepath, day_file, manifest_id, context
                 )
                 daily_file_names.append(day_filepath)
-    except Exception:
-        msg = f"unable to create daily archives from: {local_file_paths}"
+    except Exception as e:
+        msg = f"unable to create daily archives from: {local_file_paths}. reason: {e}"
         LOG.info(log_json(tracing_id, msg=msg, context=context))
         raise CreateDailyArchivesError(msg)
     return daily_file_names, date_range
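
A minimal standalone sketch of the hoisted date_range logic in the first hunk (the sample DataFrame and its values are illustrative, not taken from the PR):

import pandas as pd

# Illustrative crossover data: usage days spanning two invoice months.
data_frame = pd.DataFrame(
    {
        "usage_start_time": ["2023-01-30", "2023-01-31", "2023-02-01"],
        "invoice.month": ["202301", "202301", "202302"],
    }
)

# Computed once per file, so the start/end span covers all invoice months.
unique_usage_days = pd.to_datetime(data_frame["usage_start_time"]).dt.date.unique()
days = list({day.strftime("%Y-%m-%d") for day in unique_usage_days})
date_range = {"start": min(days), "end": max(days)}

for invoice_month in data_frame["invoice.month"].unique():
    # The shared dict is mutated each iteration; after the loop it holds the
    # last invoice_month seen ("202302" here), matching the diff's behavior.
    date_range["invoice_month"] = str(invoice_month)
    print(date_range)

Because date_range is now built once per file and only invoice_month is reassigned inside the loop, the returned dict spans the full usage period but carries only the last invoice month processed, which the in-diff comment anticipates removing entirely.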