Skip to content

Commit 703b296

Browse files
authored
Bugfixes for output generation in case of series failure (#731)
2 parents 6d80d1d + 1138d2d commit 703b296

File tree

13 files changed

+4199
-383
lines changed

13 files changed

+4199
-383
lines changed

ads/dataset/label_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def fit(self, X: "pandas.DataFrame"):
5252
5353
"""
5454
for column in X.columns:
55-
if X[column].dtype.name in ["object", "category"]:
55+
if X[column].dtype.name in ["object", "category", "bool"]:
5656
X[column] = X[column].astype(str)
5757
self.label_encoders[column] = LabelEncoder()
5858
self.label_encoders[column].fit(X[column])

ads/opctl/operator/lowcode/forecast/model/arima.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
2929
self.local_explanation = {}
3030
self.formatted_global_explanation = None
3131
self.formatted_local_explanation = None
32+
self.constant_cols = {}
3233

3334
def set_kwargs(self):
3435
# Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ def _train_model(self, i, s_id, df, model_kwargs):
6465
try:
6566
target = self.original_target_column
6667
self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
68+
# If trend is constant, remove constant columns
69+
if 'trend' not in model_kwargs or model_kwargs['trend'] == 'c':
70+
self.constant_cols[s_id] = df.columns[df.nunique() == 1]
71+
df = df.drop(columns=self.constant_cols[s_id])
6772

6873
# format the dataframe for this target. Dropping NA on target[df] will remove all future data
6974
data = self.preprocess(df, s_id)
@@ -74,7 +79,7 @@ def _train_model(self, i, s_id, df, model_kwargs):
7479
X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
7580
X_pred = self.get_horizon(data).drop(target, axis=1)
7681

77-
if self.loaded_models is not None:
82+
if self.loaded_models is not None and s_id in self.loaded_models:
7883
model = self.loaded_models[s_id]
7984
else:
8085
# Build and fit model
@@ -143,17 +148,18 @@ def _build_model(self) -> pd.DataFrame:
143148
def _generate_report(self):
144149
"""The method that needs to be implemented on the particular model level."""
145150
import datapane as dp
146-
147-
sec5_text = dp.Text(f"## ARIMA Model Parameters")
148-
blocks = [
149-
dp.HTML(
150-
m.summary().as_html(),
151-
label=s_id,
152-
)
153-
for i, (s_id, m) in enumerate(self.models.items())
154-
]
155-
sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
156-
all_sections = [sec5_text, sec5]
151+
all_sections = []
152+
if len(self.models) > 0:
153+
sec5_text = dp.Text(f"## ARIMA Model Parameters")
154+
blocks = [
155+
dp.HTML(
156+
m.summary().as_html(),
157+
label=s_id,
158+
)
159+
for i, (s_id, m) in enumerate(self.models.items())
160+
]
161+
sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
162+
all_sections = [sec5_text, sec5]
157163

158164
if self.spec.generate_explanations:
159165
try:
@@ -239,6 +245,9 @@ def _custom_predict(
239245
"""
240246
data: ForecastDatasets.get_data_at_series(s_id)
241247
"""
248+
if series_id in self.constant_cols:
249+
data = data.drop(columns=self.constant_cols[series_id])
250+
242251
data = data.drop([target_col], axis=1)
243252
data[dt_column_name] = seconds_to_datetime(
244253
data[dt_column_name], dt_format=self.spec.datetime_column.format

ads/opctl/operator/lowcode/forecast/model/automlx.py

Lines changed: 77 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
seconds_to_datetime,
2323
datetime_to_seconds,
2424
)
25+
from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
2526

2627
AUTOMLX_N_ALGOS_TUNED = 4
2728
AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
@@ -51,8 +52,13 @@ def set_kwargs(self):
5152
] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
5253
return model_kwargs_cleaned, time_budget
5354

54-
def preprocess(self, data, series_id=None):
55-
return data.set_index(self.spec.datetime_column.name)
55+
56+
def preprocess(self, data, series_id=None): # TODO: re-use self.le for explanations
57+
_, df_encoded = _label_encode_dataframe(
58+
data,
59+
no_encode={self.spec.datetime_column.name, self.original_target_column},
60+
)
61+
return df_encoded.set_index(self.spec.datetime_column.name)
5662

5763
@runtime_dependency(
5864
module="automlx",
@@ -105,7 +111,7 @@ def _build_model(self) -> pd.DataFrame:
105111

106112
logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}")
107113

108-
if self.loaded_models is not None:
114+
if self.loaded_models is not None and s_id in self.loaded_models:
109115
model = self.loaded_models[s_id]
110116
else:
111117
model = automlx.Pipeline(
@@ -195,82 +201,85 @@ def _generate_report(self):
195201
)
196202
selected_models = dict()
197203
models = self.models
198-
for i, (s_id, df) in enumerate(self.full_data_dict.items()):
199-
selected_models[s_id] = {
200-
"series_id": s_id,
201-
"selected_model": models[s_id].selected_model_,
202-
"model_params": models[s_id].selected_model_params_,
203-
}
204-
selected_models_df = pd.DataFrame(
205-
selected_models.items(), columns=["series_id", "best_selected_model"]
206-
)
207-
selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
208-
selected_models_section = dp.Blocks(
209-
"### Best Selected Model", dp.DataTable(selected_df)
210-
)
204+
all_sections = []
205+
206+
if len(self.models) > 0:
207+
for i, (s_id, m) in enumerate(models.items()):
208+
selected_models[s_id] = {
209+
"series_id": s_id,
210+
"selected_model": m.selected_model_,
211+
"model_params": m.selected_model_params_,
212+
}
213+
selected_models_df = pd.DataFrame(
214+
selected_models.items(), columns=["series_id", "best_selected_model"]
215+
)
216+
selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
217+
selected_models_section = dp.Blocks(
218+
"### Best Selected Model", dp.DataTable(selected_df)
219+
)
211220

212-
all_sections = [selected_models_text, selected_models_section]
221+
all_sections = [selected_models_text, selected_models_section]
213222

214223
if self.spec.generate_explanations:
215-
# try:
216-
# If the key is present, call the "explain_model" method
217-
self.explain_model()
218-
219-
# Create a markdown text block for the global explanation section
220-
global_explanation_text = dp.Text(
221-
f"## Global Explanation of Models \n "
222-
"The following tables provide the feature attribution for the global explainability."
223-
)
224-
225-
# Convert the global explanation data to a DataFrame
226-
global_explanation_df = pd.DataFrame(self.global_explanation)
224+
try:
225+
# If the key is present, call the "explain_model" method
226+
self.explain_model()
227227

228-
self.formatted_global_explanation = (
229-
global_explanation_df / global_explanation_df.sum(axis=0) * 100
230-
)
231-
self.formatted_global_explanation = (
232-
self.formatted_global_explanation.rename(
233-
{self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
228+
# Create a markdown text block for the global explanation section
229+
global_explanation_text = dp.Text(
230+
f"## Global Explanation of Models \n "
231+
"The following tables provide the feature attribution for the global explainability."
234232
)
235-
)
236233

237-
# Create a markdown section for the global explainability
238-
global_explanation_section = dp.Blocks(
239-
"### Global Explainability ",
240-
dp.DataTable(self.formatted_global_explanation),
241-
)
234+
# Convert the global explanation data to a DataFrame
235+
global_explanation_df = pd.DataFrame(self.global_explanation)
242236

243-
aggregate_local_explanations = pd.DataFrame()
244-
for s_id, local_ex_df in self.local_explanation.items():
245-
local_ex_df_copy = local_ex_df.copy()
246-
local_ex_df_copy["Series"] = s_id
247-
aggregate_local_explanations = pd.concat(
248-
[aggregate_local_explanations, local_ex_df_copy], axis=0
237+
self.formatted_global_explanation = (
238+
global_explanation_df / global_explanation_df.sum(axis=0) * 100
239+
)
240+
self.formatted_global_explanation = (
241+
self.formatted_global_explanation.rename(
242+
{self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
243+
)
249244
)
250-
self.formatted_local_explanation = aggregate_local_explanations
251245

252-
local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
253-
blocks = [
254-
dp.DataTable(
255-
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
256-
label=s_id,
246+
# Create a markdown section for the global explainability
247+
global_explanation_section = dp.Blocks(
248+
"### Global Explainability ",
249+
dp.DataTable(self.formatted_global_explanation),
257250
)
258-
for s_id, local_ex_df in self.local_explanation.items()
259-
]
260-
local_explanation_section = (
261-
dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
262-
)
263251

264-
# Append the global explanation text and section to the "all_sections" list
265-
all_sections = all_sections + [
266-
global_explanation_text,
267-
global_explanation_section,
268-
local_explanation_text,
269-
local_explanation_section,
270-
]
271-
# except Exception as e:
272-
# logger.warn(f"Failed to generate Explanations with error: {e}.")
273-
# logger.debug(f"Full Traceback: {traceback.format_exc()}")
252+
aggregate_local_explanations = pd.DataFrame()
253+
for s_id, local_ex_df in self.local_explanation.items():
254+
local_ex_df_copy = local_ex_df.copy()
255+
local_ex_df_copy["Series"] = s_id
256+
aggregate_local_explanations = pd.concat(
257+
[aggregate_local_explanations, local_ex_df_copy], axis=0
258+
)
259+
self.formatted_local_explanation = aggregate_local_explanations
260+
261+
local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
262+
blocks = [
263+
dp.DataTable(
264+
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
265+
label=s_id,
266+
)
267+
for s_id, local_ex_df in self.local_explanation.items()
268+
]
269+
local_explanation_section = (
270+
dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
271+
)
272+
273+
# Append the global explanation text and section to the "all_sections" list
274+
all_sections = all_sections + [
275+
global_explanation_text,
276+
global_explanation_section,
277+
local_explanation_text,
278+
local_explanation_section,
279+
]
280+
except Exception as e:
281+
logger.warn(f"Failed to generate Explanations with error: {e}.")
282+
logger.debug(f"Full Traceback: {traceback.format_exc()}")
274283

275284
model_description = dp.Text(
276285
"The AutoMLx model automatically preprocesses, selects and engineers "

0 commit comments

Comments (0)