@@ -22,6 +22,7 @@
     seconds_to_datetime,
     datetime_to_seconds,
 )
+from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
 
 AUTOMLX_N_ALGOS_TUNED = 4
 AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
@@ -51,8 +52,13 @@ def set_kwargs(self):
         ] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
         return model_kwargs_cleaned, time_budget
 
-    def preprocess(self, data, series_id=None):
-        return data.set_index(self.spec.datetime_column.name)
+
+    def preprocess(self, data, series_id=None):  # TODO: re-use self.le for explanations
+        _, df_encoded = _label_encode_dataframe(
+            data,
+            no_encode={self.spec.datetime_column.name, self.original_target_column},
+        )
+        return df_encoded.set_index(self.spec.datetime_column.name)
 
     @runtime_dependency(
         module="automlx",
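Note on the new preprocess step: `_label_encode_dataframe` is imported from the operator's `utils` module and, judging by the call site, returns the fitted encoder(s) together with the encoded frame; the encoder is currently discarded (the `_`), which is what the TODO about re-using `self.le` for explanations refers to. A minimal sketch of what such a helper could look like, assuming one scikit-learn `LabelEncoder` per column; the actual implementation and signature in `ads.opctl.operator.lowcode.forecast.utils` may differ:

from sklearn.preprocessing import LabelEncoder


def _label_encode_dataframe(df, no_encode=None):
    """Label-encode the object/categorical columns of `df`, skipping `no_encode`.

    Returns (encoders, encoded_df) so the fitted encoders could later be
    re-used, e.g. to map encoded values back when building explanations.
    """
    no_encode = no_encode or set()
    encoders = {}
    df_encoded = df.copy()
    for col in df_encoded.columns:
        # Leave the datetime column, the target, and numeric columns untouched.
        if col in no_encode or df_encoded[col].dtype != object:
            continue
        le = LabelEncoder()
        df_encoded[col] = le.fit_transform(df_encoded[col].astype(str))
        encoders[col] = le
    return encoders, df_encoded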
@@ -105,7 +111,7 @@ def _build_model(self) -> pd.DataFrame:
 
             logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}")
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 model = automlx.Pipeline(
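The added `s_id in self.loaded_models` check guards against a `KeyError` when a saved model artifact is reloaded but the incoming data contains a series that was not present at training time; such series now fall through to the `else` branch and get a fresh `automlx.Pipeline`. A small standalone illustration of the pattern (`train_new` is a hypothetical stand-in for the pipeline construction in the diff):

def get_or_train_model(loaded_models, s_id, train_new):
    """Reuse a previously trained per-series model when one exists,
    otherwise fall back to training a new one for this series."""
    if loaded_models is not None and s_id in loaded_models:
        return loaded_models[s_id]  # saved artifact covers this series
    return train_new()  # new series, or no saved models at all


# Example: an unseen series id triggers retraining instead of a KeyError.
models = {"sales_store_1": "pipeline_A"}
print(get_or_train_model(models, "sales_store_1", lambda: "new pipeline"))  # pipeline_A
print(get_or_train_model(models, "sales_store_9", lambda: "new pipeline"))  # new pipeline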
@@ -195,82 +201,85 @@ def _generate_report(self):
         )
         selected_models = dict()
         models = self.models
-        for i, (s_id, df) in enumerate(self.full_data_dict.items()):
-            selected_models[s_id] = {
-                "series_id": s_id,
-                "selected_model": models[s_id].selected_model_,
-                "model_params": models[s_id].selected_model_params_,
-            }
-        selected_models_df = pd.DataFrame(
-            selected_models.items(), columns=["series_id", "best_selected_model"]
-        )
-        selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
-        selected_models_section = dp.Blocks(
-            "### Best Selected Model", dp.DataTable(selected_df)
-        )
+        all_sections = []
+
+        if len(self.models) > 0:
+            for i, (s_id, m) in enumerate(models.items()):
+                selected_models[s_id] = {
+                    "series_id": s_id,
+                    "selected_model": m.selected_model_,
+                    "model_params": m.selected_model_params_,
+                }
+            selected_models_df = pd.DataFrame(
+                selected_models.items(), columns=["series_id", "best_selected_model"]
+            )
+            selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
+            selected_models_section = dp.Blocks(
+                "### Best Selected Model", dp.DataTable(selected_df)
+            )
 
-        all_sections = [selected_models_text, selected_models_section]
+            all_sections = [selected_models_text, selected_models_section]
 
         if self.spec.generate_explanations:
-            # try:
-            # If the key is present, call the "explain_model" method
-            self.explain_model()
-
-            # Create a markdown text block for the global explanation section
-            global_explanation_text = dp.Text(
-                f"## Global Explanation of Models \n "
-                "The following tables provide the feature attribution for the global explainability."
-            )
-
-            # Convert the global explanation data to a DataFrame
-            global_explanation_df = pd.DataFrame(self.global_explanation)
+            try:
+                # If the key is present, call the "explain_model" method
+                self.explain_model()
 
-            self.formatted_global_explanation = (
-                global_explanation_df / global_explanation_df.sum(axis=0) * 100
-            )
-            self.formatted_global_explanation = (
-                self.formatted_global_explanation.rename(
-                    {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                # Create a markdown text block for the global explanation section
+                global_explanation_text = dp.Text(
+                    f"## Global Explanation of Models \n "
+                    "The following tables provide the feature attribution for the global explainability."
                 )
-            )
 
-            # Create a markdown section for the global explainability
-            global_explanation_section = dp.Blocks(
-                "### Global Explainability ",
-                dp.DataTable(self.formatted_global_explanation),
-            )
+                # Convert the global explanation data to a DataFrame
+                global_explanation_df = pd.DataFrame(self.global_explanation)
 
-            aggregate_local_explanations = pd.DataFrame()
-            for s_id, local_ex_df in self.local_explanation.items():
-                local_ex_df_copy = local_ex_df.copy()
-                local_ex_df_copy["Series"] = s_id
-                aggregate_local_explanations = pd.concat(
-                    [aggregate_local_explanations, local_ex_df_copy], axis=0
+                self.formatted_global_explanation = (
+                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
+                )
+                self.formatted_global_explanation = (
+                    self.formatted_global_explanation.rename(
+                        {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                    )
                 )
-            self.formatted_local_explanation = aggregate_local_explanations
 
-            local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
-            blocks = [
-                dp.DataTable(
-                    local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
-                    label=s_id,
+                # Create a markdown section for the global explainability
+                global_explanation_section = dp.Blocks(
+                    "### Global Explainability ",
+                    dp.DataTable(self.formatted_global_explanation),
                 )
-                for s_id, local_ex_df in self.local_explanation.items()
-            ]
-            local_explanation_section = (
-                dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-            )
 
-            # Append the global explanation text and section to the "all_sections" list
-            all_sections = all_sections + [
-                global_explanation_text,
-                global_explanation_section,
-                local_explanation_text,
-                local_explanation_section,
-            ]
-            # except Exception as e:
-            #     logger.warn(f"Failed to generate Explanations with error: {e}.")
-            #     logger.debug(f"Full Traceback: {traceback.format_exc()}")
+                aggregate_local_explanations = pd.DataFrame()
+                for s_id, local_ex_df in self.local_explanation.items():
+                    local_ex_df_copy = local_ex_df.copy()
+                    local_ex_df_copy["Series"] = s_id
+                    aggregate_local_explanations = pd.concat(
+                        [aggregate_local_explanations, local_ex_df_copy], axis=0
+                    )
+                self.formatted_local_explanation = aggregate_local_explanations
+
+                local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
+                blocks = [
+                    dp.DataTable(
+                        local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+                        label=s_id,
+                    )
+                    for s_id, local_ex_df in self.local_explanation.items()
+                ]
+                local_explanation_section = (
+                    dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+                )
+
+                # Append the global explanation text and section to the "all_sections" list
+                all_sections = all_sections + [
+                    global_explanation_text,
+                    global_explanation_section,
+                    local_explanation_text,
+                    local_explanation_section,
+                ]
+            except Exception as e:
+                logger.warn(f"Failed to generate Explanations with error: {e}.")
+                logger.debug(f"Full Traceback: {traceback.format_exc()}")
 
         model_description = dp.Text(
             "The AutoMLx model automatically preprocesses, selects and engineers "
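Two things change in `_generate_report`: the selected-models table is only built when `self.models` is non-empty, and explanation generation is wrapped in `try`/`except` so a failure is logged rather than aborting the whole report. The displayed attributions are also normalized, global explanations column-wise and local explanations row-wise by absolute sum. A small self-contained sketch of that normalization with toy data (series and feature names are illustrative only, not taken from the operator):

import pandas as pd

# Toy global attributions: one column per series, one row per feature.
global_explanation_df = pd.DataFrame(
    {"series_1": [3.0, 1.0], "series_2": [2.0, 2.0]},
    index=["feature_a", "feature_b"],
)
# Column-wise: each series' attributions are rescaled to sum to 100.
formatted_global = global_explanation_df / global_explanation_df.sum(axis=0) * 100

# Toy local attributions: one row per forecast timestamp.
local_ex_df = pd.DataFrame(
    {"feature_a": [0.5, -1.0], "feature_b": [1.5, 1.0]},
    index=["2024-01-01", "2024-01-02"],
)
# Row-wise: each row is rescaled by its absolute sum, so contributions
# remain comparable across timestamps even when signs are mixed.
formatted_local = local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100

print(formatted_global)
print(formatted_local)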