oracle
diff --git a/ads/dataset/label_encoder.py b/ads/dataset/label_encoder.py
Lines changed: 1 addition & 1 deletion
diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py
Lines changed: 0 additions & 1 deletion
diff --git a/ads/opctl/operator/lowcode/anomaly/operator_config.py b/ads/opctl/operator/lowcode/anomaly/operator_config.py
Lines changed: 18 additions & 1 deletion
diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml
Lines changed: 16 additions & 4 deletions
diff --git a/ads/opctl/operator/lowcode/common/transformations.py b/ads/opctl/operator/lowcode/common/transformations.py
Lines changed: 20 additions & 14 deletions
diff --git a/ads/opctl/operator/lowcode/forecast/model/arima.py b/ads/opctl/operator/lowcode/forecast/model/arima.py
Lines changed: 21 additions & 12 deletions
diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py
Lines changed: 77 additions & 68 deletions
@@ -52,7 +52,7 @@ def fit(self, X: "pandas.DataFrame"):
 
         """
         for column in X.columns:
-            if X[column].dtype.name in ["object", "category"]:
+            if X[column].dtype.name in ["object", "category", "bool"]:
                 X[column] = X[column].astype(str)
                 self.label_encoders[column] = LabelEncoder()
                 self.label_encoders[column].fit(X[column])
 
@@ -10,7 +10,6 @@
     merge_category_columns,
 )
 from ads.opctl.operator.lowcode.common.data import AbstractData
-from ads.opctl.operator.lowcode.common.data import AbstractData
 from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
 from ads.opctl import logger
 import pandas as pd
 
@@ -36,6 +36,21 @@ class TestData(InputData):
     """Class representing operator specification test data details."""
 
 
+@dataclass(repr=True)
+class PreprocessingSteps(DataClassSerializable):
+    """Class representing preprocessing steps for operator."""
+
+    missing_value_imputation: bool = True
+    outlier_treatment: bool = False
+
+
+@dataclass(repr=True)
+class DataPreprocessor(DataClassSerializable):
+    """Class representing operator specification preprocessing details."""
+
+    enabled: bool = True
+    steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
+
 @dataclass(repr=True)
 class AnomalyOperatorSpec(DataClassSerializable):
     """Class representing operator specification."""
@@ -74,7 +89,9 @@ def __post_init__(self):
             self.generate_inliers if self.generate_inliers is not None else False
         )
         self.model_kwargs = self.model_kwargs or dict()
-
+        self.preprocessing = (
+            self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
+        )
 
 @dataclass(repr=True)
 class AnomalyOperatorConfig(OperatorConfig):
 
@@ -307,11 +307,23 @@ spec:
         description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
 
     preprocessing:
-      type: boolean
+      type: dict
       required: false
-      default: true
-      meta:
-        description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+      schema:
+        enabled:
+          type: boolean
+          required: false
+          default: true
+          meta:
+            description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+        steps:
+          type: dict
+          required: false
+          schema:
+            missing_value_imputation:
+              type: boolean
+              required: false
+              default: true
 
     generate_report:
       type: boolean
 
@@ -58,20 +58,26 @@ def run(self, data):
         clean_df = self._format_datetime_col(clean_df)
         clean_df = self._set_multi_index(clean_df)
 
-        if self.name == "historical_data":
-            try:
-                clean_df = self._missing_value_imputation_hist(clean_df)
-            except Exception as e:
-                logger.debug(f"Missing value imputation failed with {e.args}")
-            if self.preprocessing:
-                try:
-                    clean_df = self._outlier_treatment(clean_df)
-                except Exception as e:
-                    logger.debug(f"Outlier Treatment failed with {e.args}")
-            else:
-                logger.debug("Skipping outlier treatment as preprocessing is disabled")
-        elif self.name == "additional_data":
-            clean_df = self._missing_value_imputation_add(clean_df)
+        if self.preprocessing and self.preprocessing.enabled:
+            if self.name == "historical_data":
+                if self.preprocessing.steps.missing_value_imputation:
+                    try:
+                        clean_df = self._missing_value_imputation_hist(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Missing value imputation failed with {e.args}")
+                else:
+                    logger.info("Skipping missing value imputation because it is disabled")
+                if self.preprocessing.steps.outlier_treatment:
+                    try:
+                        clean_df = self._outlier_treatment(clean_df)
+                    except Exception as e:
+                        logger.debug(f"Outlier Treatment failed with {e.args}")
+                else:
+                    logger.info("Skipping outlier treatment because it is disabled")
+            elif self.name == "additional_data":
+                clean_df = self._missing_value_imputation_add(clean_df)
+        else:
+            logger.info("Skipping all preprocessing steps because preprocessing is disabled")
         return clean_df
 
     def _remove_trailing_whitespace(self, df):
 
@@ -29,6 +29,7 @@ def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
         self.local_explanation = {}
         self.formatted_global_explanation = None
         self.formatted_local_explanation = None
+        self.constant_cols = {}
 
     def set_kwargs(self):
         # Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ def _train_model(self, i, s_id, df, model_kwargs):
         try:
             target = self.original_target_column
             self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
+            # If trend is constant, remove constant columns
+            if 'trend' not in model_kwargs or model_kwargs['trend'] == 'c':
+                self.constant_cols[s_id] = df.columns[df.nunique() == 1]
+                df = df.drop(columns=self.constant_cols[s_id])
 
             # format the dataframe for this target. Dropping NA on target[df] will remove all future data
             data = self.preprocess(df, s_id)
@@ -74,7 +79,7 @@ def _train_model(self, i, s_id, df, model_kwargs):
             X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
             X_pred = self.get_horizon(data).drop(target, axis=1)
 
-            if self.loaded_models is not None:
+            if self.loaded_models is not None and s_id in self.loaded_models:
                 model = self.loaded_models[s_id]
             else:
                 # Build and fit model
@@ -143,17 +148,18 @@ def _build_model(self) -> pd.DataFrame:
     def _generate_report(self):
         """The method that needs to be implemented on the particular model level."""
         import datapane as dp
-
-        sec5_text = dp.Text(f"## ARIMA Model Parameters")
-        blocks = [
-            dp.HTML(
-                m.summary().as_html(),
-                label=s_id,
-            )
-            for i, (s_id, m) in enumerate(self.models.items())
-        ]
-        sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-        all_sections = [sec5_text, sec5]
+        all_sections = []
+        if len(self.models) > 0:
+            sec5_text = dp.Text(f"## ARIMA Model Parameters")
+            blocks = [
+                dp.HTML(
+                    m.summary().as_html(),
+                    label=s_id,
+                )
+                for i, (s_id, m) in enumerate(self.models.items())
+            ]
+            sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+            all_sections = [sec5_text, sec5]
 
         if self.spec.generate_explanations:
             try:
@@ -239,6 +245,9 @@ def _custom_predict(
             """
             data: ForecastDatasets.get_data_at_series(s_id)
             """
+            if series_id in self.constant_cols:
+                data = data.drop(columns=self.constant_cols[series_id])
+
             data = data.drop([target_col], axis=1)
             data[dt_column_name] = seconds_to_datetime(
                 data[dt_column_name], dt_format=self.spec.datetime_column.format
 
@@ -22,6 +22,7 @@
     seconds_to_datetime,
     datetime_to_seconds,
 )
+from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
 
 AUTOMLX_N_ALGOS_TUNED = 4
 AUTOMLX_DEFAULT_SCORE_METRIC = "neg_sym_mean_abs_percent_error"
@@ -51,8 +52,13 @@ def set_kwargs(self):
         ] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
         return model_kwargs_cleaned, time_budget
 
-    def preprocess(self, data, series_id=None):
-        return data.set_index(self.spec.datetime_column.name)
+
+    def preprocess(self, data, series_id=None):  # TODO: re-use self.le for explanations
+        _, df_encoded = _label_encode_dataframe(
+            data,
+            no_encode={self.spec.datetime_column.name, self.original_target_column},
+        )
+        return df_encoded.set_index(self.spec.datetime_column.name)
 
     @runtime_dependency(
         module="automlx",
@@ -105,7 +111,7 @@ def _build_model(self) -> pd.DataFrame:
 
                 logger.debug(f"Time Index Monotonic: {data_i.index.is_monotonic}")
 
-                if self.loaded_models is not None:
+                if self.loaded_models is not None and s_id in self.loaded_models:
                     model = self.loaded_models[s_id]
                 else:
                     model = automlx.Pipeline(
@@ -195,82 +201,85 @@ def _generate_report(self):
         )
         selected_models = dict()
         models = self.models
-        for i, (s_id, df) in enumerate(self.full_data_dict.items()):
-            selected_models[s_id] = {
-                "series_id": s_id,
-                "selected_model": models[s_id].selected_model_,
-                "model_params": models[s_id].selected_model_params_,
-            }
-        selected_models_df = pd.DataFrame(
-            selected_models.items(), columns=["series_id", "best_selected_model"]
-        )
-        selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
-        selected_models_section = dp.Blocks(
-            "### Best Selected Model", dp.DataTable(selected_df)
-        )
+        all_sections = []
+
+        if len(self.models) > 0:
+            for i, (s_id, m) in enumerate(models.items()):
+                selected_models[s_id] = {
+                    "series_id": s_id,
+                    "selected_model": m.selected_model_,
+                    "model_params": m.selected_model_params_,
+                }
+            selected_models_df = pd.DataFrame(
+                selected_models.items(), columns=["series_id", "best_selected_model"]
+            )
+            selected_df = selected_models_df["best_selected_model"].apply(pd.Series)
+            selected_models_section = dp.Blocks(
+                "### Best Selected Model", dp.DataTable(selected_df)
+            )
 
-        all_sections = [selected_models_text, selected_models_section]
+            all_sections = [selected_models_text, selected_models_section]
 
         if self.spec.generate_explanations:
-            # try:
-            # If the key is present, call the "explain_model" method
-            self.explain_model()
-
-            # Create a markdown text block for the global explanation section
-            global_explanation_text = dp.Text(
-                f"## Global Explanation of Models \n "
-                "The following tables provide the feature attribution for the global explainability."
-            )
-
-            # Convert the global explanation data to a DataFrame
-            global_explanation_df = pd.DataFrame(self.global_explanation)
+            try:
+                # If the key is present, call the "explain_model" method
+                self.explain_model()
 
-            self.formatted_global_explanation = (
-                global_explanation_df / global_explanation_df.sum(axis=0) * 100
-            )
-            self.formatted_global_explanation = (
-                self.formatted_global_explanation.rename(
-                    {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                # Create a markdown text block for the global explanation section
+                global_explanation_text = dp.Text(
+                    f"## Global Explanation of Models \n "
+                    "The following tables provide the feature attribution for the global explainability."
                 )
-            )
 
-            # Create a markdown section for the global explainability
-            global_explanation_section = dp.Blocks(
-                "### Global Explainability ",
-                dp.DataTable(self.formatted_global_explanation),
-            )
+                # Convert the global explanation data to a DataFrame
+                global_explanation_df = pd.DataFrame(self.global_explanation)
 
-            aggregate_local_explanations = pd.DataFrame()
-            for s_id, local_ex_df in self.local_explanation.items():
-                local_ex_df_copy = local_ex_df.copy()
-                local_ex_df_copy["Series"] = s_id
-                aggregate_local_explanations = pd.concat(
-                    [aggregate_local_explanations, local_ex_df_copy], axis=0
+                self.formatted_global_explanation = (
+                        global_explanation_df / global_explanation_df.sum(axis=0) * 100
+                )
+                self.formatted_global_explanation = (
+                    self.formatted_global_explanation.rename(
+                        {self.spec.datetime_column.name: ForecastOutputColumns.DATE}, axis=1
+                    )
                 )
-            self.formatted_local_explanation = aggregate_local_explanations
 
-            local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
-            blocks = [
-                dp.DataTable(
-                    local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
-                    label=s_id,
+                # Create a markdown section for the global explainability
+                global_explanation_section = dp.Blocks(
+                    "### Global Explainability ",
+                    dp.DataTable(self.formatted_global_explanation),
                 )
-                for s_id, local_ex_df in self.local_explanation.items()
-            ]
-            local_explanation_section = (
-                dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
-            )
 
-            # Append the global explanation text and section to the "all_sections" list
-            all_sections = all_sections + [
-                global_explanation_text,
-                global_explanation_section,
-                local_explanation_text,
-                local_explanation_section,
-            ]
-            # except Exception as e:
-            #     logger.warn(f"Failed to generate Explanations with error: {e}.")
-            #     logger.debug(f"Full Traceback: {traceback.format_exc()}")
+                aggregate_local_explanations = pd.DataFrame()
+                for s_id, local_ex_df in self.local_explanation.items():
+                    local_ex_df_copy = local_ex_df.copy()
+                    local_ex_df_copy["Series"] = s_id
+                    aggregate_local_explanations = pd.concat(
+                        [aggregate_local_explanations, local_ex_df_copy], axis=0
+                    )
+                self.formatted_local_explanation = aggregate_local_explanations
+
+                local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
+                blocks = [
+                    dp.DataTable(
+                        local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+                        label=s_id,
+                    )
+                    for s_id, local_ex_df in self.local_explanation.items()
+                ]
+                local_explanation_section = (
+                    dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+                )
+
+                # Append the global explanation text and section to the "all_sections" list
+                all_sections = all_sections + [
+                    global_explanation_text,
+                    global_explanation_section,
+                    local_explanation_text,
+                    local_explanation_section,
+                ]
+            except Exception as e:
+                logger.warn(f"Failed to generate Explanations with error: {e}.")
+                logger.debug(f"Full Traceback: {traceback.format_exc()}")
 
         model_description = dp.Text(
             "The AutoMLx model automatically preprocesses, selects and engineers "
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,6 @@`
`10`	`10`	`merge_category_columns,`
`11`	`11`	`)`
`12`	`12`	`from ads.opctl.operator.lowcode.common.data import AbstractData`
`13`		`-from ads.opctl.operator.lowcode.common.data import AbstractData`
`14`	`13`	`from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime`
`15`	`14`	`from ads.opctl import logger`
`16`	`15`	`import pandas as pd`