Skip to content

Commit d04075b

Browse files
committed
Merge branch 'feature/repository_maintanance' into ODSC-54710/python311_312_support
2 parents a2f4fd9 + fc05985 commit d04075b

22 files changed

+4404
-430
lines changed

ads/dataset/label_encoder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def fit(self, X: "pandas.DataFrame"):
5252
5353
"""
5454
for column in X.columns:
55-
if X[column].dtype.name in ["object", "category"]:
55+
if X[column].dtype.name in ["object", "category", "bool"]:
5656
X[column] = X[column].astype(str)
5757
self.label_encoders[column] = LabelEncoder()
5858
self.label_encoders[column].fit(X[column])

ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py

+8-15
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
merge_category_columns,
1111
)
1212
from ads.opctl.operator.lowcode.common.data import AbstractData
13-
from ads.opctl.operator.lowcode.common.data import AbstractData
1413
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
1514
from ads.opctl import logger
1615
import pandas as pd
@@ -56,6 +55,10 @@ def __init__(self, spec: AnomalyOperatorSpec):
5655
self.X_valid_dict = self.valid_data.X_valid_dict
5756
self.y_valid_dict = self.valid_data.y_valid_dict
5857

58+
# Returns raw data based on the series_id, i.e., the merged target_category_column value
59+
def get_raw_data_by_cat(self, category):
60+
return self._data.get_raw_data_by_cat(category)
61+
5962

6063
class AnomalyOutput:
6164
def __init__(self, date_column):
@@ -94,38 +97,28 @@ def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
9497
outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
9598
return outliers
9699

97-
def get_inliers(self, data):
100+
def get_inliers(self, datasets):
98101
inliers = pd.DataFrame()
99102

100103
for category in self.list_categories():
101104
inliers = pd.concat(
102105
[
103106
inliers,
104-
self.get_inliers_by_cat(
105-
category,
106-
data[data[OutputColumns.Series] == category]
107-
.reset_index(drop=True)
108-
.drop(OutputColumns.Series, axis=1),
109-
),
107+
self.get_inliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
110108
],
111109
axis=0,
112110
ignore_index=True,
113111
)
114112
return inliers
115113

116-
def get_outliers(self, data):
114+
def get_outliers(self, datasets):
117115
outliers = pd.DataFrame()
118116

119117
for category in self.list_categories():
120118
outliers = pd.concat(
121119
[
122120
outliers,
123-
self.get_outliers_by_cat(
124-
category,
125-
data[data[OutputColumns.Series] == category]
126-
.reset_index(drop=True)
127-
.drop(OutputColumns.Series, axis=1),
128-
),
121+
self.get_outliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
129122
],
130123
axis=0,
131124
ignore_index=True,

ads/opctl/operator/lowcode/anomaly/model/base_model.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -272,15 +272,15 @@ def _save_report(
272272
f2.write(f1.read())
273273

274274
if self.spec.generate_inliers:
275-
inliers = anomaly_output.get_inliers(self.datasets.data)
275+
inliers = anomaly_output.get_inliers(self.datasets)
276276
write_data(
277277
data=inliers,
278278
filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
279279
format="csv",
280280
storage_options=storage_options,
281281
)
282282

283-
outliers = anomaly_output.get_outliers(self.datasets.data)
283+
outliers = anomaly_output.get_outliers(self.datasets)
284284
write_data(
285285
data=outliers,
286286
filename=os.path.join(unique_output_dir, self.spec.outliers_filename),

ads/opctl/operator/lowcode/anomaly/operator_config.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,21 @@ class TestData(InputData):
3636
"""Class representing operator specification test data details."""
3737

3838

39+
@dataclass(repr=True)
40+
class PreprocessingSteps(DataClassSerializable):
41+
"""Class representing preprocessing steps for operator."""
42+
43+
missing_value_imputation: bool = True
44+
outlier_treatment: bool = False
45+
46+
47+
@dataclass(repr=True)
48+
class DataPreprocessor(DataClassSerializable):
49+
"""Class representing operator specification preprocessing details."""
50+
51+
enabled: bool = True
52+
steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
53+
3954
@dataclass(repr=True)
4055
class AnomalyOperatorSpec(DataClassSerializable):
4156
"""Class representing operator specification."""
@@ -74,7 +89,9 @@ def __post_init__(self):
7489
self.generate_inliers if self.generate_inliers is not None else False
7590
)
7691
self.model_kwargs = self.model_kwargs or dict()
77-
92+
self.preprocessing = (
93+
self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
94+
)
7895

7996
@dataclass(repr=True)
8097
class AnomalyOperatorConfig(OperatorConfig):

ads/opctl/operator/lowcode/anomaly/schema.yaml

+16-4
Original file line numberDiff line numberDiff line change
@@ -307,11 +307,23 @@ spec:
307307
description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
308308

309309
preprocessing:
310-
type: boolean
310+
type: dict
311311
required: false
312-
default: true
313-
meta:
314-
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
312+
schema:
313+
enabled:
314+
type: boolean
315+
required: false
316+
default: true
317+
meta:
318+
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
319+
steps:
320+
type: dict
321+
required: false
322+
schema:
323+
missing_value_imputation:
324+
type: boolean
325+
required: false
326+
default: true
315327

316328
generate_report:
317329
type: boolean

ads/opctl/operator/lowcode/common/data.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
DataMismatchError,
1717
)
1818
from abc import ABC
19+
import pandas as pd
1920

2021

2122
class AbstractData(ABC):
@@ -26,6 +27,19 @@ def __init__(self, spec: dict, name="input_data"):
2627
self.name = name
2728
self.load_transform_ingest_data(spec)
2829

30+
def get_raw_data_by_cat(self, category):
31+
mapping = self._data_transformer.get_target_category_columns_map()
32+
# For the given category, mapping gives the target_category_columns and its values.
33+
# condition filters raw_data based on the values of target_category_columns for the given category
34+
condition = pd.Series(True, index=self.raw_data.index)
35+
if category in mapping:
36+
for col, val in mapping[category].items():
37+
condition &= (self.raw_data[col] == val)
38+
data_by_cat = self.raw_data[condition].reset_index(drop=True)
39+
data_by_cat = self._data_transformer._format_datetime_col(data_by_cat)
40+
return data_by_cat
41+
42+
2943
def get_dict_by_series(self):
3044
if not self._data_dict:
3145
for s_id in self.list_series_ids():
@@ -73,8 +87,8 @@ def _transform_data(self, spec, raw_data, **kwargs):
7387
return data
7488

7589
def load_transform_ingest_data(self, spec):
76-
raw_data = self._load_data(getattr(spec, self.name))
77-
self.data = self._transform_data(spec, raw_data)
90+
self.raw_data = self._load_data(getattr(spec, self.name))
91+
self.data = self._transform_data(spec, self.raw_data)
7892
self._ingest_data(spec)
7993

8094
def _ingest_data(self, spec):

ads/opctl/operator/lowcode/common/transformations.py

+48-14
Original file line numberDiff line numberDiff line change
@@ -58,33 +58,45 @@ def run(self, data):
5858
clean_df = self._format_datetime_col(clean_df)
5959
clean_df = self._set_multi_index(clean_df)
6060

61-
if self.name == "historical_data":
62-
try:
63-
clean_df = self._missing_value_imputation_hist(clean_df)
64-
except Exception as e:
65-
logger.debug(f"Missing value imputation failed with {e.args}")
66-
if self.preprocessing:
67-
try:
68-
clean_df = self._outlier_treatment(clean_df)
69-
except Exception as e:
70-
logger.debug(f"Outlier Treatment failed with {e.args}")
71-
else:
72-
logger.debug("Skipping outlier treatment as preprocessing is disabled")
73-
elif self.name == "additional_data":
74-
clean_df = self._missing_value_imputation_add(clean_df)
61+
if self.preprocessing and self.preprocessing.enabled:
62+
if self.name == "historical_data":
63+
if self.preprocessing.steps.missing_value_imputation:
64+
try:
65+
clean_df = self._missing_value_imputation_hist(clean_df)
66+
except Exception as e:
67+
logger.debug(f"Missing value imputation failed with {e.args}")
68+
else:
69+
logger.info("Skipping missing value imputation because it is disabled")
70+
if self.preprocessing.steps.outlier_treatment:
71+
try:
72+
clean_df = self._outlier_treatment(clean_df)
73+
except Exception as e:
74+
logger.debug(f"Outlier Treatment failed with {e.args}")
75+
else:
76+
logger.info("Skipping outlier treatment because it is disabled")
77+
elif self.name == "additional_data":
78+
clean_df = self._missing_value_imputation_add(clean_df)
79+
else:
80+
logger.info("Skipping all preprocessing steps because preprocessing is disabled")
7581
return clean_df
7682

7783
def _remove_trailing_whitespace(self, df):
7884
return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
7985

8086
def _set_series_id_column(self, df):
87+
self._target_category_columns_map = dict()
8188
if not self.target_category_columns:
8289
df[DataColumns.Series] = "Series 1"
8390
self.has_artificial_series = True
8491
else:
8592
df[DataColumns.Series] = merge_category_columns(
8693
df, self.target_category_columns
8794
)
95+
merged_values = df[DataColumns.Series].unique().tolist()
96+
if self.target_category_columns:
97+
for value in merged_values:
98+
self._target_category_columns_map[value] = df[df[DataColumns.Series] == value][self.target_category_columns].drop_duplicates().iloc[0].to_dict()
99+
88100
df = df.drop(self.target_category_columns, axis=1)
89101
return df
90102

@@ -189,3 +201,25 @@ def _check_historical_dataset(self, df):
189201
raise DataMismatchError(
190202
f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
191203
)
204+
205+
"""
206+
Map between merged target category column values and target category column and its value
207+
If target category columns are PPG_Code, Class, Num
208+
Merged target category column values are Product Category 1__A__1, Product Category 2__A__2
209+
Then target_category_columns_map would be
210+
{
211+
"Product Category 1__A__1": {
212+
"PPG_Code": "Product Category 1",
213+
"Class": "A",
214+
"Num": 1
215+
},
216+
"Product Category 2__A__2": {
217+
"PPG_Code": "Product Category 2",
218+
"Class": "A",
219+
"Num": 2
220+
},
221+
222+
}
223+
"""
224+
def get_target_category_columns_map(self):
225+
return self._target_category_columns_map

ads/opctl/operator/lowcode/forecast/model/arima.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
2929
self.local_explanation = {}
3030
self.formatted_global_explanation = None
3131
self.formatted_local_explanation = None
32+
self.constant_cols = {}
3233

3334
def set_kwargs(self):
3435
# Extract the Confidence Interval Width and convert to arima's equivalent - alpha
@@ -64,6 +65,10 @@ def _train_model(self, i, s_id, df, model_kwargs):
6465
try:
6566
target = self.original_target_column
6667
self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
68+
# If trend is constant, remove constant columns
69+
if 'trend' not in model_kwargs or model_kwargs['trend'] == 'c':
70+
self.constant_cols[s_id] = df.columns[df.nunique() == 1]
71+
df = df.drop(columns=self.constant_cols[s_id])
6772

6873
# format the dataframe for this target. Dropping NA on target[df] will remove all future data
6974
data = self.preprocess(df, s_id)
@@ -74,7 +79,7 @@ def _train_model(self, i, s_id, df, model_kwargs):
7479
X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
7580
X_pred = self.get_horizon(data).drop(target, axis=1)
7681

77-
if self.loaded_models is not None:
82+
if self.loaded_models is not None and s_id in self.loaded_models:
7883
model = self.loaded_models[s_id]
7984
else:
8085
# Build and fit model
@@ -143,17 +148,18 @@ def _build_model(self) -> pd.DataFrame:
143148
def _generate_report(self):
144149
"""The method that needs to be implemented on the particular model level."""
145150
import datapane as dp
146-
147-
sec5_text = dp.Text(f"## ARIMA Model Parameters")
148-
blocks = [
149-
dp.HTML(
150-
m.summary().as_html(),
151-
label=s_id,
152-
)
153-
for i, (s_id, m) in enumerate(self.models.items())
154-
]
155-
sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
156-
all_sections = [sec5_text, sec5]
151+
all_sections = []
152+
if len(self.models) > 0:
153+
sec5_text = dp.Text(f"## ARIMA Model Parameters")
154+
blocks = [
155+
dp.HTML(
156+
m.summary().as_html(),
157+
label=s_id,
158+
)
159+
for i, (s_id, m) in enumerate(self.models.items())
160+
]
161+
sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
162+
all_sections = [sec5_text, sec5]
157163

158164
if self.spec.generate_explanations:
159165
try:
@@ -239,6 +245,9 @@ def _custom_predict(
239245
"""
240246
data: ForecastDatasets.get_data_at_series(s_id)
241247
"""
248+
if series_id in self.constant_cols:
249+
data = data.drop(columns=self.constant_cols[series_id])
250+
242251
data = data.drop([target_col], axis=1)
243252
data[dt_column_name] = seconds_to_datetime(
244253
data[dt_column_name], dt_format=self.spec.datetime_column.format

0 commit comments

Comments
 (0)