Skip to content

Commit 1138d2d

Browse files
authored
Merge branch 'main' into forecast_series_failure_bugfixes
2 parents f086d2a + 6d80d1d commit 1138d2d

File tree

16 files changed: +159 −36 lines changed

16 files changed: +159 −36 lines changed

.github/workflows/run-operators-unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ jobs:
3131
strategy:
3232
fail-fast: false
3333
matrix:
34-
python-version: ["3.8", "3.10.8"]
34+
python-version: ["3.8"]
3535

3636
steps:
3737
- uses: actions/checkout@v4

CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
* @darenr @mayoor @mrDzurb @VipulMascarenhas @qiuosier

ads/common/serializer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -464,7 +464,7 @@ def from_dict(
464464
)
465465

466466
obj = cls(
467-
**{key: obj_dict.get(key) for key in allowed_fields if key in obj_dict}
467+
**{key: obj_dict.get(key) for key in allowed_fields}
468468
)
469469

470470
for key, value in obj_dict.items():

ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
merge_category_columns,
1111
)
1212
from ads.opctl.operator.lowcode.common.data import AbstractData
13-
from ads.opctl.operator.lowcode.common.data import AbstractData
1413
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
1514
from ads.opctl import logger
1615
import pandas as pd

ads/opctl/operator/lowcode/anomaly/model/automlx.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,9 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
2626
)
2727
def _build_model(self) -> pd.DataFrame:
2828
from automlx import init
29+
import logging
2930
try:
30-
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
31+
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
3132
except Exception as e:
3233
logger.info("Ray already initialized")
3334
date_column = self.spec.datetime_column.name

ads/opctl/operator/lowcode/anomaly/operator_config.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,21 @@ class TestData(InputData):
3636
"""Class representing operator specification test data details."""
3737

3838

39+
@dataclass(repr=True)
40+
class PreprocessingSteps(DataClassSerializable):
41+
"""Class representing preprocessing steps for operator."""
42+
43+
missing_value_imputation: bool = True
44+
outlier_treatment: bool = False
45+
46+
47+
@dataclass(repr=True)
48+
class DataPreprocessor(DataClassSerializable):
49+
"""Class representing operator specification preprocessing details."""
50+
51+
enabled: bool = True
52+
steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
53+
3954
@dataclass(repr=True)
4055
class AnomalyOperatorSpec(DataClassSerializable):
4156
"""Class representing operator specification."""
@@ -74,7 +89,9 @@ def __post_init__(self):
7489
self.generate_inliers if self.generate_inliers is not None else False
7590
)
7691
self.model_kwargs = self.model_kwargs or dict()
77-
92+
self.preprocessing = (
93+
self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
94+
)
7895

7996
@dataclass(repr=True)
8097
class AnomalyOperatorConfig(OperatorConfig):

ads/opctl/operator/lowcode/anomaly/schema.yaml

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -307,11 +307,23 @@ spec:
307307
description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
308308

309309
preprocessing:
310-
type: boolean
310+
type: dict
311311
required: false
312-
default: true
313-
meta:
314-
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
312+
schema:
313+
enabled:
314+
type: boolean
315+
required: false
316+
default: true
317+
meta:
318+
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
319+
steps:
320+
type: dict
321+
required: false
322+
schema:
323+
missing_value_imputation:
324+
type: boolean
325+
required: false
326+
default: true
315327

316328
generate_report:
317329
type: boolean

ads/opctl/operator/lowcode/common/transformations.py

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -58,20 +58,26 @@ def run(self, data):
5858
clean_df = self._format_datetime_col(clean_df)
5959
clean_df = self._set_multi_index(clean_df)
6060

61-
if self.name == "historical_data":
62-
try:
63-
clean_df = self._missing_value_imputation_hist(clean_df)
64-
except Exception as e:
65-
logger.debug(f"Missing value imputation failed with {e.args}")
66-
if self.preprocessing:
67-
try:
68-
clean_df = self._outlier_treatment(clean_df)
69-
except Exception as e:
70-
logger.debug(f"Outlier Treatment failed with {e.args}")
71-
else:
72-
logger.debug("Skipping outlier treatment as preprocessing is disabled")
73-
elif self.name == "additional_data":
74-
clean_df = self._missing_value_imputation_add(clean_df)
61+
if self.preprocessing and self.preprocessing.enabled:
62+
if self.name == "historical_data":
63+
if self.preprocessing.steps.missing_value_imputation:
64+
try:
65+
clean_df = self._missing_value_imputation_hist(clean_df)
66+
except Exception as e:
67+
logger.debug(f"Missing value imputation failed with {e.args}")
68+
else:
69+
logger.info("Skipping missing value imputation because it is disabled")
70+
if self.preprocessing.steps.outlier_treatment:
71+
try:
72+
clean_df = self._outlier_treatment(clean_df)
73+
except Exception as e:
74+
logger.debug(f"Outlier Treatment failed with {e.args}")
75+
else:
76+
logger.info("Skipping outlier treatment because it is disabled")
77+
elif self.name == "additional_data":
78+
clean_df = self._missing_value_imputation_add(clean_df)
79+
else:
80+
logger.info("Skipping all preprocessing steps because preprocessing is disabled")
7581
return clean_df
7682

7783
def _remove_trailing_whitespace(self, df):

ads/opctl/operator/lowcode/forecast/environment.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,4 @@ dependencies:
1818
- optuna==3.1.0
1919
- oracle-automlx==23.4.1
2020
- oracle-automlx[forecasting]==23.4.1
21+
- fire

ads/opctl/operator/lowcode/forecast/model/automlx.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -76,17 +76,15 @@ def preprocess(self, data, series_id=None): # TODO: re-use self.le for explanat
7676
)
7777
def _build_model(self) -> pd.DataFrame:
7878
from automlx import init
79-
from sktime.forecasting.model_selection import temporal_train_test_split
79+
import logging
8080
try:
81-
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
81+
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
8282
except Exception as e:
8383
logger.info("Ray already initialized")
8484

85-
8685
full_data_dict = self.datasets.get_data_by_series()
8786

8887
self.models = dict()
89-
date_column = self.spec.datetime_column.name
9088
horizon = self.spec.horizon
9189
self.spec.confidence_interval_width = self.spec.confidence_interval_width or 0.8
9290
self.forecast_output = ForecastOutput(

0 commit comments

Comments
 (0)