Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Updated join key in transform_new function in scale transformer #228

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -405,3 +405,5 @@ $RECYCLE.BIN/
*.lnk

# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,pycharm,macos,windows,python,rust,linux,asdf
requirements-dev.lock
requirements.lock
8 changes: 4 additions & 4 deletions functime/base/forecaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ class Forecaster(Model):
----------
freq : str
Offset alias supported by Polars.
lags : int
Number of lagged target variables.
lags : Optional[int]
Number of lagged target variables. Can also be set to 0, in which case `X` must be provided to supply the features.
max_horizons: Optional[int]
Maximum number of horizons to predict directly.
Only applied if `strategy` equals "direct" or "ensemble".
Expand All @@ -114,7 +114,7 @@ class Forecaster(Model):
def __init__(
self,
freq: Union[str, None],
lags: int,
lags: Optional[int],
max_horizons: Optional[int] = None,
strategy: FORECAST_STRATEGIES = None,
target_transform: Optional[Union[Transformer, List[Transformer]]] = None,
Expand Down Expand Up @@ -290,7 +290,7 @@ def backtest(

y = check_backtest_lengths(
y,
max_lags=self.lags,
max_lags=self.lags if self.lags else 0,
test_size=test_size,
drop_short=drop_short,
drop_tolerance=drop_tolerance,
Expand Down
17 changes: 11 additions & 6 deletions functime/forecasting/_ar.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

def fit_recursive(
regress: Callable[[pl.LazyFrame, pl.LazyFrame], Any],
lags: int,
lags: Optional[int],
y: pl.LazyFrame,
X: Optional[pl.LazyFrame] = None,
) -> Mapping[str, Any]:
Expand All @@ -39,7 +39,7 @@ def fit_recursive(
# 2. Fit
fitted_regressor = regress(X=X_final, y=y_final)
# 3. Collect artifacts
y_lag = make_y_lag(X_y_final, target_col=y.columns[-1], lags=lags)
y_lag = make_y_lag(X_y_final, target_col=y.columns[-1], lags=lags if lags else 0)
artifacts = {
"regressor": fitted_regressor,
"y_lag": y_lag.collect(streaming=True),
Expand Down Expand Up @@ -79,7 +79,7 @@ def fit_direct(

def fit_autoreg(
regress: Callable[[pl.LazyFrame, pl.LazyFrame], Any],
lags: int,
lags: Optional[int],
y: Union[pl.DataFrame, pl.LazyFrame],
X: Optional[Union[pl.DataFrame, pl.LazyFrame]] = None,
max_horizons: Optional[int] = None,
Expand Down Expand Up @@ -229,10 +229,15 @@ def predict_recursive(
lag_cols = y_lag.columns[2:]
lead_col = lag_cols[0]

Y_LAG_BLUEPRINT = y_lag

def _get_x_y_slice(y_lag: pl.DataFrame, i: int):
x_y_slice = y_lag.select(
[entity_col, pl.all().exclude(entity_col).list.get(-1)]
)
if lead_col[-1] == "0": # checks if there are no lags
x_y_slice = Y_LAG_BLUEPRINT
else:
x_y_slice = y_lag.select(
[entity_col, pl.all().exclude(entity_col).list.get(-1)]
)
if X is not None:
x = X.select([entity_col, pl.all().exclude(entity_col).list.get(i)])
x_y_slice = x_y_slice.join(x, on=entity_col, how="left")
Expand Down
11 changes: 7 additions & 4 deletions functime/forecasting/_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ def make_reduction(
y = y.lazy()
X = X.lazy() if X is not None else X
# Get lags
y_lag = y.pipe(lag(lags=list(range(1, lags + 1))))
X_y = y_lag.join(y, on=idx_cols, how="inner").select(
[*y.columns, *y_lag.columns[2:]]
)
if lags:
y_lag = y.pipe(lag(lags=list(range(1, lags + 1))))
X_y = y_lag.join(y, on=idx_cols, how="inner").select(
[*y.columns, *y_lag.columns[2:]]
)
else:
X_y = y.with_columns(pl.lit(None).alias(f"{y.columns[-1]}__lag_0"))
# Exogenous features
if X is not None:
X_y = _join_X_y(X_y, X)
Expand Down
15 changes: 8 additions & 7 deletions functime/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,17 +401,18 @@ def invert(state: ModelState, X: pl.LazyFrame) -> pl.LazyFrame:
return X

def transform_new(state: ModelState, X: pl.LazyFrame) -> pl.LazyFrame:
artifacts = state.artifacts
idx_cols = X.columns[:2]
numeric_cols = state.artifacts["numeric_cols"]
_mean = artifacts["_mean"]
_std = artifacts["_std"]
entity_col = idx_cols[0]
artifacts = state.artifacts
numeric_cols = artifacts["numeric_cols"]
if use_mean:
X = X.join(_mean, on=idx_cols, how="left").select(
_mean = artifacts["_mean"]
X = X.join(_mean, on=entity_col, how="left").select(
idx_cols + [pl.col(col) - pl.col(f"{col}_mean") for col in numeric_cols]
)
if use_std:
X = X.join(_std, on=idx_cols, how="left").select(
_std = artifacts["_std"]
X = X.join(_std, on=entity_col, how="left").select(
idx_cols + [pl.col(col) / pl.col(f"{col}_std") for col in numeric_cols]
)
if rescale_bool:
Expand Down Expand Up @@ -1074,4 +1075,4 @@ def transform(X: pl.LazyFrame) -> pl.LazyFrame:
)
return {"X_new": X_new}

return transform
return transform