functime-org · baggiponte · Jan 12, 2024 · Jan 12, 2024
diff --git a/docs/forecasting.md b/docs/forecasting.md
@@ -58,7 +58,7 @@ X_train, X_test = train_test_split(test_size)(X)
 ## Fit / Predict / Score
 
 `functime` forecasters expose sklearn-compatible `.fit` and `.predict` methods.
-`functime.metrics` contains a comprehensive range of scoring functions for both point and probablistic forecasts.
+`functime.metrics` contains a comprehensive range of scoring functions for both point and probabilistic forecasts.
 
 ??? info "Supported Forecast Metrics"
 
@@ -250,7 +250,7 @@ from functime.preprocessing import diff, scale, boxcox
 # Apply first differences
 forecaster = linear_model(freq="1mo", lags=12, target_transform=diff(order=1, fill_strategy="backward"))
 
-# Or local standarization
+# Or local standardization
 forecaster = linear_model(freq="1mo", lags=12, target_transform=scale())
 
 # Or Box-cox
@@ -489,7 +489,7 @@ y_preds, y_resids = forecaster.backtest(
 )
 ```
 
-## Probablistic Forecasts
+## Probabilistic Forecasts
 
 `functime` supports two methods for generating prediction intervals.
 

diff --git a/docs/index.md b/docs/index.md
@@ -63,14 +63,14 @@ Check out this [guide](installation.md) to install functime. Requires Python 3.8
 
 ### Forecasting
 
-Point and probablistic forecasts using machine learning.
+Point and probabilistic forecasts using machine learning.
 Includes utilities to support the full forecasting lifecycle: preprocessing, feature extraction, time-series cross-validation / splitters, backtesting, automated hyperparameter tuning, and scoring.
 
 - Every forecaster supports **exogenous features**
 - **Seasonality** effects using [calendar, Fourier, and holiday features](https://docs.functime.ai/seasonality/)
 - **Backtesting** with [expanding window and sliding window splitters](https://docs.functime.ai/ref/cross-validation/)
 - **Automated lags and hyperparameter tuning** using [`FLAML`](https://github.com/microsoft/FLAML)
-- **Probablistic forecasts** via quantile regression and conformal prediction
+- **Probabilistic forecasts** via quantile regression and conformal prediction
 - **Forecast metrics** (e.g. MASE, SMAPE, CRPS) for scoring in parallel
 - Supports **recursive and direct** forecast strategies
 - **Censored model** for zero-inflated forecasts

diff --git a/docs/notebooks/benchmarks.ipynb b/docs/notebooks/benchmarks.ipynb
@@ -56,7 +56,7 @@
    "source": [
     "## 1. Setup for the comparison\n",
     "---\n",
-    "We are using the M4 dataset. We create a `pd.DataFrame` and `pl.DataFrame` and we define a list of dictionnary with the following structure:\n",
+    "We are using the M4 dataset. We create a `pd.DataFrame` and `pl.DataFrame` and we define a list of dictionaries with the following structure:\n",
     "<br>\n",
     "(<br>\n",
     "&emsp;  `<functime_function>`,<br>\n",

diff --git a/functime/base/forecaster.py b/functime/base/forecaster.py
@@ -16,7 +16,7 @@
 
 # The parameters of the Model
 P = ParamSpec("P")
-# The return type of the esimator's curried function
+# The return type of the estimator's curried function
 R = Tuple[TypeVar("fit", bound=Callable), TypeVar("predict", bound=Callable)]
 
 FORECAST_STRATEGIES = Optional[Literal["direct", "recursive", "naive"]]

diff --git a/functime/base/metric.py b/functime/base/metric.py
@@ -33,7 +33,7 @@ def _score(
             _set_string_cache
         )
         y_pred = y_pred.pipe(_enforce_string_cache, string_cache=string_cache)
-        # Coerce columnn names and dtypes
+        # Coerce column names and dtypes
         cols = y_true.columns
         y_pred = y_pred.rename({x: y for x, y in zip(y_pred.columns, cols)}).select(
             [pl.col(col).cast(dtype) for col, dtype in y_true.schema.items()]

diff --git a/functime/forecasting/lance.py b/functime/forecasting/lance.py
@@ -58,7 +58,7 @@ def fit(self, X: pl.DataFrame, y: pl.DataFrame):
             metric=self.metric,
             num_partitions=self.num_partitions,
             ivf_centroids=self.ivf_centroids,
-            # Must satisfy contraints:
+            # Must satisfy constraints:
             # 1. (n_dims / num_sub_vectors) % 8 == 0
             # 2. n_dims % num_sub_vectors == 0
             num_sub_vectors=self.num_sub_vectors or n_dims // 8,

diff --git a/functime/preprocessing.py b/functime/preprocessing.py
@@ -68,7 +68,7 @@ def transform(X: pl.LazyFrame) -> pl.LazyFrame:
 def time_to_arange(eager: bool = False):
     """Coerces time column into arange per entity.
 
-    Assumes even-spaced time-series and homogenous start dates.
+    Assumes evenly-spaced time-series and homogeneous start dates.
     """
 
     def transform(X: pl.LazyFrame) -> pl.LazyFrame:

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
@@ -66,7 +66,7 @@ def test_sklearn_impute(strategy, pd_X, benchmark):
 
 @pytest.mark.benchmark
 def test_sklearn_boxcox(pd_X, benchmark):
-    # All values must be stricty positive
+    # All values must be strictly positive
     X = pd_X.abs() + 0.001
     transformer = PowerTransformer(method="box-cox", standardize=False)
     benchmark(pd_gb_transform, X, transformer)

diff --git a/tests/test_tsfresh.py b/tests/test_tsfresh.py
@@ -391,7 +391,7 @@ def test_linear_trend(S, res, k):
         ([1], [1]),
     ],
 )
-def test_abolute_energy(S, res):
+def test_absolute_energy(S, res):
     assert_frame_equal(
         pl.DataFrame({"a": S}).select(absolute_energy(pl.col("a"))),
         pl.DataFrame(pl.Series("a", res)),
@@ -879,7 +879,7 @@ def test_benford_correlation():
     # Nan, division by 0
     X_uniform = pl.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
     X_uniform_lazy = pl.LazyFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
-    # Random serie
+    # Random series
     X_random = pl.DataFrame(
         {"a": [26.24, 3.03, -2.92, 3.5, -0.07, 0.35, 0.10, 0.51, -0.43]}
     )
@@ -1170,7 +1170,7 @@ def test_percent_reoccuring_values(S, res):  # noqa
         ([1.111, -2.45, 1.111, 2.45], [2.222]),
     ],
 )
-def test_sum_reocurring_points(S, res):
+def test_sum_reoccurring_points(S, res):
     assert_frame_equal(
         pl.DataFrame({"a": S}).select(sum_reoccurring_points(pl.col("a"))),
         pl.DataFrame(pl.Series("a", res)),
@@ -1190,7 +1190,7 @@ def test_sum_reocurring_points(S, res):
         ([1.111, -2.45, 1.111, 2.45], [1.111]),
     ],
 )
-def test_sum_reocurring_values(S, res):
+def test_sum_reoccurring_values(S, res):
     assert_frame_equal(
         pl.DataFrame({"a": S}).select(sum_reoccurring_values(pl.col("a"))),
         pl.DataFrame(pl.Series("a", res)),
@@ -1211,7 +1211,7 @@ def test_sum_reocurring_values(S, res):
         ([], [np.nan]),
     ],
 )
-def test_percent_reocurring_points(S, res):
+def test_percent_reoccurring_points(S, res):
     assert_frame_equal(
         pl.DataFrame({"a": S}).select(percent_reoccurring_points(pl.col("a"))),
         pl.DataFrame(pl.Series("literal", res, dtype=pl.Float64)),
@@ -1653,7 +1653,7 @@ def test_permutation_entropy(S, t, d, b, res):
 )
 def test_sample_entropy(S, res):
     # Test 1's answer comes from comparing result with Tsfresh
-    # Thest 2's answer comes from running this using the Python code on Wikipedia
+    # Test 2's answer comes from running this using the Python code on Wikipedia
     # Test 3 is an edge case. Should get nan.
     x = pl.Series(S)
     res_series = sample_entropy(x)