
[ENH] Repository benchmarking #3026


Draft · wants to merge 10 commits into main
4 changes: 4 additions & 0 deletions .gitignore
@@ -165,3 +165,7 @@ local_code/
local/
local_code.py
local.py

/benchmarks/benchmark_results/

benchmarks/html/
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -31,6 +31,7 @@ repos:
hooks:
- id: ruff
args: [ "--fix"]
exclude: '(^|/)benchmarks/'

- repo: https://github.com/asottile/pyupgrade
rev: v3.20.0
74 changes: 74 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,74 @@
[//]: # (This was adapted from: https://github.com/scipy/scipy/tree/main/benchmarks)
# aeon Time Series Benchmarks

Benchmarking aeon with Airspeed Velocity.

## Usage

Airspeed Velocity manages building and Python environments by itself, unless told
otherwise. Some of the benchmarking features in `spin` also tell ASV to use the aeon
compiled by `spin`. To run the benchmarks, you will need to install the "dev"
Comment on lines +9 to +10

Member: What is spin?

Contributor Author: Ah, I need to remove this from the docs; that's something scipy uses and I decided we probably don't need it.

dependencies of aeon:

```bash
pip install --editable .[dev]
# NOTE: If the above fails, try quoting the extras: pip install --editable ".[dev]"
```

Run a benchmark against currently checked-out aeon version (don't record the result):

```bash
spin bench --submodule classification.distance_based
```

Compare change in benchmark results with another branch:

```bash
spin bench --compare main --submodule classification.distance_based
```

Run ASV commands directly (note that this will not set the `ccache` environment variables or disable BLAS/LAPACK multi-threading, as `spin` does):

```bash
cd benchmarks
asv run --skip-existing-commits --steps 10 ALL
asv publish
asv preview
```

More on how to use `asv` can be found in [ASV documentation](https://asv.readthedocs.io/). Command-line help is available as usual via `asv --help` and `asv run --help`.

## Writing benchmarks

See [ASV documentation](https://asv.readthedocs.io/) for the basics on how to write benchmarks.

Some things to consider:

- When importing things from aeon at the top of the benchmark files, do it as:

```python
from .common import safe_import

with safe_import():
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
```

The benchmark files need to be importable even when benchmarking old versions of aeon. Only the top-level imports need this guarding; the benchmarks themselves do not need to guard against missing features.

- Try to keep the runtime of the benchmark reasonable.

- Use ASV's `time_` methods for benchmarking times rather than writing your own timing code with, e.g., `time.perf_counter`, even if it requires some juggling when writing the benchmark.

- Preparing arrays and other inputs should generally be done in the `setup` method rather than in the `time_` methods, so that preparation time is not counted as part of the benchmarked operation (see the sketch at the end of this section).

- Use `run_monitored` from `common.py` if you need to measure memory usage.

- Benchmark versioning: by default `asv` invalidates old results when there is any code change in the benchmark routine or in `setup`/`setup_cache`.

This can be controlled manually by setting a fixed benchmark version number, using the `version` attribute. See [ASV documentation](https://asv.readthedocs.io/) for details.

If set manually, the value needs to be changed whenever old results should be invalidated. If you want to preserve previous results for a benchmark that did not previously have a manual `version` attribute, the automatically computed default values can be found in `benchmarks.json` inside the configured results directory (`benchmark_results/` here).

- Benchmark attributes such as `params` and `param_names` must be the same regardless of whether some features are available or whether, e.g., `AEON_XSLOW=1` is set.

Instead, benchmarks that should not run can be skipped by raising `NotImplementedError` in `setup()`, as shown in the sketch below.
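The points above can be combined in a single benchmark class. The sketch below is illustrative only and is not part of this PR: the estimator, the parameter grid and the `AEON_XSLOW` check are assumptions made for the example.

```python
import os

from .common import Benchmark, safe_import

with safe_import():
    from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
    from aeon.testing.data_generation import make_example_3d_numpy


class TimeKNNFit(Benchmark):
    # Pin the version so old results are only invalidated deliberately.
    version = 1

    params = [[(10, 1, 100), (50, 3, 1000)]]
    param_names = ["shape"]

    def setup(self, shape):
        # Skip the large case unless slow benchmarks are explicitly enabled.
        if shape == (50, 3, 1000) and os.getenv("AEON_XSLOW") != "1":
            raise NotImplementedError("set AEON_XSLOW=1 to run this case")
        # Data generation happens in setup so it is not included in the timing.
        self.X, self.y = make_example_3d_numpy(*shape, random_state=1)

    def time_fit(self, shape):
        # Only the fit call is measured.
        KNeighborsTimeSeriesClassifier(n_neighbors=1).fit(self.X, self.y)
```

ASV discovers the `time_` method, applies the manual `version`, and treats the `NotImplementedError` raised in `setup()` as a skip, exactly as described above.
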
87 changes: 87 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,87 @@
// This file was taken and adapted from: https://github.com/scipy/scipy/blob/main/benchmarks/asv.conf.json
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,

// The name of the project being benchmarked
"project": "aeon",

// The project's homepage
"project_url": "https://www.aeon-toolkit.org",

// The URL of the source code repository for the project being
// benchmarked
"repo": "..",
"dvcs": "git",
"branches": ["HEAD"],

// Customizable commands for building, installing, and
// uninstalling the project. See asv.conf.json documentation.
//
// "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
// "build_command": [
// "PIP_NO_BUILD_ISOLATION=false python -m pip install . --no-deps --no-index -w {build_cache_dir} {build_dir}"
// ],

"build_command": [
"python -m build --wheel -o {build_cache_dir} {build_dir}"
],

// The base URL to show a commit for the project.
"show_commit_url": "https://github.com/aeon-toolkit/aeon/commit",

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["3.9"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list indicates to just test against the default (latest)
// version.
"matrix": {
"numpy": [],
"numba": []
},

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
"benchmark_dir": "benchmarks",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
"env_dir": "env",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "virtualenv",
// "environment_type": "mamba",
"build_cache_size": 10,

// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": "benchmark_results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
"html_dir": "html",

// The number of characters to retain in the commit hashes.
"hash_length": 8,

// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.

"regressions_first_commits": {
"io_matlab\\.StructArr\\..*": "1a002f1"
}
}
Empty file.
48 changes: 48 additions & 0 deletions benchmarks/benchmarks/clustering.py
@@ -0,0 +1,48 @@
from sklearn.base import BaseEstimator

from .common import EstimatorBenchmark, safe_import

with safe_import():
    import aeon.clustering as aeon_clust


class KMeansBenchmark(EstimatorBenchmark):
    """Benchmark TimeSeriesKMeans with the mean averaging method."""

    inits = ["random", "kmeans++"]

    params = EstimatorBenchmark.params + [inits]
    param_names = EstimatorBenchmark.param_names + ["init"]

    def _build_estimator(self, init) -> BaseEstimator:
        # Distance and averaging method are fixed; only the init strategy varies.
        return aeon_clust.TimeSeriesKMeans(
            n_clusters=4,
            init=init,
            distance="euclidean",
            averaging_method="mean",
            n_init=1,
            max_iter=20,
            random_state=1,
        )


class KMeansBABenchmark(EstimatorBenchmark):
    """Benchmark TimeSeriesKMeans with barycentre averaging (ba)."""

    distances = ["dtw", "msm"]
    params = EstimatorBenchmark.params + [
        KMeansBenchmark.inits,
        distances,
    ]
    param_names = EstimatorBenchmark.param_names + ["init", "distance"]

    def _build_estimator(self, init, distance) -> BaseEstimator:
        return aeon_clust.TimeSeriesKMeans(
            n_clusters=4,
            init=init,
            distance=distance,
            averaging_method="ba",
            n_init=1,
            max_iter=10,
            random_state=1,
        )
66 changes: 66 additions & 0 deletions benchmarks/benchmarks/common.py
@@ -0,0 +1,66 @@
import os
from abc import ABC, abstractmethod

from sklearn.base import BaseEstimator


class safe_import:
    """Context manager that suppresses ImportError raised in its body.

    This keeps benchmark files importable when benchmarking old versions of
    aeon that are missing some features.
    """

    def __enter__(self):
        self.error = False
        return self

    def __exit__(self, type_, value, traceback):
        if type_ is not None:
            self.error = True
            # Suppress only ImportError, and only if import errors are allowed
            # (the default); set SCIPY_ALLOW_BENCH_IMPORT_ERRORS=0 to re-raise.
            suppress = not (
                os.getenv("SCIPY_ALLOW_BENCH_IMPORT_ERRORS", "1").lower()
                in ("0", "false")
                or not issubclass(type_, ImportError)
            )
            return suppress


class Benchmark:
    """Base class with sensible default options for all benchmarks."""


with safe_import():
    from aeon.testing.data_generation import make_example_3d_numpy


class EstimatorBenchmark(Benchmark, ABC):
    # Base grid of (n_cases, n_channels, n_timepoints) shapes, shared across
    # all estimator benchmarks.
    shapes = [
        (10, 1, 10),
        (10, 1, 1000),
        (50, 1, 100),
        (10, 3, 10),
        (10, 3, 1000),
        (50, 3, 100),
    ]

    params = [shapes]
    param_names = ["shape"]

    def setup(self, shape, *est_params):
        # Data generation happens here so it is not included in the timings.
        self.X_train = make_example_3d_numpy(*shape, return_y=False, random_state=1)
        self.X_test = make_example_3d_numpy(*shape, return_y=False, random_state=2)

        # Fit an estimator up front so time_predict measures prediction only.
        self.prefit_estimator = self._build_estimator(*est_params)
        self.prefit_estimator.fit(self.X_train)

    def time_fit(self, shape, *est_params):
        est = self._build_estimator(*est_params)  # fresh estimator each run
        est.fit(self.X_train)

    def time_predict(self, shape, *est_params):
        self.prefit_estimator.predict(self.X_test)

    @abstractmethod
    def _build_estimator(self, *est_params) -> BaseEstimator:
        """Return an unfitted estimator configured with the given params."""
        ...
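
For illustration only (not part of this diff): a new estimator benchmark subclasses `EstimatorBenchmark` and implements `_build_estimator`; ASV then runs the inherited `time_fit` and `time_predict` over the shared shape grid plus any extra parameters. The sketch assumes `aeon.clustering.TimeSeriesKMedoids` accepts the `n_clusters`, `distance`, `max_iter` and `random_state` arguments used here.

```python
from sklearn.base import BaseEstimator

from .common import EstimatorBenchmark, safe_import

with safe_import():
    import aeon.clustering as aeon_clust


class KMedoidsBenchmark(EstimatorBenchmark):
    """Hypothetical example: benchmark TimeSeriesKMedoids over two distances."""

    distances = ["euclidean", "dtw"]

    params = EstimatorBenchmark.params + [distances]
    param_names = EstimatorBenchmark.param_names + ["distance"]

    def _build_estimator(self, distance) -> BaseEstimator:
        # The constructor arguments below are assumptions for this sketch.
        return aeon_clust.TimeSeriesKMedoids(
            n_clusters=4,
            distance=distance,
            max_iter=10,
            random_state=1,
        )
```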