makeabilitylab · jonfroehlich · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,50 @@
+name: CI
+
+# Runs on every PR and on pushes to master. The heavy gesture notebooks are
+# NOT run here (they are slow + data-heavy) -- see notebooks-heavy.yml.
+on:
+  push:
+    branches: [master]  # pushes to any other branch do not trigger this workflow
+  pull_request:  # no filters, so PRs against any base branch trigger it
+
+jobs:
+  unit:
+    name: Unit tests (helper packages)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip  # pip download cache, keyed on the dependency file below
+          cache-dependency-path: requirements.txt
+      - name: Install pinned env + test extras
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -e ".[test]"
+      - name: Run pytest
+        run: pytest tests/ -v  # unit tests only; notebooks run in the notebooks-fast job
+
+  notebooks-fast:
+    name: Notebook smoke tests (fast tier)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+          cache-dependency-path: requirements.txt
+      - name: Install audio system lib (librosa/soundfile)
+        run: sudo apt-get update && sudo apt-get install -y libsndfile1
+      - name: Install pinned env + test extras
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -e ".[test]"
+      # nbmake executes each notebook in its own directory, so notebook-relative data
+      # loads (./Logs, data/audio/...) resolve; intentional error cells are tagged
+      # `raises-exception` and honored. Timeout is per cell (s); -n auto = xdist workers.
+      - name: Execute fast notebooks (Tutorials + StepTracker)
+        run: pytest --nbmake --nbmake-timeout=900 -n auto Tutorials/ Projects/StepTracker/
diff --git a/.github/workflows/notebooks-heavy.yml b/.github/workflows/notebooks-heavy.yml
@@ -0,0 +1,53 @@
+name: Notebooks (heavy gesture)
+
+# The GestureRecognizer notebooks are slow (k-fold cross-validation over the full
+# GestureLogs corpus) and produce large outputs, so they are NOT run on every push.
+# Triggers:
+#   - PRs / pushes to master that touch the gesture notebooks, the gesturerec package,
+#     its data, or the pinned deps (the paths filter below) -- i.e. only when a change
+#     could actually affect these notebooks;
+#   - a monthly cron, purely as a drift canary for unpinned transitive deps / system
+#     libs (the top-level stack is pinned, so day-to-day reruns add nothing);
+#   - manual "Run workflow" (workflow_dispatch).
+# Note: paths filters apply only to push/pull_request; schedule + workflow_dispatch
+# always run.
+on:
+  push:
+    branches: [master]
+    paths:  # keep this list identical to pull_request.paths below
+      - "Projects/GestureRecognizer/**"
+      - "requirements.txt"
+      - "environment.yml"  # conda spec; this job itself installs with pip only
+      - "pyproject.toml"
+      - ".github/workflows/notebooks-heavy.yml"  # re-run when this workflow changes
+  pull_request:
+    paths:  # keep this list identical to push.paths above
+      - "Projects/GestureRecognizer/**"
+      - "requirements.txt"
+      - "environment.yml"
+      - "pyproject.toml"
+      - ".github/workflows/notebooks-heavy.yml"
+  schedule:
+    - cron: "0 8 1 * *" # 08:00 UTC on the 1st of each month
+  workflow_dispatch: # manual "Run workflow" button
+
+jobs:
+  notebooks-heavy:
+    name: Execute gesture notebooks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+          cache-dependency-path: requirements.txt
+      - name: Install audio system lib (librosa/soundfile)
+        run: sudo apt-get update && sudo apt-get install -y libsndfile1
+      - name: Install pinned env + test extras
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -e ".[test]"
+      - name: Execute gesture notebooks
+        run: pytest --nbmake --nbmake-timeout=1800 -n auto Projects/GestureRecognizer/  # 1800 s timeout: twice the fast tier's 900 s in ci.yml
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## What this is
 
-Educational materials for applied signal processing and time-series classification in HCI / ubiquitous computing (a University of Washington course, part of the Makeability Lab "physcomp" curriculum). The artifacts are **Jupyter notebooks** backed by small supporting Python packages. There is no build, no test suite, no CI — work is done interactively in notebooks.
+Educational materials for applied signal processing and time-series classification in HCI / ubiquitous computing (a University of Washington course, part of the Makeability Lab "physcomp" curriculum). The artifacts are **Jupyter notebooks** backed by small supporting Python packages. There is no build; work is done interactively in notebooks. Tests do exist (added after the v2 modernization): pytest unit tests for the helper packages plus `nbmake` headless notebook smoke tests, run in GitHub Actions CI — see the **Testing** section below. The tests live entirely outside the `.ipynb` files (nothing was added inside the notebooks).
 
 Top-level layout:
 - `Tutorials/` — standalone teaching notebooks (NumPy, Matplotlib, Python, and signals: sampling/quantization, frequency analysis, comparing signals). Supported by the `makelab/` package.
@@ -29,6 +29,15 @@ Pinned dependencies live in `requirements.txt` / `environment.yml`: NumPy, SciPy
 
 Notebook files contain non-ASCII characters (arrows, curly quotes). When parsing a `.ipynb` with a script, open as UTF-8 and run Python with `PYTHONUTF8=1` — the default Windows cp1252 codec will raise `UnicodeDecodeError`.
 
+## Testing
+
+Install the test stack with `pip install -e ".[test]"` (pytest + nbmake + pytest-xdist, plus ipykernel so nbmake has a kernel to execute against). Two layers, both **outside** the notebooks:
+
+- **Unit tests** (`tests/`): pure-function tests for the helper packages — `makelab.signal`, `gesturerec.signalproc`/`utility`/`data`/`experiments`. Run with `pytest tests/`. `tests/fixtures/TestGestures/` is a tiny synthetic gesture corpus (3 trial CSVs + a `*_fulldatastream_*` exclusion file) so the `data.py` parser tests don't depend on the large real `GestureLogs/`. It deliberately includes a `Midair Zorro _Z__*.csv` file to exercise the Windows double-underscore filename quirk.
+- **Notebook smoke tests** (`nbmake`): `pytest --nbmake <paths>` executes notebooks headless and fails on any uncaught error. Intentional teaching errors are already tagged `raises-exception` (notebooks 2 and 5) and are honored — no notebook edits needed. nbmake runs each notebook in its own dir, so the CWD-relative data loads work.
+
+`[tool.pytest.ini_options].testpaths = ["tests"]` keeps a bare `pytest` fast and notebook-free; the notebook sweeps are invoked explicitly by path. CI (`.github/workflows/`) runs the unit tests + the fast notebooks (Tutorials + StepTracker) on every PR and every push to `master`; the slow gesture notebooks run only when a change touches them (`Projects/GestureRecognizer/**` or the deps), plus a monthly drift-canary cron and on-demand (`workflow_dispatch`). If you add a helper function, add a unit test; if a dependency bump breaks a notebook, the nbmake job is what catches it (this automates the manual Pass 2–4 "Restart & Run All" sweeps).
+
 ## gesturerec architecture (Projects/GestureRecognizer)
 
 This package abstracts the data loading and experiment bookkeeping so notebook code can focus on the classification algorithm itself. The data flow is:

diff --git a/MODERNIZATION-NOTES.md b/MODERNIZATION-NOTES.md
@@ -475,3 +475,46 @@ one per notebook/group on `signals-v2-pass4`.
 **Note:** an unrelated stale-stat refresh to FeatureBased (scikit-learn star/commit counts)
 appeared in the working tree from outside this pass and was reverted — flag for Jon if a
 stats refresh is wanted.
+
+---
+
+## Pass 5 — Test infrastructure (2026-06-24)
+
+Tracking issue **#8**. The Pass 2–4 "does it run" verification was done by *manually*
+running each notebook headless; this pass automates that and adds real unit coverage for
+the helper packages. **Branch `signals-v2-tests` (off `master`). Zero changes inside the
+notebooks** — all test code lives in `tests/` + `.github/workflows/`.
+
+**Decisions (with Jon):** both layers (unit + notebook smoke); GitHub Actions CI; tiered
+notebook execution (fast notebooks on every PR; heavy gesture notebooks only when a relevant
+change is made — path-filtered — plus a monthly drift canary, since the top-level deps are
+pinned so a nightly rerun adds nothing); **execute-only** notebook checks via **nbmake** (not
+output-diffing — random amplitudes / `random` xzoom / timing make strict output comparison flaky).
+
+**Added**
+- **Unit tests (`tests/`, pytest):** `test_makelab_signal.py` (wave gen, `shift_array`,
+  `calc_zero_crossings`, `map`/`remap`, top-N indices), `test_gesturerec_signalproc.py`
+  (`compute_fft` half-spectrum + peak bin + scaling, `get_top_n_frequency_peaks`),
+  `test_gesturerec_utility.py` (the `fulldatastream` exclusion guard, `extract_gesture_name`,
+  `path_leaf`, subdirs), `test_gesturerec_data.py` (`SensorData` mag/rate + int64 cast, `Trial`
+  CSV parse, `GestureSet.load` ordering **and the Windows `__` double-underscore quirk**),
+  `test_gesturerec_experiments.py` (`TrialClassificationResult` n-best sort + `is_correct`,
+  via stub trials). **26 tests, green locally.** Tiny synthetic fixture corpus under
+  `tests/fixtures/TestGestures/` (avoids depending on the large real `GestureLogs/`).
+  - *Caught a real contract detail while writing them:* `create_sine_wave(return_time=True)`
+    returns `(time, sine_wave)` — order matters.
+- **nbmake smoke tests:** verified locally that nbmake honors the `raises-exception` tags
+  (NB2, NB5) and executes notebooks in their own dir (CWD-relative `./Logs` loads work).
+- **`pyproject.toml`:** `[project.optional-dependencies].test` (pytest, nbmake, pytest-xdist,
+  ipykernel) + `[tool.pytest.ini_options].testpaths = ["tests"]` (keeps bare `pytest` fast;
+  notebook sweeps invoked explicitly by path).
+- **CI:** `.github/workflows/ci.yml` (push/PR → `unit` job + `notebooks-fast` job over
+  Tutorials + StepTracker) and `notebooks-heavy.yml` (the slow GestureRecognizer notebooks,
+  triggered by a `paths`-filtered push/PR on `Projects/GestureRecognizer/**` or the deps,
+  plus a monthly `schedule` canary and `workflow_dispatch`). Both install pinned
+  `requirements.txt` + `.[test]` on Python 3.12 and `libsndfile1` for librosa.
+- **Docs:** updated `CLAUDE.md` ("no test suite, no CI" line + a Testing section) and
+  `README.md` (Testing section + layout).
+
+**Still to validate post-push:** the GitHub Actions runs themselves (Linux + pinned stack) —
+CI can only be confirmed green after the branch lands and a PR triggers it.
diff --git a/README.md b/README.md
@@ -72,6 +72,26 @@ import makelab.signal          # used by the Tutorials notebooks
 import gesturerec.data         # used by the GestureRecognizer notebooks
 ```
 
+## Tests
+
+The helper packages have unit tests and the notebooks have headless "does it still
+execute" smoke tests. None of this lives inside the notebooks. Install the test extras
+and run:
+
+```bash
+pip install -e ".[test]"
+
+pytest tests/                                  # fast unit tests for makelab + gesturerec
+pytest --nbmake Tutorials/ Projects/StepTracker/      # execute the fast notebooks
+pytest --nbmake Projects/GestureRecognizer/           # execute the (slow) gesture notebooks
+```
+
+`nbmake` executes each notebook in its own directory and fails on any uncaught error
+(intentional teaching errors are tagged `raises-exception` and allowed). CI runs the
+unit tests + fast notebooks on every PR and every push to `master`, and the slow gesture
+notebooks only when a change touches them (plus a monthly canary and on-demand) —
+see [`.github/workflows/`](.github/workflows/).
+
 ## Repository layout
 
 ```
@@ -85,6 +105,8 @@ import gesturerec.data         # used by the GestureRecognizer notebooks
 │       ├── gesturerec/           # data structures + experiment scaffolding (package)
 │       ├── GestureLogs/          # per-participant gesture training data
 │       └── ADXL335GestureLogs/   # alternate-sensor gesture data
+├── tests/                        # pytest unit tests for makelab + gesturerec
+├── .github/workflows/            # CI: unit + notebook smoke tests
 ├── pyproject.toml                # packaging for makelab + gesturerec
 ├── requirements.txt              # pinned pip environment
 └── environment.yml               # pinned conda environment

diff --git a/pyproject.toml b/pyproject.toml
@@ -32,6 +32,15 @@ notebooks = [
     "ipykernel>=6.29",
     "ipympl>=0.9",
 ]
+# Test stack: pytest for the helper-package unit tests, nbmake for headless
+# "does every notebook still execute" smoke tests, pytest-xdist to run them in
+# parallel. Install with: pip install -e ".[test]"
+test = [
+    "pytest>=8",
+    "nbmake>=1.5",
+    "pytest-xdist>=3.6",
+    "ipykernel>=6.29",  # nbmake needs a kernel to execute the notebooks
+]
 
 [project.urls]
 Homepage = "https://makeabilitylab.github.io/physcomp/signals/"
@@ -47,3 +56,9 @@ packages = ["makelab", "gesturerec"]
 [tool.setuptools.package-dir]
 makelab = "Tutorials/makelab"
 gesturerec = "Projects/GestureRecognizer/gesturerec"
+
+# Unit tests live in tests/. The notebook smoke tests are run explicitly by path
+# (e.g. `pytest --nbmake Tutorials/`), so testpaths intentionally lists only tests/
+# to keep a bare `pytest` fast and notebook-free.
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/tests/fixtures/TestGestures/Midair Zorro _Z__3000_3.csv b/tests/fixtures/TestGestures/Midair Zorro _Z__3000_3.csv
@@ -0,0 +1,4 @@
+timestamp,sensor_timestamp,x,y,z
+3000,10,5,0,0
+3100,20,0,5,0
+3200,30,0,0,5
diff --git a/tests/fixtures/TestGestures/Shake_1000_3.csv b/tests/fixtures/TestGestures/Shake_1000_3.csv
@@ -0,0 +1,4 @@
+timestamp,sensor_timestamp,x,y,z
+1000,10,3,4,0
+1100,20,0,0,0
+1200,30,1,2,2
diff --git a/tests/fixtures/TestGestures/Shake_2000_3.csv b/tests/fixtures/TestGestures/Shake_2000_3.csv
@@ -0,0 +1,4 @@
+timestamp,sensor_timestamp,x,y,z
+2000,10,1,1,1
+2100,20,2,2,2
+2200,30,3,3,3
diff --git a/tests/fixtures/TestGestures/armGestureData_fulldatastream_999_9.csv b/tests/fixtures/TestGestures/armGestureData_fulldatastream_999_9.csv
@@ -0,0 +1,3 @@
+timestamp,sensor_timestamp,x,y,z
+1,1,0,0,0
+2,2,0,0,0
diff --git a/tests/test_gesturerec_data.py b/tests/test_gesturerec_data.py
@@ -0,0 +1,70 @@
+"""Unit tests for gesturerec.data -- SensorData, Trial, and GestureSet.
+
+Uses a tiny synthetic fixture corpus under tests/fixtures/TestGestures/ rather than
+the large real GestureLogs/, so the suite stays fast and self-contained. The fixture
+deliberately includes:
+  - two "Shake" trials (to verify chronological trial ordering by end-time),
+  - a "Midair Zorro _Z_" file exercising the Windows double-underscore filename quirk,
+  - a *_fulldatastream_* file that must be excluded from per-trial loading.
+"""
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+import gesturerec.data as grdata
+
+FIXTURE_DIR = Path(__file__).parent / "fixtures" / "TestGestures"
+
+
+def test_sensordata_magnitude_and_rate():
+    time = np.array([1000, 1100, 1200])  # treated as milliseconds by the duration check below
+    sensor_time = np.array([10, 20, 30])
+    x = np.array([3, 0, 0])
+    y = np.array([4, 0, 0])
+    z = np.array([0, 0, 0])
+    sd = grdata.SensorData("Accelerometer", time, sensor_time, x, y, z)
+
+    # mag = sqrt(x^2 + y^2 + z^2); first row 3,4,0 -> 5.
+    assert sd.mag[0] == pytest.approx(5.0)
+    assert sd.length() == 3
+    # length_in_secs = (1200-1000)/1000 = 0.2s; sampling_rate = 3 / 0.2 = 15 Hz.
+    assert sd.length_in_secs == pytest.approx(0.2)
+    assert sd.sampling_rate == pytest.approx(15.0)  # i.e. sample count / duration, not (n - 1) / duration
+
+
+def test_sensordata_casts_time_to_int64():
+    sd = grdata.SensorData("Accelerometer",
+                           np.array([1000, 2000]), np.array([1, 2]),
+                           np.array([1, 2]), np.array([1, 2]), np.array([1, 2]))
+    # Regression guard: the int64 cast is deliberate (Windows long is 32-bit) -- keep it.
+    assert sd.time.dtype == np.int64  # only `time` is checked; sensor_time's dtype is not asserted here
+
+
+def test_trial_parses_csv_in_constructor():
+    trial = grdata.Trial("Shake", 0, str(FIXTURE_DIR / "Shake_1000_3.csv"))  # no separate load call
+    assert trial.gesture_name == "Shake"
+    assert trial.length() == 3  # three data rows in the fixture (header excluded)
+    assert trial.get_start_time() == 1000  # first row's `timestamp` column
+    assert trial.get_end_time() == 1200  # last row's `timestamp` column
+    # First data row is 3,4,0 -> magnitude 5.
+    assert trial.accel.mag[0] == pytest.approx(5.0)
+
+
+def test_gestureset_load_orders_trials_and_handles_windows_quirk():
+    gs = grdata.GestureSet(str(FIXTURE_DIR))
+    gs.load()
+
+    # The *_fulldatastream_* file is excluded -> exactly two gestures.
+    names = gs.get_gesture_names_sorted()
+    assert "Shake" in names
+    # The "Midair Zorro _Z_" filename (Windows replaced ' with _) must decode back
+    # to the apostrophe form.
+    assert "Midair Zorro 'Z'" in names
+    assert gs.get_num_gestures() == 2
+
+    # Two Shake trials, ordered chronologically by end-time (1200 then 2200).
+    shake_trials = gs.get_trials("Shake")
+    assert len(shake_trials) == 2
+    assert shake_trials[0].get_end_time() == 1200
+    assert shake_trials[1].get_end_time() == 2200
diff --git a/tests/test_gesturerec_experiments.py b/tests/test_gesturerec_experiments.py
@@ -0,0 +1,51 @@
+"""Unit tests for gesturerec.experiments.TrialClassificationResult.
+
+Exercises the n-best-list sorting and is_correct contract with lightweight stub
+trials, so no real classifier run or data loading is needed.
+"""
+from gesturerec.experiments import TrialClassificationResult
+
+
+class _StubTrial:
+    """Minimal stand-in for a Trial: only the attributes the result class touches."""
+
+    def __init__(self, gesture_name, trial_num=0):
+        self.gesture_name = gesture_name  # doubles as the ground-truth label returned below
+        self.trial_num = trial_num
+
+    def get_ground_truth_gesture_name(self):
+        return self.gesture_name
+
+
+def test_nbest_list_sorted_ascending_by_score_and_closest_is_lowest():
+    test_trial = _StubTrial("Shake")
+    good = _StubTrial("Shake")
+    bad = _StubTrial("Wave")
+    # Lower score == closer match (the algorithms return distances).
+    result = TrialClassificationResult(test_trial, [(bad, 9.0), (good, 1.0)])  # worst-first, so sorting is exercised
+
+    assert result.n_best_list_sorted[0][1] == 1.0  # entries are (trial, score) pairs
+    assert result.closest_trial is good
+    assert result.score == 1.0
+
+
+def test_is_correct_true_when_closest_matches_ground_truth():
+    test_trial = _StubTrial("Shake")
+    result = TrialClassificationResult(
+        test_trial, [(_StubTrial("Shake"), 2.0), (_StubTrial("Wave"), 5.0)])  # lowest score (2.0) is a Shake
+    assert result.is_correct is True  # identity check: must be the bool True, not merely truthy
+
+
+def test_is_correct_false_when_closest_is_wrong_gesture():
+    test_trial = _StubTrial("Shake")
+    result = TrialClassificationResult(
+        test_trial, [(_StubTrial("Wave"), 0.5), (_StubTrial("Shake"), 5.0)])  # lowest score (0.5) is a Wave
+    assert result.is_correct is False  # identity check: must be the bool False, not merely falsy
+
+
+def test_correct_match_index_in_nbest_list():
+    test_trial = _StubTrial("Shake")
+    # Closest is Wave (0.5); the correct Shake template is next (0-based index 1).
+    result = TrialClassificationResult(
+        test_trial, [(_StubTrial("Wave"), 0.5), (_StubTrial("Shake"), 5.0)])
+    assert result.get_correct_match_index_nbestlist() == 1