AudioActions: compile error

pschatzmann · pschatzmann · commit 8f8f1d819deb · 2025-11-30T15:35:34.000+01:00
diff --git a/src/AudioTools/AudioLibs/AudioFFT.h b/src/AudioTools/AudioLibs/AudioFFT.h
@@ -638,6 +638,12 @@ class AudioFFTBase : public AudioStream {
   /// Provides the actual configuration
   AudioFFTConfig &config() { return cfg; }
 
+  /// Provides the object stored in cfg.ref, typed as T (ref is dereferenced
+  /// without a check, so it must point to a valid T)
+  template <typename T>
+  T& reference() {
+    T* typed_ref = static_cast<T*>(cfg.ref);
+    return *typed_ref;
+  }
+
  protected:
   FFTDriver *p_driver = nullptr;
   int current_pos = 0;
diff --git a/src/AudioTools/CoreAudio/AudioActions.h b/src/AudioTools/CoreAudio/AudioActions.h
@@ -68,7 +68,7 @@ class AudioActions {
                touchLimit, result ? "true" : "false");
         }
       } else {
-        if () result = readPin(this->pin);
+        result = readPin(this->pin);
       }
       return result;
 #else
diff --git a/src/AudioTools/STT/EchoCanellation.h b/src/AudioTools/STT/EchoCanellation.h
diff --git a/src/AudioTools/STT/WakeWordDetector.h b/src/AudioTools/STT/WakeWordDetector.h
@@ -0,0 +1,163 @@
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+
+#include "AudioOutput.h"
+#include "Vector.h"
+
+namespace audio_tools {
+
+/**
+ * @brief Frame holding the top N frequencies of one FFT window.
+ *
+ * Used as a compact representation of the dominant frequency content in a frame
+ * of audio.
+ *
+ * @tparam N Number of frequency entries stored per frame
+ */
+template <size_t N>
+struct FrequencyFrame {
+  uint16_t top_freqs[N];  ///< Top N entries; WakeWordDetector fills them from AudioFFTResult::frequency
+};
+
+/**
+ * @class WakeWordDetector
+ * @brief Template-based wake word detector for microcontrollers using dominant
+ * frequency patterns.
+ *
+ * This class detects wake words by comparing the sequence of the top N dominant
+ * frequencies in each audio frame to stored templates for each wake word. When
+ * the percentage of matching frames reaches a configurable threshold, the
+ * corresponding wake word is considered detected.
+ *
+ * Audio written to the detector is forwarded to the FFT. The detector stores
+ * itself in the FFT configuration's ref pointer and processes each FFT result
+ * in fftResult().
+ *
+ * @tparam N Number of dominant frequencies to track per frame (default: 3)
+ *
+ * Usage:
+ * - Record each wake word (startRecording() / stopRecording()) and use the
+ *   returned frames to build templates.
+ * - Instantiate WakeWordDetector<N> and add templates for each wake word.
+ * - Register a callback to handle detection events using setWakeWordCallback().
+ *
+ * Example:
+ * @code
+ * audio_tools::WakeWordDetector<3> detector(fft, fft_size, frame_size);
+ * detector.addTemplate(my_template_frames, 80.0f, "hello");
+ * detector.setWakeWordCallback([](const char* name) { Serial.println(name); });
+ * @endcode
+ */
+template <size_t N = 3>
+class WakeWordDetector : public AudioOutput {
+ public:
+  /// A stored wake word: its frame sequence plus the matching parameters
+  struct Template {
+    Vector<FrequencyFrame<N>>
+        frames;  ///< Sequence of frequency frames for the wake word
+    float threshold_percent;  ///< Minimum percent of matching frames required
+                              ///< for detection (0-100)
+    const char* name;         ///< Name/label of the wake word (pointer is
+                              ///< stored, the string is not copied)
+    float last_match_percent =
+        0.0f;  ///< Last computed match percent for this template
+  };
+
+  /// Function invoked with the name of a detected wake word
+  using WakeWordCallback = void (*)(const char* name);
+
+  /**
+   * @brief Creates the detector and registers it with the FFT.
+   * @param fft FFT that receives the audio; its config().ref is set to this
+   * instance and its callback to fftResult()
+   * @param fft_size FFT size per frame
+   * @param frame_size Number of PCM samples per frame
+   */
+  WakeWordDetector(AudioFFTBase& fft, size_t fft_size, size_t frame_size)
+      // initializers listed in member declaration order
+      : p_fft(&fft), _fft_size(fft_size), _frame_size(frame_size) {
+    _buffer.resize(_frame_size, 0);
+    _frame_pos = 0;
+    fft.config().ref = this;
+    // NOTE(review): assumes AudioFFTBase exposes a public `callback` member;
+    // confirm this should not be fft.config().callback
+    fft.callback = fftResult;
+  }
+
+  /// Discards the buffered frames and starts collecting frames for a template
+  void startRecording() {
+    _recent_frames.clear();
+    _is_recording = true;
+  }
+
+  /// Ends the recording and returns a copy of the frames collected since
+  /// startRecording()
+  Vector<FrequencyFrame<N>> stopRecording() {
+    _is_recording = false;
+    return _recent_frames;
+  }
+
+  /// Returns true while a template recording is in progress
+  bool isRecording() const { return _is_recording; }
+
+  /**
+   * @brief Registers a wake word template.
+   * @param frames Frame sequence describing the wake word (copied)
+   * @param threshold_percent Minimum percent of matching frames (0-100)
+   * @param name Label passed to the callback on detection; only the pointer
+   * is stored
+   */
+  void addTemplate(const Vector<FrequencyFrame<N>>& frames,
+                   float threshold_percent, const char* name) {
+    Template t;
+    t.frames = frames;
+    t.threshold_percent = threshold_percent;
+    t.name = name;
+    t.last_match_percent = 0.0f;
+    _templates.push_back(t);
+    // the frame history must be able to hold the longest template
+    if (frames.size() > _max_template_len) _max_template_len = frames.size();
+  }
+
+  /// Defines the function that is called when a wake word is detected
+  void setWakeWordCallback(WakeWordCallback cb) { _callback = cb; }
+
+  /// Forwards the audio data to the FFT and returns the FFT's write() result
+  size_t write(const void* buf, size_t count) override {
+    return p_fft->write(static_cast<const uint8_t*>(buf), count);
+  }
+
+  /**
+   * @brief FFT callback: stores the top N frequencies of the new result and,
+   * unless a recording is active, checks all templates for a match.
+   * @param fft FFT whose config().ref points to the owning detector
+   */
+  static void fftResult(AudioFFTBase& fft) {
+    // This static method must access instance data via fft.config().ref
+    auto* self = static_cast<WakeWordDetector<N>*>(fft.config().ref);
+    if (!self) return;
+    FrequencyFrame<N> frame;
+    AudioFFTResult result[N];
+    self->p_fft->resultArray(result, N);
+    for (size_t j = 0; j < N; j++) {
+      frame.top_freqs[j] = result[j].frequency;
+    }
+    self->_recent_frames.push_back(frame);
+
+    // While recording, frames only accumulate: no trimming and no matching
+    if (self->_is_recording) {
+      return;
+    }
+
+    // Drop the oldest frames until the history fits the longest template. A
+    // loop is required because a finished recording can leave more than one
+    // surplus frame behind.
+    while (self->_recent_frames.size() > self->_max_template_len)
+      self->_recent_frames.erase(self->_recent_frames.begin());
+    for (size_t i = 0; i < self->_templates.size(); ++i) {
+      Template& tmpl = self->_templates[i];
+      // a template can only be compared once enough frames are available
+      if (self->_recent_frames.size() >= tmpl.frames.size()) {
+        float percent = self->matchTemplate(tmpl);
+        if (percent >= tmpl.threshold_percent) {
+          if (self->_callback) self->_callback(tmpl.name);
+        }
+      }
+    }
+  }
+
+ protected:
+  Vector<Template> _templates;               ///< List of wake word templates
+  Vector<FrequencyFrame<N>> _recent_frames;  ///< Recent frames for comparison
+  Vector<int16_t> _buffer;  ///< Buffer for incoming PCM samples
+  AudioFFTBase* p_fft = nullptr;
+  bool _is_recording = false;    ///< True if currently recording a template
+  size_t _fft_size;              ///< FFT size per frame
+  size_t _frame_size;            ///< Number of PCM samples per frame
+  size_t _frame_pos;             ///< Current position in frame buffer
+  size_t _max_template_len = 0;  ///< Length of the longest template
+  WakeWordCallback _callback = nullptr;
+
+  /**
+   * @brief Compares the template with the most recent frames.
+   *
+   * The caller must ensure that at least tmpl.frames.size() recent frames are
+   * available. The result is also stored in tmpl.last_match_percent.
+   *
+   * @param tmpl Template to compare
+   * @return Percentage (0-100) of template frames that match
+   */
+  float matchTemplate(Template& tmpl) {
+    size_t matches = 0;
+    // align the template with the newest frames of the history
+    size_t offset = _recent_frames.size() - tmpl.frames.size();
+    for (size_t i = 0; i < tmpl.frames.size(); ++i) {
+      size_t frame_matches = 0;
+      for (size_t j = 0; j < N; ++j) {
+        if (tmpl.frames[i].top_freqs[j] ==
+            _recent_frames[offset + i].top_freqs[j])
+          frame_matches++;
+      }
+      if (frame_matches >= (N >= 2 ? N - 1 : 1))  // at least N-1 out of N match
+        matches++;
+    }
+    float percent = (tmpl.frames.size() > 0)
+                        ? (100.0f * matches / tmpl.frames.size())
+                        : 0.0f;
+    tmpl.last_match_percent = percent;
+    return percent;
+  }
+};
+
+}  // namespace audio_tools

Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,7 @@ class AudioActions {`
`68`	`68`	`touchLimit, result ? "true" : "false");`
`69`	`69`	`}`
`70`	`70`	`} else {`
`71`		`- if () result = readPin(this->pin);`
	`71`	`+ result = readPin(this->pin);`
`72`	`72`	`}`
`73`	`73`	`return result;`
`74`	`74`	`#else`