Skip to content

Commit 8f8f1d8

Browse files
committed
AudioActions: compile error
1 parent c237cfe commit 8f8f1d8

File tree

4 files changed

+170
-1
lines changed

4 files changed

+170
-1
lines changed

src/AudioTools/AudioLibs/AudioFFT.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,12 @@ class AudioFFTBase : public AudioStream {
638638
/// Provides the actual configuration
639639
AudioFFTConfig &config() { return cfg; }
640640

641+
/// Provides the reference pointer
642+
template <typename T>
643+
T& reference() {
644+
return *((T*)cfg.ref);
645+
}
646+
641647
protected:
642648
FFTDriver *p_driver = nullptr;
643649
int current_pos = 0;

src/AudioTools/CoreAudio/AudioActions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class AudioActions {
6868
touchLimit, result ? "true" : "false");
6969
}
7070
} else {
71-
if () result = readPin(this->pin);
71+
result = readPin(this->pin);
7272
}
7373
return result;
7474
#else

src/AudioTools/STT/EchoCanellation.h

Whitespace-only changes.
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
#pragma once

// size_t / uint16_t are used directly by the declarations in this header.
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <cmath>

#include "AudioOutput.h"
#include "Vector.h"
10+
11+
namespace audio_tools {
12+
13+
/**
 * @brief Frame holding the N dominant frequencies of one FFT window.
 *
 * Used as a compact representation of the dominant frequency content in a
 * frame of audio. WakeWordDetector fills the slots from
 * AudioFFTResult::frequency, converted to uint16_t.
 *
 * @tparam N Number of dominant frequencies stored per frame (must be > 0)
 */
template <size_t N>
struct FrequencyFrame {
  static_assert(N > 0, "FrequencyFrame needs at least one frequency slot");
  uint16_t top_freqs[N];  ///< The N dominant frequency values of the frame
};
23+
24+
/**
25+
* @class WakeWordDetector
26+
* @brief Template-based wake word detector for microcontrollers using dominant
27+
* frequency patterns.
28+
*
29+
* This class detects wake words by comparing the sequence of the top N dominant
30+
* frequencies in each audio frame to stored templates for each wake word. When
31+
* the percentage of matching frames exceeds a configurable threshold, the
32+
* corresponding wake word is considered detected.
33+
*
34+
* @tparam N Number of dominant frequencies to track per frame (default: 3)
35+
*
36+
* Usage:
37+
* - Record each wake word and extract the top N frequencies for each frame to
38+
* build templates.
39+
* - Instantiate WakeWordDetector<N> and add templates for each wake word.
40+
* - Register a callback to handle detection events using setWakeWordCallback().
41+
*
42+
* Example:
43+
* @code
44+
* audio_tools::WakeWordDetector<3> detector(fft, fft_size, frame_size);
45+
*detector.addTemplate(my_template_frames, 80.0f, "hello");
46+
*detector.setWakeWordCallback([](const char* name) { Serial.println(name); });
47+
... (file header and includes)
48+
*/
49+
template <size_t N = 3>
50+
class WakeWordDetector : public AudioOutput {
51+
public:
52+
struct Template {
53+
Vector<FrequencyFrame<N>>
54+
frames; ///< Sequence of frequency frames for the wake word
55+
float threshold_percent; ///< Minimum percent of matching frames required
56+
///< for detection (0-100)
57+
const char* name; ///< Name/label of the wake word
58+
float last_match_percent =
59+
0.0f; ///< Last computed match percent for this template
60+
};
61+
62+
using WakeWordCallback = void (*)(const char* name);
63+
64+
WakeWordDetector(AudioFFTBase& fft, size_t fft_size, size_t frame_size)
65+
: _fft_size(fft_size), _frame_size(frame_size), p_fft(&fft) {
66+
_buffer.resize(_frame_size, 0);
67+
_frame_pos = 0;
68+
fft.config().ref = this;
69+
fft.callback = fftResult;
70+
}
71+
72+
void startRecording() {
73+
_recent_frames.clear();
74+
_is_recording = true;
75+
}
76+
77+
Vector<FrequencyFrame<N>> stopRecording() {
78+
_is_recording = false;
79+
return _recent_frames;
80+
}
81+
82+
bool isRecording() const { return _is_recording; }
83+
84+
void addTemplate(const Vector<FrequencyFrame<N>>& frames,
85+
float threshold_percent, const char* name) {
86+
Template t;
87+
t.frames = frames;
88+
t.threshold_percent = threshold_percent;
89+
t.name = name;
90+
t.last_match_percent = 0.0f;
91+
_templates.push_back(t);
92+
if (frames.size() > _max_template_len) _max_template_len = frames.size();
93+
}
94+
95+
void setWakeWordCallback(WakeWordCallback cb) { _callback = cb; }
96+
97+
size_t write(const void* buf, size_t count) override {
98+
return p_fft->write((const uint8_t*)buf, count);
99+
}
100+
101+
static void fftResult(AudioFFTBase& fft) {
102+
// This static method must access instance data via fft.config().ref
103+
auto* self = static_cast<WakeWordDetector<N>*>(fft.config().ref);
104+
if (!self) return;
105+
FrequencyFrame<N> frame;
106+
AudioFFTResult result[N];
107+
self->p_fft->resultArray(result, N);
108+
for (size_t j = 0; j < N; j++) {
109+
frame.top_freqs[j] = result[j].frequency;
110+
}
111+
self->_recent_frames.push_back(frame);
112+
113+
if (self->_is_recording) {
114+
return;
115+
}
116+
117+
if (self->_recent_frames.size() > self->_max_template_len)
118+
self->_recent_frames.erase(self->_recent_frames.begin());
119+
for (size_t i = 0; i < self->_templates.size(); ++i) {
120+
Template& tmpl = self->_templates[i];
121+
if (self->_recent_frames.size() >= tmpl.frames.size()) {
122+
float percent = self->matchTemplate(tmpl);
123+
if (percent >= tmpl.threshold_percent) {
124+
if (self->_callback) self->_callback(tmpl.name);
125+
}
126+
}
127+
}
128+
}
129+
130+
protected:
131+
Vector<Template> _templates; ///< List of wake word templates
132+
Vector<FrequencyFrame<N>> _recent_frames; ///< Recent frames for comparison
133+
Vector<int16_t> _buffer; ///< Buffer for incoming PCM samples
134+
AudioFFTBase* p_fft = nullptr;
135+
bool _is_recording = false; ///< True if currently recording a template
136+
size_t _fft_size; ///< FFT size per frame
137+
size_t _frame_size; ///< Number of PCM samples per frame
138+
size_t _frame_pos; ///< Current position in frame buffer
139+
size_t _max_template_len = 0; ///< Length of the longest template
140+
WakeWordCallback _callback = nullptr;
141+
142+
float matchTemplate(Template& tmpl) {
143+
size_t matches = 0;
144+
size_t offset = _recent_frames.size() - tmpl.frames.size();
145+
for (size_t i = 0; i < tmpl.frames.size(); ++i) {
146+
size_t frame_matches = 0;
147+
for (size_t j = 0; j < N; ++j) {
148+
if (tmpl.frames[i].top_freqs[j] ==
149+
_recent_frames[offset + i].top_freqs[j])
150+
frame_matches++;
151+
}
152+
if (frame_matches >= (N >= 2 ? N - 1 : 1)) // at least N-1 out of N match
153+
matches++;
154+
}
155+
float percent = (tmpl.frames.size() > 0)
156+
? (100.0f * matches / tmpl.frames.size())
157+
: 0.0f;
158+
tmpl.last_match_percent = percent;
159+
return percent;
160+
}
161+
};
162+
163+
} // namespace audio_tools

0 commit comments

Comments
 (0)