Skip to content

Commit e5a0806

Browse files
authored
Merge pull request #2515 from makermelissa/main
Magic Storybook: Added stripped out stuff from listener back in
2 parents 17ffff6 + b77743b commit e5a0806

File tree

2 files changed

+68
-19
lines changed

2 files changed

+68
-19
lines changed

Magic_AI_Storybook/listener.py

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,62 +2,109 @@
22
#
33
# SPDX-License-Identifier: MIT
44

5+
from queue import Queue
56
import time
67

78
import speech_recognition as sr
89

910

1011
class Listener:
11-
def __init__(
    self, api_key, energy_threshold=300, phrase_timeout=3.0, record_timeout=30
):
    """Create the microphone/recognizer pair and calibrate for ambient noise.

    Args:
        api_key: Key passed to the Whisper API call made by ``recognize``.
        energy_threshold: Fixed energy threshold assigned to the recognizer
            (dynamic adjustment is switched off).
        phrase_timeout: Seconds the ``listen`` loop waits before it marks
            the phrase as complete.
        record_timeout: Forwarded as ``phrase_time_limit`` when the
            background listener is started.
    """
    self.listener_handle = None
    self.microphone = sr.Microphone()

    recognizer = sr.Recognizer()
    recognizer.energy_threshold = energy_threshold
    recognizer.dynamic_energy_threshold = False
    recognizer.pause_threshold = 1
    self.recognizer = recognizer

    # Raw audio accumulated so far plus the phrase bookkeeping around it
    self.last_sample = bytes()
    self.phrase_time = time.monotonic()
    self.phrase_timeout = phrase_timeout

    with self.microphone as source:
        # we only need to calibrate once, before we start listening
        recognizer.adjust_for_ambient_noise(source)

    self.record_timeout = record_timeout
    self.phrase_complete = False
    # Filled by the background listener callback, drained by _get_audio()
    self.data_queue = Queue()
    self.api_key = api_key
2433

2534
def listen(self, ready_callback=None):
    """Listen in the background until a phrase has been captured.

    Starts the background listener, calls ``ready_callback`` (if given),
    then polls until the phrase timeout has elapsed and either audio is
    queued or the listener handle has been cleared. The listener is
    always stopped before returning.

    Args:
        ready_callback: Optional zero-argument callable invoked right
            after the background listener has been started.
    """
    print("Start listening...")
    self.phrase_complete = False
    start = time.monotonic()
    self._start_listening()
    if ready_callback:
        ready_callback()
    while (
        self.listener_handle and not self.speech_waiting()
    ) or not self.phrase_complete:
        if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
            # Timeout reached: drop the previous sample and mark the
            # phrase as complete so the loop can end.
            self.last_sample = bytes()
            self.phrase_complete = True
            self.phrase_time = time.monotonic() - start
        # Sleep between polls; without this the loop spins a CPU core
        # for the whole time we are waiting on the microphone thread.
        time.sleep(0.1)
    self.stop_listening()
3249

33-
def _save_audio_callback(self, _, audio):
    """Queue the raw bytes of a captured phrase.

    Registered with the recognizer's background listener; the first
    positional argument it supplies is ignored.
    """
    print("Saving audio")
    self.data_queue.put(audio.get_raw_data())
54+
55+
def _get_audio(self):
    """Concatenate and convert the queued raw data back to audio and return it

    Returns ``None`` when no captured audio is waiting in the queue.
    """
    began = time.monotonic()
    if not self.speech_waiting():
        return None

    self.phrase_complete = False
    # NOTE(review): ``began`` was sampled just above, so with a positive
    # phrase_timeout this check looks like it can never pass -- confirm
    # what the timeout was meant to be measured against.
    if self.phrase_time and time.monotonic() > began + self.phrase_timeout:
        self.last_sample = bytes()
        self.phrase_complete = True
        self.phrase_time = time.monotonic() - began

    # Concatenate our current audio data with the latest audio data.
    while self.speech_waiting():
        chunk = self.data_queue.get()
        self.last_sample += chunk

    # Use AudioData to convert the raw data to wav data.
    sample_rate = self.microphone.SAMPLE_RATE
    sample_width = self.microphone.SAMPLE_WIDTH
    return sr.AudioData(self.last_sample, sample_rate, sample_width)
3577

3678
def _start_listening(self):
    """Start the background listener unless a handle is already held."""
    if self.listener_handle:
        return
    self.listener_handle = self.recognizer.listen_in_background(
        self.microphone,
        self._save_audio_callback,
        phrase_time_limit=self.record_timeout,
    )
4085

4186
def stop_listening(self, wait_for_stop=False):
    """Stop the background listener, if one is running.

    Args:
        wait_for_stop: Forwarded to the stop handle as ``wait_for_stop``.
    """
    if not self.listener_handle:
        return
    self.listener_handle(wait_for_stop=wait_for_stop)
    self.listener_handle = None
    print("Stop listening...")
4591

4692
def is_listening(self):
    """Return True while a background listener handle is held."""
    handle = self.listener_handle
    return handle is not None
4894

4995
def speech_waiting(self):
    """Return True when captured audio is queued and not yet consumed."""
    queue_is_empty = self.data_queue.empty()
    return not queue_is_empty
5197

5298
def recognize(self):
53-
if self.audio:
99+
audio = self._get_audio()
100+
if audio:
54101
# Transcribe the audio data to text using Whisper
55102
print("Recognizing...")
56103
attempts = 0
57104
while attempts < 3:
58105
try:
59106
result = self.recognizer.recognize_whisper_api(
60-
self.audio, api_key=self.api_key
107+
audio, api_key=self.api_key
61108
)
62109

63110
return result.strip()

Magic_AI_Storybook/story.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
TITLE_FADE_STEPS = 25
7676
TEXT_FADE_TIME = 0.25
7777
TEXT_FADE_STEPS = 51
78-
ALSA_ERROR_DELAY = 1.0 # Delay to wait after an ALSA errors
78+
ALSA_ERROR_DELAY = 0.5 # Delay in seconds to wait after an ALSA error
7979

8080
# Whitespace Settings (in Pixels)
8181
PAGE_TOP_MARGIN = 20
@@ -92,6 +92,7 @@
9292

9393
# Speech Recognition Parameters
9494
ENERGY_THRESHOLD = 300 # Energy level for mic to detect
95+
PHRASE_TIMEOUT = 1.0 # Space between recordings for separating phrases
9596
RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech
9697

9798
# Do some checks and Import API keys from API_KEYS_FILE
@@ -248,7 +249,9 @@ def start(self):
248249
self._prompt = f.read()
249250

250251
# Initialize the Listener
251-
self.listener = Listener(openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT)
252+
self.listener = Listener(
253+
openai.api_key, ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT
254+
)
252255

253256
# Preload remaining images
254257
self._load_image("background", BACKGROUND_IMAGE)
@@ -636,28 +639,28 @@ def generate_new_story(self):
636639
if self._sleep_request:
637640
self._busy = False
638641
time.sleep(0.2)
639-
print("Not busy anymore")
640642
return
641643

642-
def show_waiting():
644+
def show_listening():
643645
# Pause for a beat because the listener doesn't
644646
# immediately start listening sometimes
645647
time.sleep(ALSA_ERROR_DELAY)
646648
self.pixels.fill(NEOPIXEL_WAITING_COLOR)
647649
self.pixels.show()
648650

649-
self.listener.listen(ready_callback=show_waiting)
651+
self.listener.listen(ready_callback=show_listening)
650652

651653
if self._sleep_request:
652654
self._busy = False
653655
return
654656

655657
if not self.listener.speech_waiting():
656658
# No response from user, so return
659+
print("No response from user.")
657660
return
658661

659662
story_request = self.listener.recognize()
660-
663+
print(f"Whisper heard: {story_request}")
661664
story_prompt = self._make_story_prompt(story_request)
662665
self.display_loading()
663666
response = self._sendchat(story_prompt)
@@ -680,7 +683,6 @@ def _sleep(self):
680683
if self.listener.is_listening():
681684
self.listener.stop_listening()
682685
while self._busy:
683-
print("Still busy")
684686
time.sleep(0.1)
685687
self._sleep_request = False
686688

0 commit comments

Comments
 (0)