forked from nicholasjconn/python-alexa-voice-service
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalexa_audio.py
122 lines (101 loc) · 4.8 KB
/
alexa_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pyaudio
import wave
import subprocess
import speech_recognition
import time
__author__ = "NJC"
__license__ = "MIT"
class AlexaAudio:
    """ This object handles all audio playback and recording required by the Alexa enabled device. Audio playback
        and recording both use the PyAudio package.

        The object owns a PyAudio instance that must be released with close(). It can also be used as a context
        manager ("with AlexaAudio() as audio:"), in which case close() is called automatically on exit.
    """

    # Path to the ffmpeg executable, relative to the current working directory.
    FFMPEG_PATH = 'ffmpeg/bin/ffmpeg'
    # Scratch files used by play_mp3 to convert an MP3 response into a wave file.
    RESPONSE_MP3_PATH = 'files/response.mp3'
    RESPONSE_WAV_PATH = 'files/response.wav'
    # Number of frames read from the wave file and written to the output stream at a time.
    CHUNK_FRAMES = 1024

    def __init__(self):
        """ AlexaAudio initialization function. Creates the PyAudio instance used for playback.
        """
        # Initialize pyaudio
        self.pyaudio_instance = pyaudio.PyAudio()

    def __enter__(self):
        """ Enter a "with" block.

        :return: this AlexaAudio object
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Leave a "with" block: release the PyAudio instance. Exceptions are not suppressed.
        """
        self.close()

    def close(self):
        """ Called when the AlexaAudio object is no longer needed. This closes the PyAudio instance.
        """
        # Terminate the pyaudio instance
        self.pyaudio_instance.terminate()

    def get_audio(self, timeout=None):
        """ Get audio from the microphone. The SpeechRecognition package is used to automatically stop listening
            when the user stops speaking. A timeout can also be specified. If the timeout is reached, the function
            returns None.

        :param timeout: timeout in seconds, when to give up if the user did not speak. None means no timeout.
        :return: the raw binary audio string (PCM), or None if the timeout was reached
        """
        # Create a speech recognizer
        recognizer = speech_recognition.Recognizer()

        # Open the microphone (it is released when the "with" block exits)
        with speech_recognition.Microphone() as source:
            # Prompt the user to say something
            # TODO add sounds to prompt the user to do something, rather than text
            if timeout is None:
                print("You can start talking now...")
            else:
                print("Start talking now, you have %d seconds" % timeout)

            # Record audio until the user stops talking. timeout=None is passed straight through, which is what
            # the plain listen(source) call amounts to.
            try:
                audio = recognizer.listen(source, timeout=timeout)
            except speech_recognition.WaitTimeoutError:
                return None

        # NOTE: for testing, a pre-recorded example (e.g. 'files/example_get_time.pcm' opened in 'rb' mode) can be
        # read and returned here instead of the recorded audio.

        # Convert audio to raw_data (PCM)
        return audio.get_raw_data()

    def play_mp3(self, raw_audio):
        """ Play an MP3 file. Alexa uses the MP3 format for all audio responses. PyAudio does not support this, so
            the MP3 file must first be converted to a wave file before playing.

            This function assumes ffmpeg is located in the current working directory (ffmpeg/bin/ffmpeg) and that
            the "files" directory exists; RESPONSE_MP3_PATH and RESPONSE_WAV_PATH are overwritten on every call.

        :param raw_audio: the raw audio as a binary string
        :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status. Nothing is played in that
            case, so a stale wave file left over from an earlier call is never played by mistake.
        """
        # Save MP3 data to a file
        with open(self.RESPONSE_MP3_PATH, 'wb') as f:
            f.write(raw_audio)

        # Convert mp3 response to wave (pyaudio doesn't work with MP3 files). check_call raises when the
        # conversion fails instead of silently carrying on.
        subprocess.check_call(
            [self.FFMPEG_PATH, '-y', '-i', self.RESPONSE_MP3_PATH, self.RESPONSE_WAV_PATH],
            stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)

        # Play a wave file directly
        self.play_wav(self.RESPONSE_WAV_PATH)

    def play_wav(self, file, timeout=None, stop_event=None, repeat=False):
        """ Play a wave file using PyAudio. The file must be specified as a path.

        :param file: path to wave file
        :param timeout: maximum playback time in seconds, checked before each chunk is written. None means no
            limit.
        :param stop_event: optional object with an is_set() method (e.g. threading.Event). Playback stops before
            the next chunk once it is set.
        :param repeat: if True, restart the file each time its end is reached, until timeout or stop_event ends
            playback. A file without any frames is never repeated.
        """
        # Open wave file
        with wave.open(file, 'rb') as wf:
            # Create pyaudio stream
            stream = self.pyaudio_instance.open(
                format=self.pyaudio_instance.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)
            try:
                self._write_frames(wf, stream, timeout, stop_event, repeat)
            finally:
                # When done (or on error), stop stream and close so the output stream is never leaked
                try:
                    stream.stop_stream()
                finally:
                    stream.close()

    def _write_frames(self, wf, stream, timeout, stop_event, repeat):
        """ Copy frames from an open wave reader to an open output stream, honoring timeout/stop_event/repeat.

        :param wf: open wave reader (wave.open(..., 'rb'))
        :param stream: output stream with a write(data) method
        :param timeout: maximum playback time in seconds, or None
        :param stop_event: object with is_set(), or None
        :param repeat: whether to rewind and play again when the end of the file is reached
        """
        # A monotonic clock gives sub-second resolution and is unaffected by wall-clock/time-zone changes
        deadline = None if timeout is None else time.monotonic() + timeout

        def should_stop():
            if deadline is not None and time.monotonic() > deadline:
                return True
            return stop_event is not None and stop_event.is_set()

        while True:
            wrote_data = False
            while True:
                # Check before every chunk so that a stop request takes effect without writing more audio
                if should_stop():
                    return
                data = wf.readframes(self.CHUNK_FRAMES)
                if not data:
                    # End of file reached
                    break
                stream.write(data)
                wrote_data = True
            # Repeating a file that produced no data would spin forever without ever playing anything
            if not (repeat and wrote_data):
                return
            wf.rewind()