Skip to content

Commit 2a1b04e

Browse files
committed
Added support for Google Cloud Text-to-Speech API in speech.voices
1 parent b9c4b0d commit 2a1b04e

File tree

4 files changed

+113
-11
lines changed

4 files changed

+113
-11
lines changed

requirements.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ timezonefinder>=6.0.1
2121
rjsmin>=1.2.1
2222
python-youtube>=0.8.3
2323
country-list>=1.0.0
24-
# For AWS Polly text-to-speech
24+
# AWS Polly text-to-speech
2525
botocore==1.21.40
2626
boto3==1.18.40
27-
# For Azure text-to-speech.
27+
# Azure text-to-speech.
2828
azure-cognitiveservices-speech>=1.24.2
29+
# Google text-to-speech
30+
google-cloud-texttospeech>=2.14.1
2931
# Ours
3032
reynir>=3.5.3
3133
islenska>=0.4.6

scripts/gen_embla_rec.sh

+4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ python3 speak.py -w -f "pcm" --voice "$1" -n --override "dunno07-${VOICE_LOWER}.
2626
# Error messages
2727
python3 speak.py -w -f "pcm" --voice "$1" -n --override "err-${VOICE_LOWER}.wav" "Villa kom upp í samskiptum við netþjón."
2828
python3 speak.py -w -f "pcm" --voice "$1" -n --override "conn-${VOICE_LOWER}.wav" "Ekki næst samband við netið."
29+
python3 speak.py -w -f "pcm" --voice "$1" -n --override "nomic-${VOICE_LOWER}.wav" "Mig vantar heimild til að nota hljóðnema."
2930

3031
# My name is
3132
python3 speak.py -w -f "pcm" --voice "$1" -n --override "mynameis-${VOICE_LOWER}.wav" "Svona hljómar þessi rödd."
33+
34+
# Voice speed
35+
python3 speak.py -w -f "pcm" --voice "$1" -n --override "voicespeed-${VOICE_LOWER}.wav" "Svona hljómar þessi hraði."

speech/voices/google.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/usr/bin/env python
2+
"""
3+
4+
Greynir: Natural language processing for Icelandic
5+
6+
Copyright (C) 2023 Miðeind ehf.
7+
8+
This program is free software: you can redistribute it and/or modify
9+
it under the terms of the GNU General Public License as published by
10+
the Free Software Foundation, either version 3 of the License, or
11+
(at your option) any later version.
12+
This program is distributed in the hope that it will be useful,
13+
but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
GNU General Public License for more details.
16+
17+
You should have received a copy of the GNU General Public License
18+
along with this program. If not, see http://www.gnu.org/licenses/.
19+
20+
21+
Icelandic-language text to speech via the Google Cloud API.
22+
23+
"""
24+
25+
from typing import Optional
26+
27+
import logging
28+
import uuid
29+
from pathlib import Path
30+
31+
from google.cloud import texttospeech
32+
33+
from . import AUDIO_SCRATCH_DIR, suffix_for_audiofmt
34+
35+
# from speech.trans import strip_markup
36+
37+
38+
# Identifier for this speech synthesis backend.
NAME = "Google"
# Voice identifiers exposed by this module.
VOICES = frozenset(("Anna",))
# Audio formats exposed by this module. The trailing comma is required:
# frozenset(("mp3")) would iterate over the string and produce
# {"m", "p", "3"} rather than {"mp3"}.
AUDIO_FORMATS = frozenset(("mp3",))
41+
42+
43+
def text_to_audio_data(
    text: str,
    text_format: str,
    audio_format: str,
    voice_id: str,
    speed: float = 1.0,
) -> Optional[bytes]:
    """Feed text to Google's Cloud Text-to-Speech API and return the audio data.

    Args:
        text: The text to synthesize. It is always submitted as plain text.
        text_format: Currently unused by this backend.
        audio_format: Currently unused; MP3 is always requested.
        voice_id: Currently unused; an Icelandic ("is-IS") voice with
            female SSML gender is always requested.
        speed: Speaking rate multiplier forwarded to the API as
            ``speaking_rate`` (1.0 is normal speed). Values the service
            does not accept cause the request to fail, in which case
            None is returned.

    Returns:
        The synthesized audio as bytes, or None if the client could not be
        created or the request failed. Failures are logged, not raised.
    """
    try:
        # Client creation is inside the try block because it can fail too
        # (e.g. when no credentials are configured), and this function
        # reports failure by returning None rather than raising.
        client = texttospeech.TextToSpeechClient()

        # The text input to be synthesized
        synthesis_input = texttospeech.SynthesisInput(text=text)

        # Voice request: language code and SSML voice gender
        voice = texttospeech.VoiceSelectionParams(
            language_code="is-IS",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        )

        # Type of audio returned. Only MP3 is supported for now.
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=speed,
        )

        # Perform the text-to-speech request with the selected
        # voice parameters and audio configuration
        response = client.synthesize_speech(
            input=synthesis_input, voice=voice, audio_config=audio_config
        )
        return response.audio_content
    except Exception as e:
        logging.error(f"Error communicating with Google Cloud TTS API: {e}")
    return None
79+
80+
81+
def text_to_audio_url(
    text: str,
    text_format: str,
    audio_format: str,
    voice_id: str,
    speed: float = 1.0,
) -> Optional[str]:
    """Synthesize text to speech, save it to a file and return the file's URL.

    The audio is obtained from text_to_audio_data() and written to a file
    in AUDIO_SCRATCH_DIR, named with a random UUID and the suffix given by
    suffix_for_audiofmt() for the requested audio format.

    Args:
        text: The text to synthesize.
        text_format: Passed through to text_to_audio_data().
        audio_format: Passed through to text_to_audio_data() and used to
            pick the file suffix.
        voice_id: Passed through to text_to_audio_data().
        speed: Passed through to text_to_audio_data().

    Returns:
        A file:// URL pointing to the written audio file, or None if no
        audio data was produced or the file could not be written.
    """
    # Arguments are passed explicitly rather than via **locals(), which
    # would break as soon as any other local is bound before the call.
    data = text_to_audio_data(
        text=text,
        text_format=text_format,
        audio_format=audio_format,
        voice_id=voice_id,
        speed=speed,
    )
    if not data:
        return None

    suffix = suffix_for_audiofmt(audio_format)
    out_fn: str = str(AUDIO_SCRATCH_DIR / f"{uuid.uuid4()}.{suffix}")
    try:
        with open(out_fn, "wb") as f:
            f.write(data)
    except Exception as e:
        logging.error(f"Error writing audio file {out_fn}: {e}")
        return None

    # Generate and return file:// URL to audio file
    return Path(out_fn).as_uri()

speech/voices/tiro.py

-9
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232

3333
from . import AUDIO_SCRATCH_DIR, suffix_for_audiofmt
3434
from speech.trans import strip_markup
35-
from speech.voices import generate_data_uri, mimetype_for_audiofmt
3635

3736
NAME = "Tiro"
3837
VOICES = frozenset(("Alfur", "Dilja", "Bjartur", "Rosa", "Alfur_v2", "Dilja_v2"))
@@ -110,11 +109,3 @@ def text_to_audio_url(
110109
# Generate and return file:// URL to audio file
111110
url = Path(out_fn).as_uri()
112111
return url
113-
114-
# Old method returned Data URI instead of writing to file
115-
# and returning a file:// URL
116-
# # Generate Data URI from the bytes received
117-
# mime_type = mimetype_for_audiofmt(audio_format)
118-
# data_uri = generate_data_uri(data, mime_type=mime_type)
119-
120-
# return data_uri

0 commit comments

Comments
 (0)