Added support for Google Cloud Text-to-Speech API in speech.voices

sveinbjornt · sveinbjornt · commit 2a1b04ee2c34 · 2023-04-04T12:03:33.000Z
diff --git a/requirements.txt b/requirements.txt
@@ -21,11 +21,13 @@ timezonefinder>=6.0.1
 rjsmin>=1.2.1
 python-youtube>=0.8.3
 country-list>=1.0.0
-# For AWS Polly text-to-speech
+# AWS Polly text-to-speech
 botocore==1.21.40
 boto3==1.18.40
-# For Azure text-to-speech.
+# Azure text-to-speech
 azure-cognitiveservices-speech>=1.24.2
+# Google text-to-speech
+google-cloud-texttospeech>=2.14.1
 # Ours
 reynir>=3.5.3
 islenska>=0.4.6
diff --git a/scripts/gen_embla_rec.sh b/scripts/gen_embla_rec.sh
@@ -26,6 +26,10 @@ python3 speak.py -w -f "pcm" --voice "$1" -n --override "dunno07-${VOICE_LOWER}.
 # Error messages
 python3 speak.py -w -f "pcm" --voice "$1" -n --override "err-${VOICE_LOWER}.wav" "Villa kom upp í samskiptum við netþjón."
 python3 speak.py -w -f "pcm" --voice "$1" -n --override "conn-${VOICE_LOWER}.wav" "Ekki næst samband við netið."
+python3 speak.py -w -f "pcm" --voice "$1" -n --override "nomic-${VOICE_LOWER}.wav" "Mig vantar heimild til að nota hljóðnema."
 
 # My name is
 python3 speak.py -w -f "pcm" --voice "$1" -n --override "mynameis-${VOICE_LOWER}.wav" "Svona hljómar þessi rödd."
+
+# Voice speed
+python3 speak.py -w -f "pcm" --voice "$1" -n --override "voicespeed-${VOICE_LOWER}.wav" "Svona hljómar þessi hraði."
diff --git a/speech/voices/google.py b/speech/voices/google.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+"""
+
+    Greynir: Natural language processing for Icelandic
+
+    Copyright (C) 2023 Miðeind ehf.
+
+       This program is free software: you can redistribute it and/or modify
+       it under the terms of the GNU General Public License as published by
+       the Free Software Foundation, either version 3 of the License, or
+       (at your option) any later version.
+       This program is distributed in the hope that it will be useful,
+       but WITHOUT ANY WARRANTY; without even the implied warranty of
+       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+       GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see http://www.gnu.org/licenses/.
+
+
+    Icelandic-language text to speech via the Google Cloud API.
+
+"""
+
+from typing import Optional
+
+import logging
+import uuid
+from pathlib import Path
+
+from google.cloud import texttospeech
+
+from . import AUDIO_SCRATCH_DIR, suffix_for_audiofmt
+
+# from speech.trans import strip_markup
+
+
+NAME = "Google"
+VOICES = frozenset(("Anna",))
+AUDIO_FORMATS = frozenset(("mp3",))  # 1-tuple; a bare "mp3" would be split into chars
+
+
+def text_to_audio_data(
+    text: str,
+    text_format: str,
+    audio_format: str,
+    voice_id: str,
+    speed: float = 1.0,
+) -> Optional[bytes]:
+    """Send text to the Google Cloud TTS API and return the MP3 audio bytes.
+
+    text_format, audio_format and voice_id are accepted but currently unused:
+    the request is always plain text, language is-IS, female voice, MP3 output.
+    speed is forwarded as the speaking rate, clamped to 0.25-4.0.
+    Returns None (after logging the error) if the request fails.
+    """
+    # Plain-text input; the voice is picked by language code and gender only.
+    synthesis_input = texttospeech.SynthesisInput(text=text)
+    voice = texttospeech.VoiceSelectionParams(
+        language_code="is-IS", ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
+    )
+    # MP3 only for now. NOTE(review): rate range [0.25, 4.0] per API docs, confirm.
+    audio_config = texttospeech.AudioConfig(
+        audio_encoding=texttospeech.AudioEncoding.MP3,
+        speaking_rate=min(max(speed, 0.25), 4.0),
+    )
+
+    try:
+        # Client creation is inside the try since it can fail too
+        # (e.g. on missing credentials).
+        client = texttospeech.TextToSpeechClient()
+        response = client.synthesize_speech(
+            input=synthesis_input, voice=voice, audio_config=audio_config
+        )
+        return response.audio_content
+    except Exception as e:
+        logging.error(f"Error communicating with Google Cloud TTS API: {e}")
+        return None
+
+
+def text_to_audio_url(
+    text: str,
+    text_format: str,
+    audio_format: str,
+    voice_id: str,
+    speed: float = 1.0,
+) -> Optional[str]:
+    """Write synthesized speech to a scratch file and return its file:// URL.
+
+    Returns None if no audio data was produced or the file could not be written.
+    """
+    audio = text_to_audio_data(text, text_format, audio_format, voice_id, speed)
+    if not audio:
+        return None
+
+    # Random UUID file name in the scratch directory, suffix derived from format
+    extension = suffix_for_audiofmt(audio_format)
+    out_path: str = str(AUDIO_SCRATCH_DIR / f"{uuid.uuid4()}.{extension}")
+    try:
+        with open(out_path, "wb") as out:
+            out.write(audio)
+    except Exception as err:
+        logging.error(f"Error writing audio file {out_path}: {err}")
+        return None
+    return Path(out_path).as_uri()
diff --git a/speech/voices/tiro.py b/speech/voices/tiro.py
@@ -32,7 +32,6 @@
 
 from . import AUDIO_SCRATCH_DIR, suffix_for_audiofmt
 from speech.trans import strip_markup
-from speech.voices import generate_data_uri, mimetype_for_audiofmt
 
 NAME = "Tiro"
 VOICES = frozenset(("Alfur", "Dilja", "Bjartur", "Rosa", "Alfur_v2", "Dilja_v2"))
@@ -110,11 +109,3 @@ def text_to_audio_url(
     # Generate and return file:// URL to audio file
     url = Path(out_fn).as_uri()
     return url
-
-    # Old method returned Data URI instead of writing to file
-    # and returning a file:// URL
-    # # Generate Data URI from the bytes received
-    # mime_type = mimetype_for_audiofmt(audio_format)
-    # data_uri = generate_data_uri(data, mime_type=mime_type)
-
-    # return data_uri