dense-analysis · yetanotherdeveloper · Jan 2, 2021 · Jan 2, 2021
diff --git a/README.md b/README.md
@@ -63,6 +63,24 @@ register a project with access to the "Cloud Speech API." See Google's
 speech-to-text demo site for more information:
 https://cloud.google.com/speech-to-text/
 
+### DeepSpeech support (experimental)
+vim-speech can use Mozilla [DeepSpeech](https://github.com/mozilla/DeepSpeech) as an alternative speech recognition engine.
+
+You need to install deepspeech:
+```
+pip install deepspeech
+```
+
+Then download or train a DeepSpeech model (and, optionally, the language model scorer), and
+tell this plugin where they are located, e.g.:
+```
+export DEEPSPEECH_MODEL=<path to my deepspeech model e.g. deepspeech-0.9.3-models.pbmm>
+export DEEPSPEECH_SCORER=<path to my deepspeech scorer e.g. deepspeech-0.9.3-models.scorer>
+```
+
+It may be helpful to fine-tune the pre-trained model with your own voice samples.
+More information is available in the [DeepSpeech documentation](https://deepspeech.readthedocs.io/).
+
 ## Usage
 
 Once you have figured out how to get everything installed, you can use the

diff --git a/autoload/vim_speech.vim b/autoload/vim_speech.vim
@@ -80,8 +80,8 @@ function! s:StartJobIfNeeded(buffer) abort
         return
     endif
 
-    if empty($GOOGLE_APPLICATION_CREDENTIALS)
-        throw 'GOOGLE_APPLICATION_CREDENTIALS is not set'
+    if empty($GOOGLE_APPLICATION_CREDENTIALS) && empty($DEEPSPEECH_MODEL)
+        throw 'Neither GOOGLE_APPLICATION_CREDENTIALS nor DEEPSPEECH_MODEL is set'
     endif
 
     let l:command = ale#Escape(g:vim_speech_dir . '/venv/bin/python')

diff --git a/plugin/speech_to_text_client.py b/plugin/speech_to_text_client.py
@@ -92,6 +92,21 @@ def stop_recording(self):
 
         return output_file.getvalue()
 
+def transcribe_file_with_deepspeech(content):  # Transcribe recorded audio with a DeepSpeech model; returns '' for empty input, else the result of Model.stt().
+    from deepspeech import Model  # imported inside the function, so deepspeech is only needed when this function is called
+    import numpy as np
+
+    if not content:  # nothing was recorded: return early without loading the model
+        return ''
+
+    ds = Model(os.environ.get('DEEPSPEECH_MODEL'))  # model path comes from the environment; the model is constructed on every call
+    scorer = os.environ.get('DEEPSPEECH_SCORER')
+    if scorer:  # the scorer is optional; it is enabled only when DEEPSPEECH_SCORER is set and non-empty
+        ds.enableExternalScorer(scorer)
+    numpy_content = np.frombuffer(content, dtype=np.int16)  # reinterpret the buffer as int16 values; assumes content is raw 16-bit PCM bytes - TODO confirm against RecordingClient
+    transcribe = ds.stt(numpy_content)
+    return transcribe
+
 
 def transcribe_file(content):
     from google.cloud import speech
@@ -127,11 +142,13 @@ def stdin_has_data():
 
 
 def main():
+
     # Stop early if the environment variable isn't set.
-    if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'):
+    if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') and not os.environ.get('DEEPSPEECH_MODEL'):
         sys.exit(
             'You must set GOOGLE_APPLICATION_CREDENTIALS'
-            ' to your JSON credentials filename.'
+            'to your JSON credentials filename or DEEPSPEECH_MODEL'
+            'to trained deepspeech model.'
         )
 
     client = RecordingClient()
@@ -156,7 +173,7 @@ def main():
                 elif message == 'stop':
                     print_and_flush('record end')
                     audio_content = client.stop_recording()
-                    print_and_flush('speech', transcribe_file(audio_content))
+                    print_and_flush('speech',transcribe_file(audio_content) if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') else transcribe_file_with_deepspeech(audio_content))
                 elif message == 'quit':
                     break