-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcli.py
140 lines (128 loc) · 6.98 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#
# This file is part of Tacspeak.
# (c) Copyright 2023-2024 by Joshua Webb
# Licensed under the AGPL-3.0; see LICENSE.txt file.
#
import argparse
import os
from kaldi_active_grammar import Compiler, disable_donation_message
import tacspeak
from tacspeak.__main__ import main as tacspeak_main
from tacspeak.test_model import test_model, test_model_dictation, transcribe_wav, transcribe_wav_dictation
from dragonfly import get_engine
import logging
from multiprocessing import freeze_support
def _build_parser():
    """Return the argument parser describing every Tacspeak command-line option."""
    parser = argparse.ArgumentParser(description='Start speech recognition.')
    parser.add_argument(
        '--recompile_model', dest='model_dir', action='store',
        metavar='model_dir', nargs='?', const='kaldi_model/',
        help='recompile the model in `model_dir` (default is kaldi_model/), for changes to user_lexicon.txt')
    parser.add_argument(
        '--print_mic_list', action='store_true',
        help=('see a list of available input devices and their corresponding indexes and names.'
              ' useful for setting `audio_input_device` in ./tacspeak/user_settings.py'))
    parser.add_argument(
        '--test_model', dest='test_model', action='store',
        metavar=('tsv_file', 'model_dir', 'lexicon_file', 'num_threads'), nargs=4,
        help=('test model + active grammar recognition using test audio specified in .tsv file.'
              " Example: --test_model './retain/retain.tsv' './kaldi_model/' './kaldi_model/lexicon.txt' 4"))
    parser.add_argument(
        '--test_dictation', action='store_true',
        help=('only used together with --test_model. tests model using raw dictation graph, irrespective of grammar modules.'
              " Example: --test_model './retain/retain.tsv' './kaldi_model/' './kaldi_model/lexicon.txt' 4 --test_dictation"))
    parser.add_argument(
        '--transcribe_wav', dest='transcribe_wav', action='store',
        metavar=('wav_path', 'out_txt_path', 'model_dir'), nargs=3,
        help=('transcribe a wav file using active grammar modules, output to txt file.'
              " Example: --transcribe_wav 'audio.wav' 'audio.txt' './kaldi_model/'"))
    parser.add_argument(
        '--transcribe_dictation', action='store_true',
        help=('only used together with --transcribe_wav. transcribes using raw dictation graph, irrespective of grammar modules.'
              " Example: --transcribe_wav 'audio.wav' 'audio.txt' './kaldi_model/' --transcribe_dictation"))
    return parser


def _recompile_model(model_dir):
    """Recompile the dictation graph of the Kaldi model stored in `model_dir`."""
    # Level 5 is below logging.DEBUG (10), so every record is emitted
    # while the long-running compilation is in progress.
    logging.basicConfig(level=5)
    compiler = Compiler(model_dir)
    print("Compiling dictation graph (approx. 30 minutes)...")
    compiler.compile_agf_dictation_fst()


def _run_test_model(args):
    """Handle `--test_model`: run the model test and write the report files.

    Returns:
        A `(overall_string, cmd_overall_stats)` tuple when the test ran, or
        None when the tsv file or the model directory does not exist.
    """
    # argparse enforces nargs=4, so the unpacking cannot fail.
    tsv_file, model_dir, lexicon_file, num_threads_arg = args.test_model
    if not os.path.isfile(tsv_file):
        print(f"--test_model: tsv file not found: {tsv_file!r}")
        return None
    if not os.path.isdir(model_dir):
        print(f"--test_model: model directory not found: {model_dir!r}")
        return None

    # A lexicon path that is not an existing file is passed on as None.
    if not os.path.isfile(lexicon_file):
        lexicon_file = None

    # Anything that is not a positive integer falls back to a single thread.
    try:
        num_threads = max(1, int(num_threads_arg))
    except ValueError as e:
        print(f"{e}")
        num_threads = 1

    print(f"{tsv_file},{model_dir},{lexicon_file},{num_threads}")
    if args.test_dictation:
        calculator, cmd_overall_stats = test_model_dictation(tsv_file, model_dir, lexicon_file, num_threads)
        outfile_path = 'test_model_output_dictation_tokens.txt'
        mode = "Dictation"
    else:
        calculator, cmd_overall_stats = test_model(tsv_file, model_dir, lexicon_file, num_threads)
        outfile_path = 'test_model_output_tokens.txt'
        mode = "Command"

    overall = calculator.overall_string()

    # Detailed report: overall summary, every data item, then the ranked entries.
    with open(outfile_path, 'w', encoding='utf-8') as outfile:
        outfile.write(f"\n{overall}\n")
        for item in calculator.data.items():
            outfile.write(f"\n{item!s}")
        outfile.write("\n")
        for entry in calculator.ranked_worst_to_best_list():
            outfile.write(f"\n{entry!s}")

    # Summary log: opened in append mode so results of successive runs accumulate.
    overall_entry_1 = (model_dir, tsv_file, mode, "WER", overall)
    overall_entry_2 = (model_dir, tsv_file, mode, "CMDERR", cmd_overall_stats)
    with open('test_model_output_overall.txt', 'a', encoding='utf-8') as outfile:
        outfile.write(f"{overall_entry_1}\n")
        if not args.test_dictation:
            outfile.write(f"{overall_entry_2}\n")
    return overall, cmd_overall_stats


def _run_transcribe_wav(args):
    """Handle `--transcribe_wav`: transcribe one wav file and print the result."""
    # argparse enforces nargs=3, so the unpacking cannot fail.
    wav_path, out_txt_path, model_dir = args.transcribe_wav
    if not os.path.isfile(wav_path):
        print(f"--transcribe_wav: wav file not found: {wav_path!r}")
        return
    if args.transcribe_dictation:
        entry = transcribe_wav_dictation(wav_path, out_txt_path, model_dir)
    else:
        entry = transcribe_wav(wav_path, out_txt_path, model_dir)
    print(f"{entry}")


def main():
    """Command-line entry point for Tacspeak.

    Prints the version and licence notices, parses the command line and runs
    the first requested utility action, checked in this order:
    `--recompile_model`, `--print_mic_list`, `--test_model`,
    `--transcribe_wav`. When none of them is given, the speech recognition
    application itself is started.

    Returns:
        A `(overall_string, cmd_overall_stats)` tuple after a completed
        `--test_model` run, otherwise None.
    """
    print(f"Tacspeak version {tacspeak.__version__}")
    print_notices()
    disable_donation_message()

    args = _build_parser().parse_args()

    if args.model_dir is not None:
        # Report a bad directory instead of silently falling through and
        # launching the recogniser, which is never what the user asked for.
        if not os.path.isdir(args.model_dir):
            print(f"--recompile_model: model directory not found: {args.model_dir!r}")
            return None
        _recompile_model(args.model_dir)
        return None

    if args.print_mic_list:
        get_engine('kaldi').print_mic_list()
        input("Press enter key to exit.")
        return None

    if args.test_model:
        return _run_test_model(args)

    if args.transcribe_wav:
        _run_transcribe_wav(args)
        return None

    tacspeak_main()
    return None
def print_notices():
    """Print the Tacspeak banner followed by the AGPL licence notice."""
    notice_lines = (
        "Tacspeak - speech recognition for gaming",
        "© Copyright 2023-2024 by Joshua Webb",
        "This program is free software: you can redistribute it and/or modify",
        "it under the terms of the GNU Affero General Public License as",
        "published by the Free Software Foundation, either version 3 of the",
        "License, or (at your option) any later version.",
        "This program is distributed in the hope that it will be useful,",
        "but WITHOUT ANY WARRANTY; without even the implied warranty of",
        "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the",
        "GNU Affero General Public License for more details.",
        "You should have received a copy of the GNU Affero General Public License",
        "along with this program. If not, see <https://www.gnu.org/licenses/>.",
    )
    # The notice is framed by one leading and one trailing newline; print()
    # then appends its own line terminator.
    print("\n" + "\n".join(notice_lines) + "\n")
if __name__ == '__main__':
    # freeze_support() is called before main() so that child processes started
    # through multiprocessing work when the program runs as a frozen Windows
    # executable; per the multiprocessing docs it has no effect otherwise.
    freeze_support()
    main()