@@ -61,6 +61,7 @@ struct whisper_params {
61
61
float logprob_thold = -1 .00f ;
62
62
float temperature = 0 .00f ;
63
63
float temperature_inc = 0 .20f ;
64
+ float no_speech_thold = 0 .6f ;
64
65
65
66
bool debug_mode = false ;
66
67
bool translate = false ;
@@ -137,6 +138,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
137
138
fprintf (stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n " , sparams.inference_path .c_str ());
138
139
fprintf (stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server" , sparams.ffmpeg_converter ? " true" : " false" );
139
140
fprintf (stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n " , params.suppress_nst ? " true" : " false" );
141
+ fprintf (stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n " , params.no_speech_thold );
140
142
fprintf (stderr, " \n " );
141
143
}
142
144
@@ -182,6 +184,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
182
184
else if (arg == " -ng" || arg == " --no-gpu" ) { params.use_gpu = false ; }
183
185
else if (arg == " -fa" || arg == " --flash-attn" ) { params.flash_attn = true ; }
184
186
else if (arg == " -sns" || arg == " --suppress-nst" ) { params.suppress_nst = true ; }
187
+ else if (arg == " -nth" || arg == " --no-speech-thold" ) { params.no_speech_thold = std::stof (argv[++i]); }
188
+
185
189
// server params
186
190
else if ( arg == " --port" ) { sparams.port = std::stoi (argv[++i]); }
187
191
else if ( arg == " --host" ) { sparams.hostname = argv[++i]; }
@@ -790,6 +794,7 @@ int main(int argc, char ** argv) {
790
794
wparams.beam_search .beam_size = params.beam_size ;
791
795
792
796
wparams.temperature = params.temperature ;
797
+ wparams.no_speech_thold = params.no_speech_thold ;
793
798
wparams.temperature_inc = params.temperature_inc ;
794
799
wparams.entropy_thold = params.entropy_thold ;
795
800
wparams.logprob_thold = params.logprob_thold ;
@@ -942,7 +947,7 @@ int main(int argc, char ** argv) {
942
947
943
948
// TODO compression_ratio is not implemented yet
944
949
// segment["compression_ratio"] = 0;
945
- // segment["no_speech_prob"] = 0 ;
950
+ segment[" no_speech_prob" ] = whisper_full_get_segment_no_speech_prob (ctx, i) ;
946
951
947
952
jres[" segments" ].push_back (segment);
948
953
}
0 commit comments