Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 4 additions & 19 deletions examples/models/whisper/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ DEFINE_string(
audio_path,
"",
"Path to input audio file. Accepts .wav or raw float .bin.");
// Command-line flag naming the Whisper model variant being run.
// The model-version check in main compares this value against "large-v2",
// "large-v3", and "large-v3-turbo" to pick config.decoder_start_token_id
// (50258 for those three, 50257 for any other value) and echoes it in the log.
DEFINE_string(
model_name,
"base",
"Whisper model name (base, small, medium, large, large-v2, large-v3, large-v3-turbo).");
DEFINE_double(
temperature,
0.0,
Expand Down Expand Up @@ -114,21 +110,10 @@ int main(int argc, char** argv) {
config.max_new_tokens = FLAGS_max_new_tokens;
config.temperature = static_cast<float>(FLAGS_temperature);

// Set decoder_start_token_id based on model version
if (FLAGS_model_name == "large-v2" || FLAGS_model_name == "large-v3" ||
FLAGS_model_name == "large-v3-turbo") {
config.decoder_start_token_id = 50258;
ET_LOG(
Info,
"Using decoder_start_token_id=50258 for model: %s",
FLAGS_model_name.c_str());
} else {
config.decoder_start_token_id = 50257;
ET_LOG(
Info,
"Using decoder_start_token_id=50257 for model: %s",
FLAGS_model_name.c_str());
}
// All Whisper models from Hugging Face now use the v3 tokenizer format,
// where token 50257 = <|endoftext|> and token 50258 = <|startoftranscript|>.
config.decoder_start_token_id = 50258;
ET_LOG(Info, "Using decoder_start_token_id=50258");

auto result =
runner.transcribe(features, config, [&](const std::string& piece) {
Expand Down
Loading