From d1dd0b7535076e1afa5f6b138305fe60cd59a7ac Mon Sep 17 00:00:00 2001 From: QuantiusBenignus <120202899+QuantiusBenignus@users.noreply.github.com> Date: Tue, 23 Jun 2026 14:51:15 -0400 Subject: [PATCH] Fix argument flag for min speech duration in VAD Changed the short flag for vad_min_speech_duration_ms from -vsd to -vspd, so that it no longer shadows the -vsd flag of vad_min_silence_duration_ms. In usage(), clarified the output timestamp units. Fixed a few typos. --- examples/vad-speech-segments/speech.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/vad-speech-segments/speech.cpp b/examples/vad-speech-segments/speech.cpp index a22425c4b8c..e929e58e960 100644 --- a/examples/vad-speech-segments/speech.cpp +++ b/examples/vad-speech-segments/speech.cpp @@ -26,6 +26,7 @@ static void vad_print_usage(int /*argc*/, char ** argv, const cli_params & param fprintf(stderr, "\n"); fprintf(stderr, "usage: %s [options] file\n", argv[0]); fprintf(stderr, "supported audio formats: flac, mp3, ogg, wav\n"); + fprintf(stderr, "Note: Output timestamps are in centiseconds (1/100th of a second).\n"); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help [default] show this help message and exit\n"); @@ -64,7 +65,7 @@ static bool vad_params_parse(int argc, char ** argv, cli_params & params) { else if (arg == "-ug" || arg == "--use-gpu") { params.use_gpu = true; } else if (arg == "-vm" || arg == "--vad-model") { params.vad_model = ARGV_NEXT; } else if (arg == "-vt" || arg == "--vad-threshold") { params.vad_threshold = std::stof(ARGV_NEXT); } - else if (arg == "-vsd" || arg == "--vad-min-speech-duration-ms") { params.vad_min_speech_duration_ms = std::stoi(ARGV_NEXT); } + else if (arg == "-vspd" || arg == "--vad-min-speech-duration-ms") { params.vad_min_speech_duration_ms = std::stoi(ARGV_NEXT); } else if (arg == "-vsd" || arg == "--vad-min-silence-duration-ms") { params.vad_min_speech_duration_ms = std::stoi(ARGV_NEXT); } else if (arg == "-vmsd" || arg == 
"--vad-max-speech-duration-s") { params.vad_max_speech_duration_s = std::stof(ARGV_NEXT); } else if (arg == "-vp" || arg == "--vad-speech-pad-ms") { params.vad_speech_pad_ms = std::stoi(ARGV_NEXT); } @@ -122,7 +123,7 @@ int main(int argc, char ** argv) { return 3; } - // Get the the vad segements using the probabilities that have been computed + // Get the vad segments using the probabilities that have been computed // previously and stored in the whisper_vad_context. struct whisper_vad_params params = whisper_vad_default_params(); params.threshold = cli_params.vad_threshold;