@@ -19,8 +19,10 @@ Arguments:
1919 hf_model HuggingFace model ID (required)
2020 Supported models:
2121 - mistralai/Voxtral-Mini-3B-2507
22+ - nvidia/diar_streaming_sortformer_4spk-v2
2223 - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
2324 - google/gemma-3-4b-it
25+ - Qwen/Qwen3-0.6B
2426 - nvidia/parakeet-tdt
2527 - mistralai/Voxtral-Mini-4B-Realtime-2602
2628
@@ -43,6 +45,7 @@ Arguments:
4345Examples:
4446 test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
4547 test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
48+ test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output"
4649 test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output"
4750 test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output"
4851 test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming"
@@ -151,6 +154,18 @@ case "$HF_MODEL" in
151154 AUDIO_FILE=" "
152155 IMAGE_PATH=" docs/source/_static/img/et-logo.png"
153156 ;;
157+ Qwen/Qwen3-0.6B)
158+ MODEL_NAME=" qwen3"
159+ RUNNER_TARGET=" llama_main"
160+ RUNNER_PATH=" llama"
161+ EXPECTED_OUTPUT=" Paris"
162+ PREPROCESSOR=" "
163+ TOKENIZER_URL=" https://huggingface.co/Qwen/Qwen3-0.6B/resolve/main" # @lint-ignore
164+ TOKENIZER_FILE=" "
165+ AUDIO_URL=" "
166+ AUDIO_FILE=" "
167+ IMAGE_PATH=" "
168+ ;;
154169 nvidia/parakeet-tdt)
155170 MODEL_NAME=" parakeet"
156171 RUNNER_TARGET=" parakeet_runner"
@@ -163,6 +178,18 @@ case "$HF_MODEL" in
163178 AUDIO_FILE=" test_audio.wav"
164179 IMAGE_PATH=" "
165180 ;;
181+ nvidia/diar_streaming_sortformer_4spk-v2)
182+ MODEL_NAME=" sortformer"
183+ RUNNER_TARGET=" sortformer_runner"
184+ RUNNER_PATH=" sortformer"
185+ EXPECTED_OUTPUT=" Speaker 1"
186+ PREPROCESSOR=" "
187+ TOKENIZER_URL=" "
188+ TOKENIZER_FILE=" "
189+ AUDIO_URL=" https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
190+ AUDIO_FILE=" poem.wav"
191+ IMAGE_PATH=" "
192+ ;;
166193 mistralai/Voxtral-Mini-4B-Realtime-2602)
167194 MODEL_NAME=" voxtral_realtime"
168195 RUNNER_TARGET=" voxtral_realtime_runner"
@@ -177,7 +204,7 @@ case "$HF_MODEL" in
177204 ;;
178205 * )
179206 echo " Error: Unsupported model '$HF_MODEL '"
180- echo " Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, nvidia/parakeet-tdt"
207+ echo " Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
181208 exit 1
182209 ;;
183210esac
@@ -190,8 +217,8 @@ echo "::endgroup::"
190217echo " ::group::Prepare $MODEL_NAME Artifacts"
191218
192219
193- # Download tokenizer files (skip for parakeet and voxtral_realtime which bundle tokenizer in export)
194- if [ " $MODEL_NAME " != " parakeet" ] && [ " $MODEL_NAME " != " voxtral_realtime" ]; then
220+ # Download tokenizer files (skip for models that bundle tokenizer in export or do not use one )
221+ if [ " $MODEL_NAME " != " parakeet" ] && [ " $MODEL_NAME " != " voxtral_realtime" ] && [ " $MODEL_NAME " != " sortformer" ]; then
195222 if [ " $TOKENIZER_FILE " != " " ]; then
196223 curl -L $TOKENIZER_URL /$TOKENIZER_FILE -o $MODEL_DIR /$TOKENIZER_FILE
197224 else
@@ -246,9 +273,14 @@ if [ "$(uname -s)" = "Darwin" ] && [ -f "$RUNNER_BIN" ]; then
246273 install_name_tool -change /opt/llvm-openmp/lib/libomp.dylib @rpath/libomp.dylib " $RUNNER_BIN "
247274 fi
248275fi
249- # For CUDA, add data_path argument (Metal embeds data in .pte)
276+ # For CUDA, add named data argument (Metal embeds data in .pte).
277+ # Llama runner uses --data_paths, other runners use --data_path.
250278if [ " $DEVICE " = " cuda" ]; then
251- RUNNER_ARGS=" $RUNNER_ARGS --data_path ${MODEL_DIR} /aoti_cuda_blob.ptd"
279+ if [ " $RUNNER_PATH " = " llama" ]; then
280+ RUNNER_ARGS=" $RUNNER_ARGS --data_paths ${MODEL_DIR} /aoti_cuda_blob.ptd"
281+ else
282+ RUNNER_ARGS=" $RUNNER_ARGS --data_path ${MODEL_DIR} /aoti_cuda_blob.ptd"
283+ fi
252284fi
253285
254286# Add model-specific arguments
@@ -262,15 +294,34 @@ case "$MODEL_NAME" in
262294 gemma3)
263295 RUNNER_ARGS=" $RUNNER_ARGS --tokenizer_path ${MODEL_DIR} / --image_path $IMAGE_PATH "
264296 ;;
297+ qwen3)
298+ PROMPT_FILE=" ${MODEL_DIR} /qwen3_prompt.txt"
299+ cat > " ${PROMPT_FILE} " << 'EOF '
300+ <|im_start|>user
301+ What is the capital of France?<|im_end|>
302+ <|im_start|>assistant
303+ EOF
304+ RUNNER_ARGS=" $RUNNER_ARGS --tokenizer_path ${MODEL_DIR} / --prompt_file ${PROMPT_FILE} "
305+ ;;
265306 parakeet)
266307 RUNNER_ARGS=" --model_path ${MODEL_DIR} /model.pte --audio_path ${MODEL_DIR} /$AUDIO_FILE --tokenizer_path ${MODEL_DIR} /$TOKENIZER_FILE "
267308 # For CUDA, add data_path argument (Metal embeds data in .pte)
268309 if [ " $DEVICE " = " cuda" ]; then
269310 RUNNER_ARGS=" $RUNNER_ARGS --data_path ${MODEL_DIR} /aoti_cuda_blob.ptd"
270311 fi
271312 ;;
313+ sortformer)
314+ RUNNER_ARGS=" --model_path ${MODEL_DIR} /model.pte --audio_path ${MODEL_DIR} /$AUDIO_FILE "
315+ if [ " $DEVICE " = " cuda" ]; then
316+ RUNNER_ARGS=" $RUNNER_ARGS --data_path ${MODEL_DIR} /aoti_cuda_blob.ptd"
317+ fi
318+ ;;
272319 voxtral_realtime)
273320 RUNNER_ARGS=" --model_path ${MODEL_DIR} /model.pte --tokenizer_path ${MODEL_DIR} /$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR} /$PREPROCESSOR --audio_path ${MODEL_DIR} /$AUDIO_FILE --temperature 0"
321+ # Add CUDA data path if present
322+ if [ " $DEVICE " = " cuda" ] && [ -f " ${MODEL_DIR} /aoti_cuda_blob.ptd" ]; then
323+ RUNNER_ARGS=" $RUNNER_ARGS --data_path ${MODEL_DIR} /aoti_cuda_blob.ptd"
324+ fi
274325 # Determine streaming mode based on MODE parameter
275326 USE_STREAMING=" true"
276327 if [ " $MODE " = " vr-offline" ]; then
0 commit comments