Skip to content

Commit d49ac66

Browse files
update spark qa/ci tests
Signed-off-by: Pamela <179191831+pamelap-nvidia@users.noreply.github.com>
1 parent 93feb57 commit d49ac66

3 files changed

Lines changed: 18 additions & 46 deletions

File tree

tests/integration/test_lists/qa/llm_spark_func.yml

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,6 @@ llm_spark_func:
99
gte: 1
1010
lte: 1
1111
tests:
12-
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
13-
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
14-
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
15-
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
16-
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4]
1712
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-fp8-Qwen3/nvidia-Qwen3-8B-FP8]
1813
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-nvfp4-Qwen3/nvidia-Qwen3-8B-NVFP4]
1914
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8B-bf16-Qwen3/Qwen3-8B]
@@ -22,38 +17,14 @@ llm_spark_func:
2217
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B]
2318
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B]
2419
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4]
25-
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
26-
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
27-
- test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8]
28-
- test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4]
29-
- test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus]
3020
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
3121
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
3222
- test_e2e.py::test_ptp_quickstart_advanced[Llama-4-Scout-17B-16E-FP4-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4]
3323
- test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B]
3424
- test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-9B-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4]
3525
- test_e2e.py::test_ptp_quickstart_advanced_eagle3[GPT-OSS-120B-Eagle3-gpt_oss/gpt-oss-120b-gpt_oss/gpt-oss-120b-Eagle3]
36-
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
37-
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
38-
- accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype
39-
- accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_fp8
40-
- accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_nvfp4
41-
- accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_27BInstruct::test_fp8_prequantized
42-
- accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_27BInstruct::test_nvfp4_prequantized
43-
- accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_12BInstruct::test_auto_dtype
44-
- accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_12BInstruct::test_fp8_prequantized
45-
- accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_12BInstruct::test_nvfp4_prequantized
46-
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
47-
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
4826
- test_e2e.py::test_trtllm_benchmark_serving[gpt_oss/gpt-oss-20b]
4927
- test_e2e.py::test_openai_chat_guided_decoding[meta-llama/Llama-3.1-8B-Instruct]
50-
- examples/serve/test_serve.py::test_nemotron3_super_120b_nvfp4
51-
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_on]
52-
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_off]
53-
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_streaming]
54-
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[tool_calling]
55-
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[image]
56-
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[video]
5728
# ===============================================================================
5829
# 2: Multi-GPU (2 GPUs) Spark func cases with multinode support
5930
# ===============================================================================

tests/integration/test_lists/qa/llm_spark_perf.yml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,9 @@ llm_spark_perf:
4040
- perf/test_perf.py::test_perf[qwen3_30b_a3b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
4141
- perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
4242
- perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_v1.5_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
43-
- perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
44-
- perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
45-
- perf/test_perf.py::test_perf[phi_4_reasoning_plus-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
4643
- perf/test_perf.py::test_perf[qwen3_32b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
4744
- perf/test_perf.py::test_perf[qwen3_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
4845
- perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
49-
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
50-
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
51-
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
52-
- perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
53-
- perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
54-
- perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
55-
- perf/test_perf.py::test_perf[gemma_3_27b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
56-
- perf/test_perf.py::test_perf[gemma_3_27b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
57-
- perf/test_perf.py::test_perf[gemma_3_27b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
58-
- perf/test_perf.py::test_perf[gemma_3_12b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
59-
- perf/test_perf.py::test_perf[gemma_3_12b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
60-
- perf/test_perf.py::test_perf[gemma_3_12b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
6146
# ===============================================================================
6247
# 2: Multi-GPU (2 GPUs) Spark perf cases with multinode support
6348
# ===============================================================================

tests/integration/test_lists/test-db/l0_gb10.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,15 @@ l0_gb10:
1717
tests:
1818
# ------------- PyTorch tests ---------------
1919
- unittest/_torch/attention/test_attention_mla.py
20-
- test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
21-
- test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
2220
- accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[latency]
21+
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
22+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
23+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
24+
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
25+
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_off]
26+
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_streaming]
27+
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[tool_calling]
28+
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
2329
- condition:
2430
ranges:
2531
system_gpu_count:
@@ -40,3 +46,13 @@ l0_gb10:
4046
- unittest/_torch/modeling -k "modeling_out_of_tree"
4147
- unittest/_torch/modules/moe/test_moe_module.py::test_configurable_moe_single_gpu[e8_k1_h512_i512-seq=8-dtype=torch.float16-backend=CUTLASS-quant=NVFP4-routing=Renormalize]
4248
- unittest/_torch/modules/moe/test_moe_module.py::test_configurable_moe_single_gpu[e8_k1_h512_i512-seq=8-dtype=torch.bfloat16-backend=CUTLASS-quant=NVFP4-routing=Renormalize]
49+
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
50+
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
51+
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
52+
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
53+
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4]
54+
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
55+
- examples/serve/test_serve.py::test_nemotron3_super_120b_nvfp4
56+
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_on]
57+
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[image]
58+
- examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[video]

0 commit comments

Comments
 (0)