Skip to content

Commit bfe993b

Browse files
committed
temporary test changes for iteration time
Signed-off-by: William Zhang <133824995+2ez4bz@users.noreply.github.com>
1 parent a9c72e7 commit bfe993b

5 files changed

Lines changed: 93 additions & 802 deletions

File tree

jenkins/L0_Test.groovy

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4477,15 +4477,7 @@ def launchTestJobs(pipeline, testFilter)
44774477
"DGX_H100-4_GPUs-PyTorch-Ray-1": ["auto:dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
44784478
"DGX_H100-4_GPUs-AutoDeploy-1": ["auto:dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
44794479
"DGX_H100-4_GPUs-AutoDeploy-Post-Merge-1": ["auto:dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
4480-
"DGX_B200-PyTorch-1": ["auto:dgx-b200-flex", "l0_b200", 1, 9, 1, 1, true],
4481-
"DGX_B200-PyTorch-2": ["auto:dgx-b200-flex", "l0_b200", 2, 9, 1, 1, true],
4482-
"DGX_B200-PyTorch-3": ["auto:dgx-b200-flex", "l0_b200", 3, 9, 1, 1, true],
4483-
"DGX_B200-PyTorch-4": ["auto:dgx-b200-flex", "l0_b200", 4, 9, 1, 1, true],
4484-
"DGX_B200-PyTorch-5": ["auto:dgx-b200-flex", "l0_b200", 5, 9, 1, 1, true],
4485-
"DGX_B200-PyTorch-6": ["auto:dgx-b200-flex", "l0_b200", 6, 9, 1, 1, true],
4486-
"DGX_B200-PyTorch-7": ["auto:dgx-b200-flex", "l0_b200", 7, 9, 1, 1, true],
4487-
"DGX_B200-PyTorch-8": ["auto:dgx-b200-flex", "l0_b200", 8, 9, 1, 1, true],
4488-
"DGX_B200-PyTorch-9": ["auto:dgx-b200-flex", "l0_b200", 9, 9, 1, 1, true],
4480+
"DGX_B200-PyTorch-1": ["auto:dgx-b200-flex", "l0_b200", 1, 1, 1, 1, true],
44894481
"DGX_B200-AutoDeploy-1": ["auto:dgx-b200-flex", "l0_b200", 1, 1, 1, 1, true],
44904482
"DGX_B200-Triton-Post-Merge-1": ["auto:dgx-b200-flex", "l0_b200", 1, 1, 1, 1, true],
44914483
"DGX_B200-PyTorch-Post-Merge-1": ["auto:dgx-b200-flex", "l0_b200", 1, 2, 1, 1, true],

scripts/check_test_list.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -720,6 +720,11 @@ def verify_waive_list(llm_src, args):
720720
with open(tmp_waives_file, "w") as f:
721721
f.writelines(f"{line}\n" for line in sorted(processed_lines))
722722

723+
if not processed_lines:
724+
print("No integration waive entries found; skipping collection.",
725+
flush=True)
726+
return
727+
723728
subprocess.run(
724729
f"cd {llm_src}/tests/integration/defs && "
725730
f"pytest --test-list={tmp_waives_file} --output-dir={llm_src} -s --co -q",

tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,87 @@ class TestNanoV3Omni(LlmapiAccuracyTestHarness):
747747
marks=(skip_pre_blackwell,),
748748
id="nvfp4",
749749
),
750+
# TEMPORARY: duplicate the flaky NVFP4 case for B200 CI iteration.
751+
pytest.param(
752+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
753+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
754+
KvCacheConfig(
755+
free_gpu_memory_fraction=0.8,
756+
mamba_ssm_cache_dtype="float32",
757+
enable_block_reuse=False,
758+
dtype="fp8",
759+
),
760+
128,
761+
QuantAlgo.MIXED_PRECISION,
762+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
763+
None,
764+
marks=(skip_pre_blackwell,),
765+
id="nvfp4_repeat1",
766+
),
767+
pytest.param(
768+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
769+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
770+
KvCacheConfig(
771+
free_gpu_memory_fraction=0.8,
772+
mamba_ssm_cache_dtype="float32",
773+
enable_block_reuse=False,
774+
dtype="fp8",
775+
),
776+
128,
777+
QuantAlgo.MIXED_PRECISION,
778+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
779+
None,
780+
marks=(skip_pre_blackwell,),
781+
id="nvfp4_repeat2",
782+
),
783+
pytest.param(
784+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
785+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
786+
KvCacheConfig(
787+
free_gpu_memory_fraction=0.8,
788+
mamba_ssm_cache_dtype="float32",
789+
enable_block_reuse=False,
790+
dtype="fp8",
791+
),
792+
128,
793+
QuantAlgo.MIXED_PRECISION,
794+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
795+
None,
796+
marks=(skip_pre_blackwell,),
797+
id="nvfp4_repeat3",
798+
),
799+
pytest.param(
800+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
801+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
802+
KvCacheConfig(
803+
free_gpu_memory_fraction=0.8,
804+
mamba_ssm_cache_dtype="float32",
805+
enable_block_reuse=False,
806+
dtype="fp8",
807+
),
808+
128,
809+
QuantAlgo.MIXED_PRECISION,
810+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
811+
None,
812+
marks=(skip_pre_blackwell,),
813+
id="nvfp4_repeat4",
814+
),
815+
pytest.param(
816+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
817+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
818+
KvCacheConfig(
819+
free_gpu_memory_fraction=0.8,
820+
mamba_ssm_cache_dtype="float32",
821+
enable_block_reuse=False,
822+
dtype="fp8",
823+
),
824+
128,
825+
QuantAlgo.MIXED_PRECISION,
826+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
827+
None,
828+
marks=(skip_pre_blackwell,),
829+
id="nvfp4_repeat5",
830+
),
750831
],
751832
)
752833
# `torch.compile` uses a thread pool to compile and it's used in audio pre-processing.

0 commit comments

Comments
 (0)