Skip to content

Commit a2b7c52

Browse files
committed
temporary test changes for iteration time
Signed-off-by: William Zhang <133824995+2ez4bz@users.noreply.github.com>
1 parent 1832ab3 commit a2b7c52

4 files changed

Lines changed: 83 additions & 801 deletions

File tree

jenkins/L0_Test.groovy

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4417,15 +4417,7 @@ def launchTestJobs(pipeline, testFilter)
44174417
"DGX_H100-4_GPUs-PyTorch-Ray-1": ["auto:dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
44184418
"DGX_H100-4_GPUs-AutoDeploy-1": ["auto:dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
44194419
"DGX_H100-4_GPUs-AutoDeploy-Post-Merge-1": ["auto:dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
4420-
"DGX_B200-PyTorch-1": ["auto:dgx-b200-flex", "l0_b200", 1, 9, 1, 1, true],
4421-
"DGX_B200-PyTorch-2": ["auto:dgx-b200-flex", "l0_b200", 2, 9, 1, 1, true],
4422-
"DGX_B200-PyTorch-3": ["auto:dgx-b200-flex", "l0_b200", 3, 9, 1, 1, true],
4423-
"DGX_B200-PyTorch-4": ["auto:dgx-b200-flex", "l0_b200", 4, 9, 1, 1, true],
4424-
"DGX_B200-PyTorch-5": ["auto:dgx-b200-flex", "l0_b200", 5, 9, 1, 1, true],
4425-
"DGX_B200-PyTorch-6": ["auto:dgx-b200-flex", "l0_b200", 6, 9, 1, 1, true],
4426-
"DGX_B200-PyTorch-7": ["auto:dgx-b200-flex", "l0_b200", 7, 9, 1, 1, true],
4427-
"DGX_B200-PyTorch-8": ["auto:dgx-b200-flex", "l0_b200", 8, 9, 1, 1, true],
4428-
"DGX_B200-PyTorch-9": ["auto:dgx-b200-flex", "l0_b200", 9, 9, 1, 1, true],
4420+
"DGX_B200-PyTorch-1": ["auto:dgx-b200-flex", "l0_b200", 1, 1, 1, 1, true],
44294421
"DGX_B200-AutoDeploy-1": ["auto:dgx-b200-flex", "l0_b200", 1, 1, 1, 1, true],
44304422
"DGX_B200-Triton-Post-Merge-1": ["auto:dgx-b200-flex", "l0_b200", 1, 1, 1, 1, true],
44314423
"DGX_B200-PyTorch-Post-Merge-1": ["auto:dgx-b200-flex", "l0_b200", 1, 2, 1, 1, true],

tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -711,6 +711,82 @@ class TestNanoV3Omni(LlmapiAccuracyTestHarness):
711711
marks=(skip_pre_blackwell,),
712712
id="nvfp4",
713713
),
714+
# TEMPORARY: duplicate the flaky NVFP4 case for B200 CI iteration.
715+
pytest.param(
716+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
717+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
718+
KvCacheConfig(
719+
free_gpu_memory_fraction=0.8,
720+
mamba_ssm_cache_dtype="float32",
721+
enable_block_reuse=False,
722+
dtype="fp8",
723+
),
724+
128,
725+
QuantAlgo.MIXED_PRECISION,
726+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
727+
marks=(skip_pre_blackwell,),
728+
id="nvfp4_repeat1",
729+
),
730+
pytest.param(
731+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
732+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
733+
KvCacheConfig(
734+
free_gpu_memory_fraction=0.8,
735+
mamba_ssm_cache_dtype="float32",
736+
enable_block_reuse=False,
737+
dtype="fp8",
738+
),
739+
128,
740+
QuantAlgo.MIXED_PRECISION,
741+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
742+
marks=(skip_pre_blackwell,),
743+
id="nvfp4_repeat2",
744+
),
745+
pytest.param(
746+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
747+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
748+
KvCacheConfig(
749+
free_gpu_memory_fraction=0.8,
750+
mamba_ssm_cache_dtype="float32",
751+
enable_block_reuse=False,
752+
dtype="fp8",
753+
),
754+
128,
755+
QuantAlgo.MIXED_PRECISION,
756+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
757+
marks=(skip_pre_blackwell,),
758+
id="nvfp4_repeat3",
759+
),
760+
pytest.param(
761+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
762+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
763+
KvCacheConfig(
764+
free_gpu_memory_fraction=0.8,
765+
mamba_ssm_cache_dtype="float32",
766+
enable_block_reuse=False,
767+
dtype="fp8",
768+
),
769+
128,
770+
QuantAlgo.MIXED_PRECISION,
771+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
772+
marks=(skip_pre_blackwell,),
773+
id="nvfp4_repeat4",
774+
),
775+
pytest.param(
776+
"nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
777+
f"{llm_models_root()}/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
778+
KvCacheConfig(
779+
free_gpu_memory_fraction=0.8,
780+
mamba_ssm_cache_dtype="float32",
781+
enable_block_reuse=False,
782+
dtype="fp8",
783+
),
784+
128,
785+
QuantAlgo.MIXED_PRECISION,
786+
(MMMU_TASK_SPEC, VOXPOPULI_TASK_SPEC, VIDEOMME_TASK_SPEC),
787+
marks=(skip_pre_blackwell,),
788+
id="nvfp4_repeat5",
789+
),
714790
],
715791
)
716792
# `torch.compile` uses a thread pool to compile and it's used in audio pre-processing.

0 commit comments

Comments
 (0)