Skip to content

Commit 883c873

Browse files
authored
Noeyy/add new cases for newly added checkpoints on HF (#728)
## What does this PR do? **Type of change:** Add TensorRT-LLM/vLLM/SGLang functional test cases for newly added checkpoints on HF **Overview:** 1. Since the speculative draft model only supports loading from a local path, we should set the MODELOPT_LOCAL_MODEL_ROOT environment variable. If we don't set it, these test cases will be skipped. 2. Newly added checkpoints: - nvidia/gpt-oss-120b-Eagle3-short-context - nvidia/gpt-oss-120b-Eagle3-throughput - nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 - nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8 ## Usage ```bash pytest tests/examples/llm_ptq/test_deploy.py --run-release ``` ## Testing Run release testing ## Before your PR is "*Ready for review*" Ready for review - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: Yes - **Did you write any new necessary tests?**: No - **Did you add or update any necessary documentation?**: No - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: No ## Additional Information N/A --------- Signed-off-by: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com>
1 parent 3350b0a commit 883c873

3 files changed

Lines changed: 96 additions & 18 deletions

File tree

tests/_test_utils/deploy_utils.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,40 @@ def _deploy_trtllm(self):
100100
spec_config = None
101101
llm = None
102102
kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
103-
if "eagle" in self.model_id.lower():
103+
104+
if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
105+
llm = LLM(
106+
model=self.model_id,
107+
tensor_parallel_size=self.tensor_parallel_size,
108+
enable_attention_dp=False,
109+
attn_backend=self.attn_backend,
110+
trust_remote_code=True,
111+
max_batch_size=8,
112+
kv_cache_config=KvCacheConfig(
113+
enable_block_reuse=False,
114+
mamba_ssm_cache_dtype="float32",
115+
),
116+
)
117+
elif self.model_id == "nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16":
118+
spec_config = EagleDecodingConfig(
119+
max_draft_len=3,
120+
speculative_model_dir=self.model_id,
121+
eagle3_one_model=self.eagle3_one_model,
122+
)
123+
llm = LLM(
124+
model=self.model_id,
125+
tensor_parallel_size=self.tensor_parallel_size,
126+
enable_attention_dp=False,
127+
attn_backend=self.attn_backend,
128+
trust_remote_code=True,
129+
max_batch_size=8,
130+
speculative_config=spec_config,
131+
kv_cache_config=KvCacheConfig(
132+
enable_block_reuse=False,
133+
mamba_ssm_cache_dtype="float32",
134+
),
135+
)
136+
elif "eagle" in self.model_id.lower():
104137
spec_config = EagleDecodingConfig(
105138
max_draft_len=3,
106139
speculative_model_dir=self.model_id,
@@ -146,7 +179,7 @@ def _deploy_vllm(self):
146179
pytest.skip("vllm package not available")
147180

148181
quantization_method = "modelopt"
149-
if "FP4" in self.model_id:
182+
if "fp4" in self.model_id.lower():
150183
quantization_method = "modelopt_fp4"
151184
llm = LLM(
152185
model=self.model_id,
@@ -182,7 +215,7 @@ def _deploy_sglang(self):
182215
except ImportError:
183216
pytest.skip("sglang package not available")
184217
quantization_method = "modelopt"
185-
if "FP4" in self.model_id:
218+
if "fp4" in self.model_id.lower():
186219
quantization_method = "modelopt_fp4"
187220
if "eagle" in self.model_id.lower():
188221
llm = sgl.Engine(
@@ -197,6 +230,14 @@ def _deploy_sglang(self):
197230
mem_fraction_static=0.7,
198231
context_length=1024,
199232
)
233+
elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
234+
llm = sgl.Engine(
235+
model_path=self.model_id,
236+
quantization=quantization_method,
237+
tp_size=self.tensor_parallel_size,
238+
trust_remote_code=True,
239+
attention_backend="flashinfer",
240+
)
200241
else:
201242
llm = sgl.Engine(
202243
model_path=self.model_id,

tests/examples/cnn_qat/test_resnet50.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,19 @@
1919
from _test_utils.examples.run_command import run_example_command
2020
from _test_utils.torch.misc import minimum_gpu
2121

22-
imagenet_path = os.getenv("IMAGENET_PATH")
23-
skip_no_imagenet = pytest.mark.skipif(
24-
not imagenet_path or not os.path.isdir(imagenet_path),
25-
reason="IMAGENET_PATH environment variable is not set or does not point to a valid directory",
26-
)
2722

23+
@pytest.fixture
24+
def imagenet_path():
25+
"""Fixture to get IMAGENET_PATH from environment and skip if not valid."""
26+
path = os.getenv("IMAGENET_PATH")
27+
if not path or not os.path.isdir(path):
28+
pytest.skip(
29+
"IMAGENET_PATH environment variable is not set or does not point to a valid directory"
30+
)
31+
return path
2832

29-
def _build_common_command():
33+
34+
def _build_common_command(imagenet_path):
3035
"""Build common command arguments for CNN QAT training."""
3136
train_data_path = os.path.join(imagenet_path, "train")
3237
val_data_path = os.path.join(imagenet_path, "val")
@@ -58,21 +63,19 @@ def _run_qat_command(base_cmd, common_args, output_dir, example_dir="cnn_qat"):
5863
run_example_command(full_command, example_dir)
5964

6065

61-
@skip_no_imagenet
6266
@minimum_gpu(1)
63-
def test_cnn_qat_single_gpu(tmp_path):
67+
def test_cnn_qat_single_gpu(tmp_path, imagenet_path):
6468
"""Test CNN QAT on single GPU."""
65-
common_args = _build_common_command()
69+
common_args = _build_common_command(imagenet_path)
6670
base_command = ["python", "torchvision_qat.py", "--gpu", "0"]
6771

6872
_run_qat_command(base_command, common_args, tmp_path)
6973

7074

71-
@skip_no_imagenet
7275
@minimum_gpu(2)
73-
def test_cnn_qat_multi_gpu(tmp_path):
76+
def test_cnn_qat_multi_gpu(tmp_path, imagenet_path):
7477
"""Test CNN QAT on multiple GPUs."""
75-
common_args = _build_common_command()
78+
common_args = _build_common_command(imagenet_path)
7679
base_command = ["torchrun", "--nproc_per_node=2", "torchvision_qat.py"]
7780

7881
_run_qat_command(base_command, common_args, tmp_path)

tests/examples/llm_ptq/test_deploy.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,13 @@ def test_kimi(command):
386386
tensor_parallel_size=8,
387387
mini_sm=89,
388388
),
389+
*ModelDeployerList(
390+
model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
391+
backend=("trtllm", "vllm", "sglang"),
392+
tensor_parallel_size=1,
393+
mini_sm=89,
394+
attn_backend="FLASHINFER",
395+
),
389396
],
390397
ids=idfn,
391398
)
@@ -464,18 +471,33 @@ def test_medusa(command):
464471
),
465472
*ModelDeployerList(
466473
base_model="openai/gpt-oss-120b",
467-
model_id="nvidia/gpt-oss-120b-Eagle3",
474+
model_id="nvidia/gpt-oss-120b-Eagle3-long-context",
475+
backend=("trtllm", "sglang"),
476+
tensor_parallel_size=8,
477+
mini_sm=89,
478+
),
479+
*ModelDeployerList(
480+
base_model="openai/gpt-oss-120b",
481+
model_id="nvidia/gpt-oss-120b-Eagle3-short-context",
468482
backend=("trtllm", "sglang"),
469483
tensor_parallel_size=8,
470484
mini_sm=89,
471485
),
472486
*ModelDeployerList(
473487
base_model="openai/gpt-oss-120b",
474-
model_id="nvidia/gpt-oss-120b-Eagle3-v2",
488+
model_id="nvidia/gpt-oss-120b-Eagle3-throughput",
475489
backend=("trtllm", "sglang"),
476490
tensor_parallel_size=8,
477491
mini_sm=89,
478492
),
493+
*ModelDeployerList(
494+
base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
495+
model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
496+
backend=("trtllm", "vllm", "sglang"),
497+
eagle3_one_model=False,
498+
tensor_parallel_size=8,
499+
mini_sm=89,
500+
),
479501
*ModelDeployerList(
480502
base_model="nvidia/Llama-3.3-70B-Instruct-FP8",
481503
model_id="nvidia/Llama-3.3-70B-Instruct-Eagle3",
@@ -487,4 +509,16 @@ def test_medusa(command):
487509
ids=idfn,
488510
)
489511
def test_eagle(command):
490-
command.run()
512+
"""Skip test if MODELOPT_LOCAL_MODEL_ROOT is set but model doesn't exist locally.
513+
Speculative models should be loaded from a local path."""
514+
local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT")
515+
if not local_root:
516+
return
517+
518+
local_path = os.path.join(local_root, command.model_id)
519+
if os.path.isdir(local_path):
520+
# Update model_id to use local path
521+
command.model_id = local_path
522+
command.run()
523+
else:
524+
pytest.skip(f"Local model not found: {local_path}")

0 commit comments

Comments
 (0)