
Commit 9de9b8f

Noeyy/add test cases for the newly added checkpoints on HF (#827)
## What does this PR do?

**Type of change:** new tests

**Overview:** Add new test cases for the newly added checkpoints on HuggingFace.

## Usage

```
pytest test_deploy.py --run-release
```

## Testing

None

## Before your PR is "*Ready for review*"

- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: Yes
- **Did you write any new necessary tests?**: Yes
- **Did you add or update any necessary documentation?**: No
- **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: No

## Additional Information

None

## Summary by CodeRabbit

* **New Features**
  * Added support for NVFP4 model variants across multiple model families (DeepSeek, Llama, Qwen, and others).
* **Improvements**
  * Enhanced backend availability detection to automatically identify and manage supported deployment backends at runtime.
* **Tests**
  * Improved test infrastructure for better reproducibility and backend compatibility handling.

Signed-off-by: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com>
1 parent e247f5d commit 9de9b8f

3 files changed: 135 additions & 56 deletions


tests/_test_utils/deploy_utils.py

Lines changed: 65 additions & 15 deletions
```diff
@@ -19,6 +19,48 @@
 import pytest
 import torch
 
+# Cache for available backends detection (computed once at import time)
+_AVAILABLE_BACKENDS = None
+
+
+def get_available_backends():
+    """Detect which backends are available in the current environment.
+
+    Returns:
+        set: A set of available backend names ('trtllm', 'vllm', 'sglang')
+    """
+    global _AVAILABLE_BACKENDS
+    if _AVAILABLE_BACKENDS is not None:
+        return _AVAILABLE_BACKENDS
+
+    available = set()
+
+    try:
+        import tensorrt_llm  # noqa: F401
+
+        available.add("trtllm")
+    except ImportError:
+        pass
+
+    try:
+        import vllm  # noqa: F401
+
+        available.add("vllm")
+    except ImportError:
+        pass
+
+    try:
+        import sglang  # noqa: F401
+
+        available.add("sglang")
+    except ImportError:
+        pass
+
+    _AVAILABLE_BACKENDS = available
+    print(f"[deploy_utils] Detected available backends: {available}")
+    return _AVAILABLE_BACKENDS
+
+
 # Common test prompts for all backends
 COMMON_PROMPTS = [
     "Hello, my name is",
```
```diff
@@ -90,18 +132,18 @@ def run(self):
 
     def _deploy_trtllm(self):
         """Deploy a model using TensorRT-LLM."""
-        try:
-            from tensorrt_llm import LLM, SamplingParams
-            from tensorrt_llm.llmapi import CudaGraphConfig, EagleDecodingConfig, KvCacheConfig
-        except ImportError:
-            pytest.skip("tensorrt_llm package not available")
+        from tensorrt_llm import LLM, SamplingParams
+        from tensorrt_llm.llmapi import CudaGraphConfig, EagleDecodingConfig, KvCacheConfig
 
         sampling_params = SamplingParams(max_tokens=32)
         spec_config = None
         llm = None
         kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
 
-        if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+        if self.model_id in (
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+        ):
             llm = LLM(
                 model=self.model_id,
                 tensor_parallel_size=self.tensor_parallel_size,
```
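Standing alone, the TensorRT-LLM path above boils down to the following sketch. The model ID is one of the checkpoints this PR adds coverage for; `tensor_parallel_size=1` is an assumed single-GPU setting, and the exact `LLM` keyword set depends on the installed tensorrt_llm version.

```python
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import KvCacheConfig

sampling_params = SamplingParams(max_tokens=32)
# Same KV-cache settings as the test: block reuse on, 80% of free GPU memory.
kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)

llm = LLM(
    model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
    tensor_parallel_size=1,  # assumed single-GPU value for illustration
    kv_cache_config=kv_cache_config,
)
for output in llm.generate(["Hello, my name is"], sampling_params):
    print(output.outputs[0].text)
```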
```diff
@@ -173,10 +215,7 @@ def _deploy_trtllm(self):
 
     def _deploy_vllm(self):
         """Deploy a model using vLLM."""
-        try:
-            from vllm import LLM, SamplingParams
-        except ImportError:
-            pytest.skip("vllm package not available")
+        from vllm import LLM, SamplingParams
 
         quantization_method = "modelopt"
         if "fp4" in self.model_id.lower():
```
```diff
@@ -210,10 +249,8 @@ def _deploy_vllm(self):
 
     def _deploy_sglang(self):
         """Deploy a model using SGLang."""
-        try:
-            import sglang as sgl
-        except ImportError:
-            pytest.skip("sglang package not available")
+        import sglang as sgl
+
         quantization_method = "modelopt"
         if "fp4" in self.model_id.lower():
             quantization_method = "modelopt_fp4"
```
```diff
@@ -230,7 +267,10 @@ def _deploy_sglang(self):
                 mem_fraction_static=0.7,
                 context_length=1024,
             )
-        elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+        elif self.model_id in (
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+        ):
             llm = sgl.Engine(
                 model_path=self.model_id,
                 quantization=quantization_method,
```
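For the SGLang branch, the engine construction reduces to the sketch below, reusing the argument values visible in the diff; the `generate` call follows SGLang's offline engine API, and exact keyword support and return shape may vary by version.

```python
import sglang as sgl

llm = sgl.Engine(
    model_path="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
    quantization="modelopt_fp4",
    mem_fraction_static=0.7,  # GPU memory fraction, as in the test above
    context_length=1024,
)
# Offline generation: a list of prompts plus a sampling-params dict.
outputs = llm.generate(["Hello, my name is"], {"max_new_tokens": 32})
print(outputs[0]["text"])
llm.shutdown()
```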
```diff
@@ -259,10 +299,20 @@ def __init__(self, **params):
             else:
                 self.params[key] = [value]
 
+        # Filter backends to only include available ones
+        if "backend" in self.params:
+            available = get_available_backends()
+            original_backends = self.params["backend"]
+            self.params["backend"] = [b for b in original_backends if b in available]
+
         # Pre-generate all deployers for pytest compatibility
         self._deployers = list(self._generate_deployers())
 
     def _generate_deployers(self):
+        # If no backends available after filtering, yield nothing
+        if "backend" in self.params and not self.params["backend"]:
+            return
+
         for values in itertools.product(*self.params.values()):
             deployer = ModelDeployer(**dict(zip(self.params.keys(), values)))
             # Set test case ID in format "model_id_backend"
```
tests/examples/gpt_oss/test_gpt_oss_qat.py

Lines changed: 7 additions & 10 deletions
```diff
@@ -294,51 +294,48 @@ def deploy_gpt_oss_trtllm(self, tmp_path, model_path_override=None):
 )
 def test_gpt_oss_complete_pipeline(model_path, tmp_path):
     """Test the complete GPT-OSS optimization pipeline by executing all 3 steps in sequence."""
-    import pathlib
 
-    # Use current directory instead of tmp_path for checkpoints
-    current_dir = pathlib.Path.cwd()
     # Create GPTOSS instance with model path
     gpt_oss = GPTOSS(model_path)
 
     if model_path == "openai/gpt-oss-20b":
         # Step 1: SFT Training
-        sft_checkpoint = gpt_oss.gpt_oss_sft_training(current_dir)
+        sft_checkpoint = gpt_oss.gpt_oss_sft_training(tmp_path)
         if not sft_checkpoint or not sft_checkpoint.exists():
             print("Step 1 failed: SFT checkpoint not found, stopping pipeline.")
             return
         print(f"Step 1 completed: SFT checkpoint at {sft_checkpoint}")
 
         # Step 2: QAT Training (depends on Step 1)
-        qat_checkpoint = gpt_oss.gpt_oss_qat_training(current_dir, sft_dir=sft_checkpoint)
+        qat_checkpoint = gpt_oss.gpt_oss_qat_training(tmp_path, sft_dir=sft_checkpoint)
         if not qat_checkpoint or not qat_checkpoint.exists():
             print("Step 2 failed: QAT checkpoint not found, stopping pipeline.")
             return
         print(f"Step 2 completed: QAT checkpoint at {qat_checkpoint}")
 
         # Step 3: MXFP4 Conversion (depends on Step 2)
-        mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion(current_dir, qat_dir=qat_checkpoint)
+        mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion(tmp_path, qat_dir=qat_checkpoint)
         if not mxfp4_checkpoint or not mxfp4_checkpoint.exists():
             print("Step 3 failed: MXFP4 checkpoint not found, stopping pipeline.")
             return
         print(f"Step 3 completed: MXFP4 checkpoint at {mxfp4_checkpoint}")
 
         # Step 4: Deploy with TensorRT-LLM (depends on Step 3)
         print("Step 4: Running deployment with MXFP4 checkpoint...")
-        gpt_oss.deploy_gpt_oss_trtllm(current_dir, model_path_override=mxfp4_checkpoint)
+        gpt_oss.deploy_gpt_oss_trtllm(tmp_path, model_path_override=mxfp4_checkpoint)
         print("Step 4 completed: Deployment successful")
 
     elif model_path == "openai/gpt-oss-120b":
         # Step 1: QAT Training with LoRA
-        qat_lora_checkpoint = gpt_oss.gpt_oss_qat_training_lora(current_dir)
+        qat_lora_checkpoint = gpt_oss.gpt_oss_qat_training_lora(tmp_path)
         if not qat_lora_checkpoint or not qat_lora_checkpoint.exists():
             print("Step 1 failed: QAT-LoRA checkpoint not found, stopping pipeline.")
             return
         print(f"Step 1 completed: QAT-LoRA checkpoint at {qat_lora_checkpoint}")
 
         # Step 2: MXFP4 Conversion for LoRA model (depends on Step 1)
         mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion_lora(
-            current_dir, qat_lora_dir=qat_lora_checkpoint
+            tmp_path, qat_lora_dir=qat_lora_checkpoint
         )
         if not mxfp4_checkpoint or not mxfp4_checkpoint.exists():
             print("Step 2 failed: MXFP4 checkpoint not found, stopping pipeline.")
@@ -347,5 +344,5 @@ def test_gpt_oss_complete_pipeline(model_path, tmp_path):
 
         # Step 3: Deploy with TensorRT-LLM (depends on Step 2)
         print("Step 3: Running deployment with MXFP4 checkpoint...")
-        gpt_oss.deploy_gpt_oss_trtllm(current_dir, model_path_override=mxfp4_checkpoint)
+        gpt_oss.deploy_gpt_oss_trtllm(tmp_path, model_path_override=mxfp4_checkpoint)
         print("Step 3 completed: Deployment successful")
```
