Skip to content

Commit 5efdc8f

Browse files
Fix any_model for gpt-oss
Signed-off-by: Daniel Korzekwa <dkorzekwa@nvidia.com>
1 parent 7d6c203 commit 5efdc8f

3 files changed

Lines changed: 24 additions & 9 deletions

File tree

tests/gpu/torch/puzzletron/resources/configs/gpt-oss-20b/gpt-oss-20b.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ mip:
8080
target_memory: 780_000 # 78_000
8181

8282
mip_constraints:
83+
- stats.num_local_experts: 48 # teacher has: 2 layers * 32 experts = 64 total experts
8384
metric_overrides:
8485
max_seconds_per_solution: 60
8586

tests/gpu/torch/puzzletron/resources/configs/nemotron-3-nano-30b-a3b-base-bf16/nemotron-3-nano-30b-a3b-base-bf16.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ scoring:
4949
micro_batch_size: 1
5050
seed: 42
5151
shuffle_seed: 444
52-
dataset_path: ${dataset_path}
52+
dataset_path: ${dataset_path}/valid
5353

5454
mip:
5555
single_block_replacement_validation_dir: ${to_path:${scoring.output_dir}}
@@ -107,7 +107,7 @@ realize_model:
107107
micro_batch_size: 1
108108
seed: 42
109109
shuffle_seed: 444
110-
dataset_path: ${dataset_path}
110+
dataset_path: ${dataset_path}/valid
111111

112112
nccl_timeout_minutes: ${timedelta_minutes:10}
113113

tests/gpu/torch/puzzletron/test_puzzletron.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
"*E",
6363
True,
6464
),
65+
("gpt-oss-20b", "gpt_oss_20b", "gpt-oss-20b", None, True),
6566
],
6667
)
6768
def test_puzzletron(
@@ -139,14 +140,27 @@ def _test_puzzletron_multiprocess_job(
139140
assert (puzzle_dir / "ckpts/num_experts_8").exists()
140141

141142
# assertions for the mip_and_realize_models step 6
143+
# Find the MIP solution directory dynamically (e.g., stats_num_local_experts_*)
144+
mip_solutions_dir = puzzle_dir / "mip/puzzle_solutions"
145+
solution_dirs = [
146+
d
147+
for d in mip_solutions_dir.iterdir()
148+
if d.is_dir() and d.name.startswith("stats_num_local_experts_")
149+
]
150+
assert len(solution_dirs) == 1, (
151+
f"Expected exactly one stats_num_local_experts_* directory, found: {[d.name for d in solution_dirs]}"
152+
)
153+
solution_dir = solution_dirs[0]
154+
142155
solution_0_ckpt_config_path = (
143-
puzzle_dir
144-
/ "mip/puzzle_solutions/stats_num_local_experts_1472/solutions--checkpoints/solution_0/config.json"
156+
solution_dir / "solutions--checkpoints/solution_0/config.json"
157+
)
158+
assert solution_0_ckpt_config_path.exists(), (
159+
f"Expected {solution_0_ckpt_config_path} to exist"
160+
)
161+
assert (solution_dir / "solutions.json").exists(), (
162+
f"Expected {solution_dir / 'solutions.json'} to exist"
145163
)
146-
assert solution_0_ckpt_config_path.exists()
147-
assert (
148-
puzzle_dir / "mip/puzzle_solutions/stats_num_local_experts_1472/solutions.json"
149-
).exists()
150164
else:
151165
# assertions for the score_pruning_activations step 1 (FFN pruning)
152166
_assert_score_pruning_activations(puzzle_dir, hf_config_name)
@@ -217,7 +231,7 @@ def _test_puzzletron_multiprocess_job(
217231
"nemotron-nano-12b-v2": 4.79390811920166,
218232
"mistral-small-24b-instruct-2501": 4.709150314331055,
219233
"qwen3-8b": 4.733874320983887,
220-
# Note: nemotron-3-nano-30b-a3b-base-bf16 uses MoE expert pruning with different MIP path
234+
# Note: nemotron-3-nano-30b-a3b-base-bf16 and gpt-oss-20b use MoE expert pruning with a different MIP path
221235
}
222236

223237

0 commit comments

Comments
 (0)