Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions modelopt/torch/puzzletron/anymodel/model_descriptor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,31 @@ def get_language_model_config(config):
"""
return config

@staticmethod
def truncate_pattern_for_subblock(
lm_config: Any, parent_layer_index: int | None = None
) -> None:
"""Adjust per-layer config fields so a single-layer model represents the correct layer type.

The default implementation handles ``hybrid_override_pattern`` for
hybrid architectures. It is a no-op when the field is absent.
Override if a model uses a different pattern alphabet.
"""
pattern = getattr(lm_config, "hybrid_override_pattern", None)
if not pattern:
return
# Strip cosmetic pipe separators (e.g. "M|-|*" -> "M-*") before indexing.
pattern = pattern.replace("|", "")
if not pattern:
raise ValueError(
f"hybrid_override_pattern is set but contains no layer-type characters "
f"(original: {lm_config.hybrid_override_pattern!r})"
)
if parent_layer_index is not None and 0 <= parent_layer_index < len(pattern):
lm_config.hybrid_override_pattern = pattern[parent_layer_index]
return
lm_config.hybrid_override_pattern = pattern[0]

Comment thread
kevalmorabia97 marked this conversation as resolved.
@classmethod
def create_dummy_block(cls, original_layer: nn.Module, block_index: int) -> nn.Module:
"""Create a dummy block to replace a layer for sharded model initialization."""
Expand Down
6 changes: 3 additions & 3 deletions modelopt/torch/puzzletron/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ def puzzletron(
launch_prune_ckpt(hydra_cfg)
dist.barrier()

# Step 4: build_library_and_stats (single process)
# Step 3: build_library_and_stats (single process)
if dist.is_master():
launch_build_library_and_stats(hydra_cfg)
dist.barrier()

# Step 5: calc_one_block_scores (distributed processing)
# Step 4: calc_one_block_scores (distributed processing)
launch_scoring(hydra_cfg)

# Step 6: mip_and_realize_models (distributed processing)
# Step 5: mip_and_realize_models (distributed processing)
launch_mip_and_realize_model(hydra_cfg)

return hydra_cfg
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,12 @@ def calculate_subblock_params(
layer_config: BlockConfig | FFNConfig | AttentionConfig,
descriptor: Type[ModelDescriptor],
) -> int:
"""Count parameters on one meta decoder layer."""
"""Count parameters on one meta decoder layer.

The caller is responsible for adjusting per-layer config fields (e.g.
``hybrid_override_pattern``) before passing ``config``; see
``ModelDescriptor.truncate_pattern_for_subblock``.
"""
if isinstance(layer_config, FFNConfig):
block_config = layer_config.to_blockconfig()
elif isinstance(layer_config, AttentionConfig):
Expand Down
12 changes: 9 additions & 3 deletions modelopt/torch/puzzletron/subblock_stats/calc_subblock_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

"""Calc subblock stats to compute memory and runtime statistics for subblocks."""

import copy
import dataclasses
import json
import os
Expand Down Expand Up @@ -150,6 +151,11 @@ def calculate_subblock_stats(
subblock_config = subblock_config_indexed["subblock_config"]
parent_layer_indices = subblock_config_indexed["parent_layer_indices"]

layer_model_config = copy.deepcopy(model_config)
ModelDescriptor.truncate_pattern_for_subblock(
descriptor.get_language_model_config(layer_model_config), parent_layer_indices[0]
)

if is_calc_runtime:
total_runtime_ms = runtime_by_subblock_dict[subblock_config]
prefill_runtime_ms = None
Expand All @@ -168,17 +174,17 @@ def calculate_subblock_stats(
weights_dtype,
kv_cache_dtype,
allocate_prefill_query,
model_config=model_config,
model_config=layer_model_config,
descriptor=descriptor,
)
if not isinstance(subblock_memory, dict):
subblock_memory = {"memory_mib": subblock_memory, "kv_cache_memory_mib": 0.0}

subblock_params = calculate_subblock_params(model_config, subblock_config, descriptor)
subblock_params = calculate_subblock_params(layer_model_config, subblock_config, descriptor)
if moe_stats_file is not None:
subblock_active_params = calc_subblock_active_params(
subblock_config,
model_config,
layer_model_config,
descriptor,
n_embd,
moe_stats_file,
Expand Down
85 changes: 85 additions & 0 deletions tests/gpu/puzzletron/test_nemotron_h_gpu_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""GPU validation for Nemotron-H hybrid model subblock parameter counting.

Requires HuggingFace Hub access to nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base (config only,
no weights are downloaded) and mamba_ssm (CUDA).

Usage:
pytest -v -s -o addopts= tests/gpu/puzzletron/test_nemotron_h_gpu_validation.py
"""

import copy

import pytest

import modelopt.torch.puzzletron.anymodel.models.nemotron_h_v2.nemotron_h_v2_model_descriptor # noqa: F401
from modelopt.torch.puzzletron.anymodel.model_descriptor import (
ModelDescriptor,
ModelDescriptorFactory,
)
from modelopt.torch.puzzletron.block_config import FFNConfig
from modelopt.torch.puzzletron.subblock_stats.calc_subblock_params_and_memory import (
calculate_subblock_params,
)
from modelopt.torch.puzzletron.tools.checkpoint_utils import load_model_config

MODEL_ID = "nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base"


@pytest.fixture
def nemotron_descriptor():
    """Return the registered descriptor class for the ``nemotron_h_v2`` architecture."""
    return ModelDescriptorFactory.get("nemotron_h_v2")


@pytest.fixture
def nemotron_config(nemotron_descriptor):
    """Load the HF model config for MODEL_ID (config only; no weights are downloaded)."""
    return load_model_config(
        MODEL_ID, trust_remote_code=nemotron_descriptor.requires_trust_remote_code()
    )


def test_ffn_variants_produce_distinct_params(nemotron_config, nemotron_descriptor):
    """FFN subblocks with different intermediate_size must report different param counts.

    On hybrid models, hybrid_override_pattern must be truncated to match the subblock
    type; otherwise a single-layer model always builds layer 0 (Mamba) and every FFN
    variant reports identical param counts.
    """
    lm_config = nemotron_descriptor.get_language_model_config(nemotron_config)
    # Pipes are cosmetic separators; strip them so positions map to layer indices.
    pattern = lm_config.hybrid_override_pattern.replace("|", "")
    ffn_indices = [i for i, c in enumerate(pattern) if c in ("-", "E")]
    assert ffn_indices, f"No FFN layers in pattern: {pattern}"
    first_ffn_index = ffn_indices[0]

    teacher_size = lm_config.intermediate_size
    sizes = [teacher_size // 4, teacher_size // 2, teacher_size]

    param_counts = {}
    for size in sizes:
        # Each variant gets its own deep copy so pattern truncation cannot leak
        # between iterations.
        layer_config = copy.deepcopy(nemotron_config)
        layer_lm_config = nemotron_descriptor.get_language_model_config(layer_config)
        ModelDescriptor.truncate_pattern_for_subblock(layer_lm_config, first_ffn_index)
        params = calculate_subblock_params(
            layer_config, FFNConfig(intermediate_size=size), nemotron_descriptor
        )
        param_counts[size] = params
        print(f"  intermediate_size={size:>8d} -> params={params:>12,}")

    assert len(set(param_counts.values())) == len(sizes), (
        f"Expected {len(sizes)} distinct param counts, got: {param_counts}"
    )
)
8 changes: 4 additions & 4 deletions tests/gpu/torch/puzzletron/test_puzzletron.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,8 @@ def _assert_mip_solutions(puzzle_dir: Path, hf_model_name: str):
"meta-llama/Llama-3.1-8B-Instruct": 395.63,
"meta-llama/Llama-3.2-3B-Instruct": 395.63,
"mistralai/Mistral-Small-24B-Instruct-2501": 395.63,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 202.13,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 202.13,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 432.81,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 197.63,
"openai/gpt-oss-20b": 437.33,
"Qwen/Qwen2.5-7B-Instruct": 386.25,
"Qwen/Qwen3-8B": 395.63,
Expand All @@ -339,8 +339,8 @@ def _assert_mip_solutions(puzzle_dir: Path, hf_model_name: str):
"meta-llama/Llama-3.1-8B-Instruct": 6096128,
"meta-llama/Llama-3.2-3B-Instruct": 6096128,
"mistralai/Mistral-Small-24B-Instruct-2501": 6096128,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 5309184,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 5309184,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 126255872,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 2949888,
"openai/gpt-oss-20b": 27959168,
"Qwen/Qwen2.5-7B-Instruct": 1181696,
"Qwen/Qwen3-8B": 6096640,
Expand Down
115 changes: 115 additions & 0 deletions tests/unit/torch/puzzletron/test_hybrid_pattern_truncation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for ModelDescriptor.truncate_pattern_for_subblock.

Validates that the base descriptor method selects the correct pattern
character when building a 1-layer model for per-subblock param counting.
"""

from types import SimpleNamespace

import pytest

pytest.importorskip("transformers")

from modelopt.torch.puzzletron.anymodel.model_descriptor import ModelDescriptor

NEMOTRON_H_PATTERN = "M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-"


class TestTruncatePatternForSubblock:
    """Unit tests for ModelDescriptor.truncate_pattern_for_subblock."""

    @pytest.mark.parametrize(
        ("index", "expected"),
        [
            (0, "M"),
            (1, "-"),
            (7, "*"),
        ],
        ids=["mamba", "ffn", "attention"],
    )
    def test_index_selects_correct_layer_type(self, index, expected):
        """A valid parent layer index picks the matching character out of the pattern."""
        config = _make_config()
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=index)
        assert config.hybrid_override_pattern == expected

    @pytest.mark.parametrize(
        ("index", "expected"),
        [
            (1, "-"),
            (2, "*"),
        ],
        ids=["ffn_after_strip", "attention_after_strip"],
    )
    def test_pipe_separators_stripped_before_indexing(self, index, expected):
        """Pipe-delimited patterns such as 'M|-|*' are reduced to 'M-*' before lookup."""
        config = _make_config("M|-|*")
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=index)
        assert config.hybrid_override_pattern == expected

    def test_missing_attribute_is_noop(self):
        """A config that lacks hybrid_override_pattern is not touched."""
        config = SimpleNamespace()
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=0)
        assert not hasattr(config, "hybrid_override_pattern")

    def test_empty_pattern_is_noop(self):
        """An empty pattern string stays empty."""
        config = _make_config("")
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=0)
        assert config.hybrid_override_pattern == ""

    def test_pipes_only_pattern_raises(self):
        """A pattern made only of separators carries no layer types and must fail loudly."""
        config = _make_config("|||")
        with pytest.raises(ValueError, match="no layer-type characters"):
            ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=0)

    def test_none_index_defaults_to_first_char(self):
        """Omitting the index falls back to the first pattern character."""
        config = _make_config("*-M")
        ModelDescriptor.truncate_pattern_for_subblock(config)
        assert config.hybrid_override_pattern == "*"

    @pytest.mark.parametrize(
        "index",
        [999, -1],
        ids=["above_range", "negative"],
    )
    def test_out_of_range_index_defaults_to_first_char(self, index):
        """An index outside the pattern falls back to the first pattern character."""
        config = _make_config("*-M")
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=index)
        assert config.hybrid_override_pattern == "*"


def _make_config(pattern=NEMOTRON_H_PATTERN):
    """Return a minimal config stub carrying only ``hybrid_override_pattern``."""
    config = SimpleNamespace()
    config.hybrid_override_pattern = pattern
    return config
Loading