Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions modelopt/torch/puzzletron/anymodel/model_descriptor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,31 @@ def get_language_model_config(config):
"""
return config

@staticmethod
def truncate_pattern_for_subblock(
lm_config: Any, parent_layer_index: int | None = None
) -> None:
"""Adjust per-layer config fields so a single-layer model represents the correct layer type.

The default implementation handles ``hybrid_override_pattern`` for
hybrid architectures. It is a no-op when the field is absent.
Override if a model uses a different pattern alphabet.
"""
pattern = getattr(lm_config, "hybrid_override_pattern", None)
if not pattern:
return
# Strip cosmetic pipe separators (e.g. "M|-|*" -> "M-*") before indexing.
pattern = pattern.replace("|", "")
if not pattern:
raise ValueError(
f"hybrid_override_pattern is set but contains no layer-type characters "
f"(original: {lm_config.hybrid_override_pattern!r})"
)
if parent_layer_index is not None and 0 <= parent_layer_index < len(pattern):
lm_config.hybrid_override_pattern = pattern[parent_layer_index]
return
lm_config.hybrid_override_pattern = pattern[0]

Comment thread
kevalmorabia97 marked this conversation as resolved.
@classmethod
def create_dummy_block(cls, original_layer: nn.Module, block_index: int) -> nn.Module:
"""Create a dummy block to replace a layer for sharded model initialization."""
Expand Down
6 changes: 3 additions & 3 deletions modelopt/torch/puzzletron/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ def puzzletron(
launch_prune_ckpt(hydra_cfg)
dist.barrier()

# Step 4: build_library_and_stats (single process)
# Step 3: build_library_and_stats (single process)
if dist.is_master():
launch_build_library_and_stats(hydra_cfg)
dist.barrier()

# Step 5: calc_one_block_scores (distributed processing)
# Step 4: calc_one_block_scores (distributed processing)
launch_scoring(hydra_cfg)

# Step 6: mip_and_realize_models (distributed processing)
# Step 5: mip_and_realize_models (distributed processing)
launch_mip_and_realize_model(hydra_cfg)

return hydra_cfg
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,12 @@ def calculate_subblock_params(
layer_config: BlockConfig | FFNConfig | AttentionConfig,
descriptor: Type[ModelDescriptor],
) -> int:
"""Count parameters on one meta decoder layer."""
"""Count parameters on one meta decoder layer.

The caller is responsible for adjusting per-layer config fields (e.g.
``hybrid_override_pattern``) before passing ``config``; see
``ModelDescriptor.truncate_pattern_for_subblock``.
"""
if isinstance(layer_config, FFNConfig):
block_config = layer_config.to_blockconfig()
elif isinstance(layer_config, AttentionConfig):
Expand Down
12 changes: 9 additions & 3 deletions modelopt/torch/puzzletron/subblock_stats/calc_subblock_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

"""Calc subblock stats to compute memory and runtime statistics for subblocks."""

import copy
import dataclasses
import json
import os
Expand Down Expand Up @@ -150,6 +151,11 @@ def calculate_subblock_stats(
subblock_config = subblock_config_indexed["subblock_config"]
parent_layer_indices = subblock_config_indexed["parent_layer_indices"]

layer_model_config = copy.deepcopy(model_config)
ModelDescriptor.truncate_pattern_for_subblock(
descriptor.get_language_model_config(layer_model_config), parent_layer_indices[0]
)

if is_calc_runtime:
total_runtime_ms = runtime_by_subblock_dict[subblock_config]
prefill_runtime_ms = None
Expand All @@ -168,17 +174,17 @@ def calculate_subblock_stats(
weights_dtype,
kv_cache_dtype,
allocate_prefill_query,
model_config=model_config,
model_config=layer_model_config,
descriptor=descriptor,
)
if not isinstance(subblock_memory, dict):
subblock_memory = {"memory_mib": subblock_memory, "kv_cache_memory_mib": 0.0}

subblock_params = calculate_subblock_params(model_config, subblock_config, descriptor)
subblock_params = calculate_subblock_params(layer_model_config, subblock_config, descriptor)
if moe_stats_file is not None:
subblock_active_params = calc_subblock_active_params(
subblock_config,
model_config,
layer_model_config,
descriptor,
n_embd,
moe_stats_file,
Expand Down
85 changes: 85 additions & 0 deletions tests/gpu/puzzletron/test_nemotron_h_gpu_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""GPU validation for Nemotron-H hybrid model subblock parameter counting.

Requires HuggingFace Hub access to nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base (config only,
no weights are downloaded) and mamba_ssm (CUDA).

Usage:
pytest -v -s -o addopts= tests/gpu/puzzletron/test_nemotron_h_gpu_validation.py
"""

import copy

import pytest

import modelopt.torch.puzzletron.anymodel.models.nemotron_h_v2.nemotron_h_v2_model_descriptor # noqa: F401
from modelopt.torch.puzzletron.anymodel.model_descriptor import (
ModelDescriptor,
ModelDescriptorFactory,
)
from modelopt.torch.puzzletron.block_config import FFNConfig
from modelopt.torch.puzzletron.subblock_stats.calc_subblock_params_and_memory import (
calculate_subblock_params,
)
from modelopt.torch.puzzletron.tools.checkpoint_utils import load_model_config

MODEL_ID = "nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base"


@pytest.fixture
def nemotron_descriptor():
    """Return the registered descriptor class for the ``nemotron_h_v2`` architecture."""
    return ModelDescriptorFactory.get("nemotron_h_v2")


@pytest.fixture
def nemotron_config(nemotron_descriptor):
    """Load the HF model config for MODEL_ID (config only; no weights are downloaded)."""
    return load_model_config(
        MODEL_ID, trust_remote_code=nemotron_descriptor.requires_trust_remote_code()
    )


def test_ffn_variants_produce_distinct_params(nemotron_config, nemotron_descriptor):
    """FFN subblocks with different intermediate_size must report different param counts.

    On hybrid models, hybrid_override_pattern must be truncated to match the subblock
    type; otherwise a single-layer model always builds layer 0 (Mamba) and every FFN
    variant reports identical param counts.
    """
    lm_config = nemotron_descriptor.get_language_model_config(nemotron_config)
    # Pipes are cosmetic separators; strip them so positions map to layer indices.
    pattern = lm_config.hybrid_override_pattern.replace("|", "")
    ffn_indices = [i for i, c in enumerate(pattern) if c in ("-", "E")]
    assert ffn_indices, f"No FFN layers in pattern: {pattern}"
    first_ffn_index = ffn_indices[0]

    teacher_size = lm_config.intermediate_size
    sizes = [teacher_size // 4, teacher_size // 2, teacher_size]

    param_counts = {}
    for size in sizes:
        # Each variant gets its own deep copy so pattern truncation cannot leak
        # between iterations.
        layer_config = copy.deepcopy(nemotron_config)
        layer_lm_config = nemotron_descriptor.get_language_model_config(layer_config)
        ModelDescriptor.truncate_pattern_for_subblock(layer_lm_config, first_ffn_index)
        params = calculate_subblock_params(
            layer_config, FFNConfig(intermediate_size=size), nemotron_descriptor
        )
        param_counts[size] = params
        print(f"  intermediate_size={size:>8d} -> params={params:>12,}")

    assert len(set(param_counts.values())) == len(sizes), (
        f"Expected {len(sizes)} distinct param counts, got: {param_counts}"
    )
)
8 changes: 4 additions & 4 deletions tests/gpu/torch/puzzletron/test_puzzletron.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,8 @@ def _assert_mip_solutions(puzzle_dir: Path, hf_model_name: str):
"meta-llama/Llama-3.1-8B-Instruct": 395.63,
"meta-llama/Llama-3.2-3B-Instruct": 395.63,
"mistralai/Mistral-Small-24B-Instruct-2501": 395.63,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 202.13,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 202.13,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 432.81,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 197.63,
"openai/gpt-oss-20b": 437.33,
"Qwen/Qwen2.5-7B-Instruct": 386.25,
"Qwen/Qwen3-8B": 395.63,
Expand All @@ -339,8 +339,8 @@ def _assert_mip_solutions(puzzle_dir: Path, hf_model_name: str):
"meta-llama/Llama-3.1-8B-Instruct": 6096128,
"meta-llama/Llama-3.2-3B-Instruct": 6096128,
"mistralai/Mistral-Small-24B-Instruct-2501": 6096128,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 5309184,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 5309184,
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16": 126255872,
"nvidia/NVIDIA-Nemotron-Nano-12B-v2": 2949888,
"openai/gpt-oss-20b": 27959168,
"Qwen/Qwen2.5-7B-Instruct": 1181696,
"Qwen/Qwen3-8B": 6096640,
Expand Down
115 changes: 115 additions & 0 deletions tests/unit/torch/puzzletron/test_hybrid_pattern_truncation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for ModelDescriptor.truncate_pattern_for_subblock.

Validates that the base descriptor method selects the correct pattern
character when building a 1-layer model for per-subblock param counting.
"""

from types import SimpleNamespace

import pytest

pytest.importorskip("transformers")

from modelopt.torch.puzzletron.anymodel.model_descriptor import ModelDescriptor

NEMOTRON_H_PATTERN = "M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-"


class TestTruncatePatternForSubblock:
    """Unit tests for ModelDescriptor.truncate_pattern_for_subblock."""

    @pytest.mark.parametrize(
        ("index", "expected"),
        [
            (0, "M"),
            (1, "-"),
            (7, "*"),
        ],
        ids=["mamba", "ffn", "attention"],
    )
    def test_index_selects_correct_layer_type(self, index, expected):
        """A valid parent layer index picks the matching character out of the pattern."""
        config = _make_config()
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=index)
        assert config.hybrid_override_pattern == expected

    @pytest.mark.parametrize(
        ("index", "expected"),
        [
            (1, "-"),
            (2, "*"),
        ],
        ids=["ffn_after_strip", "attention_after_strip"],
    )
    def test_pipe_separators_stripped_before_indexing(self, index, expected):
        """Pipe-delimited patterns such as 'M|-|*' are reduced to 'M-*' before lookup."""
        config = _make_config("M|-|*")
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=index)
        assert config.hybrid_override_pattern == expected

    def test_missing_attribute_is_noop(self):
        """A config that lacks hybrid_override_pattern is not touched."""
        config = SimpleNamespace()
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=0)
        assert not hasattr(config, "hybrid_override_pattern")

    def test_empty_pattern_is_noop(self):
        """An empty pattern string stays empty."""
        config = _make_config("")
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=0)
        assert config.hybrid_override_pattern == ""

    def test_pipes_only_pattern_raises(self):
        """A pattern made only of separators carries no layer types and must fail loudly."""
        config = _make_config("|||")
        with pytest.raises(ValueError, match="no layer-type characters"):
            ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=0)

    def test_none_index_defaults_to_first_char(self):
        """Omitting the index falls back to the first pattern character."""
        config = _make_config("*-M")
        ModelDescriptor.truncate_pattern_for_subblock(config)
        assert config.hybrid_override_pattern == "*"

    @pytest.mark.parametrize(
        "index",
        [999, -1],
        ids=["above_range", "negative"],
    )
    def test_out_of_range_index_defaults_to_first_char(self, index):
        """An index outside the pattern falls back to the first pattern character."""
        config = _make_config("*-M")
        ModelDescriptor.truncate_pattern_for_subblock(config, parent_layer_index=index)
        assert config.hybrid_override_pattern == "*"


def _make_config(pattern=NEMOTRON_H_PATTERN):
    """Return a minimal config stub carrying only ``hybrid_override_pattern``."""
    config = SimpleNamespace()
    config.hybrid_override_pattern = pattern
    return config
Loading