Skip to content

Commit 0c2a2ee

Browse files
Merge branch 'feature/puzzletron' into jrausch/distillation-consolidation
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
2 parents 8c2fa10 + 977d60a commit 0c2a2ee

File tree

118 files changed

+899
-770
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

118 files changed

+899
-770
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ modelopt_recipes @NVIDIA/modelopt-recipes-codeowners
5050
/examples/model_hub @NVIDIA/modelopt-examples-model_hub-codeowners
5151
/examples/onnx_ptq @NVIDIA/modelopt-onnx-codeowners
5252
/examples/pruning @NVIDIA/modelopt-torch-nas-prune-codeowners
53+
/examples/puzzletron @NVIDIA/modelopt-torch-puzzletron-codeowners
5354
/examples/specdec_bench @NVIDIA/modelopt-torch-speculative-codeowners
5455
/examples/speculative_decoding @NVIDIA/modelopt-torch-speculative-codeowners
5556
/examples/torch_onnx @NVIDIA/modelopt-onnx-codeowners

docs/source/conf.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
# import sys
3232
# sys.path.insert(0, os.path.abspath('.'))
3333

34+
import contextlib
3435
import os
3536
import sys
3637

@@ -44,6 +45,14 @@
4445
sys.path.insert(0, os.path.abspath("../../"))
4546
sys.path.append(os.path.abspath("./_ext"))
4647

48+
# Pre-import modelopt.torch so it is cached in sys.modules before Sphinx applies
49+
# autodoc_mock_imports. Mocking triton/tensorrt_llm at the Sphinx level can break
50+
# transitive imports (transformers, transformer_engine, …) and cause modelopt.torch
51+
# to fail inside autosummary. Importing here — while the real packages are still on
52+
# sys.path — avoids that problem entirely.
53+
with contextlib.suppress(Exception):
54+
import modelopt.torch # noqa: F401
55+
4756
# -- Project information -----------------------------------------------------
4857

4958
project = "Model Optimizer" # pylint: disable=C0103

examples/llm_eval/lm_eval_hf.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,7 @@
5252
from modelopt.torch.sparsity.attention_sparsity.conversion import is_attn_sparsified
5353

5454
try:
55-
import modelopt.torch.puzzletron.anymodel.models # noqa: F401
56-
from modelopt.torch.puzzletron.anymodel.model_descriptor.model_descriptor_factory import (
57-
resolve_descriptor_from_pretrained,
58-
)
59-
from modelopt.torch.puzzletron.anymodel.puzzformer import deci_x_patcher
55+
import modelopt.torch.puzzletron as mtpz
6056

6157
_ANYMODEL_AVAILABLE = True
6258
except ImportError:
@@ -68,12 +64,12 @@ def _anymodel_patcher_context(pretrained, trust_remote_code=False):
6864
if not _ANYMODEL_AVAILABLE or not pretrained:
6965
return contextlib.nullcontext()
7066
try:
71-
descriptor = resolve_descriptor_from_pretrained(
67+
descriptor = mtpz.anymodel.resolve_descriptor_from_pretrained(
7268
pretrained, trust_remote_code=trust_remote_code
7369
)
7470
except (ValueError, AttributeError):
7571
return contextlib.nullcontext()
76-
return deci_x_patcher(model_descriptor=descriptor)
72+
return mtpz.anymodel.deci_x_patcher(model_descriptor=descriptor)
7773

7874

7975
def create_from_arg_obj(cls: type[T], arg_dict: dict, additional_config: dict | None = None) -> T:

examples/puzzletron/evaluation/hf_deployable_anymodel.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@
3131
from peft import PeftModel
3232
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
3333

34-
from modelopt.torch.puzzletron.anymodel.model_descriptor.model_descriptor_factory import (
35-
resolve_descriptor_from_pretrained,
36-
)
37-
from modelopt.torch.puzzletron.anymodel.puzzformer import deci_x_patcher
34+
import modelopt.torch.puzzletron as mtpz
3835

3936
try:
4037
from pytriton.decorators import batch
@@ -145,14 +142,14 @@ def _load(
145142
# =========================================================================
146143
# BEGIN ANYMODEL PATCH
147144
# Wraps model loading with deci_x_patcher for heterogeneous layer configs.
148-
# See: modelopt/torch/puzzletron/anymodel/puzzformer/utils.py
145+
# See: modelopt/torch/puzzletron/anymodel/puzzformer/patcher.py
149146
# =========================================================================
150147

151-
descriptor = resolve_descriptor_from_pretrained(
148+
descriptor = mtpz.anymodel.resolve_descriptor_from_pretrained(
152149
self.hf_model_id_path, trust_remote_code=hf_kwargs.get("trust_remote_code", False)
153150
)
154151

155-
with deci_x_patcher(model_descriptor=descriptor):
152+
with mtpz.anymodel.deci_x_patcher(model_descriptor=descriptor):
156153
self.model = AutoModelForCausalLM.from_pretrained(
157154
self.hf_model_id_path,
158155
torch_dtype=torch_dtype,

examples/puzzletron/main.py

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,8 @@
3737
from pathlib import Path
3838

3939
import modelopt.torch.nas as mtn
40-
import modelopt.torch.puzzletron.mip.mip_and_realize_models as mip_and_realize_models
41-
import modelopt.torch.puzzletron.mip.sweep as sweep
40+
import modelopt.torch.puzzletron as mtpz
4241
import modelopt.torch.utils.distributed as dist
43-
from modelopt.torch.puzzletron.nas.plugins.puzzletron_nas_plugin import PuzzletronModel
44-
from modelopt.torch.puzzletron.tools.hydra_utils import (
45-
initialize_hydra_config_for_dir,
46-
register_hydra_resolvers,
47-
)
48-
from modelopt.torch.puzzletron.tools.logger import mprint
4942

5043

5144
def parse_args():
@@ -74,26 +67,26 @@ def run_full_puzzletron(hydra_config_path: str):
7467
Args:
7568
config_path: Path to the YAML configuration file
7669
"""
77-
mprint("Puzzletron Progress 1/8: starting puzzletron pipeline")
70+
mtpz.tools.mprint("Puzzletron Progress 1/8: starting puzzletron pipeline")
7871
dist.setup(timeout=timedelta(minutes=10))
7972

8073
# Register Hydra custom resolvers (needed for config resolution)
81-
register_hydra_resolvers()
74+
mtpz.tools.register_hydra_resolvers()
8275

8376
hydra_config_path = Path(hydra_config_path).resolve()
8477
hydra_config_dir = str(hydra_config_path.parent)
8578
hydra_config_name = hydra_config_path.stem
8679

8780
# Load hydra config
88-
hydra_cfg = initialize_hydra_config_for_dir(
81+
hydra_cfg = mtpz.tools.initialize_hydra_config_for_dir(
8982
config_dir=hydra_config_dir,
9083
config_name=hydra_config_name,
9184
overrides=[],
9285
)
9386

9487
# Convert model (convert from HF to DeciLM, score pruning activations,
9588
# prune the model and save pruned checkpoints)
96-
input_model = PuzzletronModel()
89+
input_model = mtpz.puzzletron_nas_plugin.PuzzletronModel()
9790
converted_model = mtn.convert(
9891
input_model,
9992
mode=[
@@ -120,7 +113,7 @@ def run_full_puzzletron(hydra_config_path: str):
120113
)
121114

122115
dist.cleanup()
123-
mprint("Puzzletron Progress 8/8: puzzletron pipeline completed (multi-gpu)")
116+
mtpz.tools.mprint("Puzzletron Progress 8/8: puzzletron pipeline completed (multi-gpu)")
124117

125118

126119
def run_mip_only(hydra_config_path: str):
@@ -135,33 +128,33 @@ def run_mip_only(hydra_config_path: str):
135128
dist.setup(timeout=timedelta(minutes=10))
136129

137130
# Register Hydra custom resolvers (needed for config resolution)
138-
register_hydra_resolvers()
131+
mtpz.tools.register_hydra_resolvers()
139132

140133
hydra_config_path = Path(hydra_config_path).resolve()
141134
hydra_config_dir = str(hydra_config_path.parent)
142135
hydra_config_name = hydra_config_path.stem
143136

144137
# Load hydra config
145-
hydra_cfg = initialize_hydra_config_for_dir(
138+
hydra_cfg = mtpz.tools.initialize_hydra_config_for_dir(
146139
config_dir=hydra_config_dir,
147140
config_name=hydra_config_name,
148141
overrides=[],
149142
)
150143

151144
# Check if sweep mode is enabled
152145
if hasattr(hydra_cfg.mip, "sweep") and hydra_cfg.mip.sweep.get("enabled", False):
153-
mprint(
146+
mtpz.tools.mprint(
154147
"Puzzletron Progress 7/8: running MIP sweep for multiple compression rates (multi-gpu)"
155148
)
156-
sweep.run_mip_sweep(hydra_cfg)
149+
mtpz.mip.run_mip_sweep(hydra_cfg)
157150
else:
158151
# mip_and_realize_models (distributed processing)
159152
# TODO: How to make it part of mnt.search() api, similarly to run_full_puzzletron() API
160-
mprint("Puzzletron Progress 7/8: running MIP and realizing models (multi-gpu)")
161-
mip_and_realize_models.launch_mip_and_realize_model(hydra_cfg)
153+
mtpz.tools.mprint("Puzzletron Progress 7/8: running MIP and realizing models (multi-gpu)")
154+
mtpz.mip.launch_mip_and_realize_model(hydra_cfg)
162155

163156
dist.cleanup()
164-
mprint("Puzzletron Progress 8/8: puzzletron pipeline completed (multi-gpu)")
157+
mtpz.tools.mprint("Puzzletron Progress 8/8: puzzletron pipeline completed (multi-gpu)")
165158

166159

167160
def main():

examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ def keep_conversation(entry):
142142
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
143143
if tokenizer.pad_token is None:
144144
tokenizer.pad_token = tokenizer.eos_token
145-
tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
145+
if tokenizer.chat_template is not None:
146+
tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
146147

147148
output_dir = args.output_dir
148149
output_dir.mkdir(parents=True, exist_ok=True)

modelopt/torch/prune/importance_hooks/expert_removal_hooks.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
from .base_hooks import ForwardHook
2828

2929
if TYPE_CHECKING:
30+
# This import is okay because it is only used for type hints; otherwise we
31+
# should not import puzzletron here, as its dependencies may not be installed
3032
from modelopt.torch.puzzletron.block_config import BlockConfig
3133

3234
__all__ = [

modelopt/torch/puzzletron/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,22 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
# NOTE: Some modules also trigger factory registration as side effect
17+
from . import (
18+
activation_scoring,
19+
anymodel,
20+
block_config,
21+
build_library_and_stats,
22+
dataset,
23+
entrypoint,
24+
mip,
25+
plugins,
26+
pruning,
27+
puzzletron_nas_plugin,
28+
replacement_library,
29+
scoring,
30+
sewing_kit,
31+
subblock_stats,
32+
tools,
33+
utils,
34+
)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from .activation_hooks import *
17+
from .score_pruning_activations import *

modelopt/torch/puzzletron/activation_scoring/activation_hooks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from .utils import *

0 commit comments

Comments (0)