
Commit d9fe3d7

polish

Signed-off-by: h-guo18 <67671475+h-guo18@users.noreply.github.com>
1 parent 01be4d9

2 files changed: 22 additions & 14 deletions

examples/speculative_decoding/main.py (6 additions & 11 deletions)
```diff
@@ -151,21 +151,16 @@ def train():
         trust_remote_code=recipe.model.trust_remote_code,
     )
     if isinstance(recipe, ModelOptMedusaRecipe):
-        mtsp.convert(model, [("medusa", recipe.medusa.model_dump())])
+        medusa_cfg: dict = recipe.medusa.model_dump()
+        mtsp.convert(model, [("medusa", medusa_cfg)])
     elif isinstance(recipe, ModelOptEagleRecipe):
-        eagle_cfg = recipe.eagle.model_dump()
+        eagle_cfg: dict = recipe.eagle.model_dump()
         mtsp.convert(model, [("eagle", eagle_cfg)])
-
-        # Load draft vocab cache if the draft model uses a compressed vocabulary
-        if model.eagle_config.draft_vocab_size < model.eagle_config.vocab_size:
-            d2t = recipe.data.draft_vocab_cache
-            if d2t is None or not os.path.isfile(d2t):
-                raise FileNotFoundError(f"Draft vocab cache provided but not found: {d2t}")
-            model.eagle_module.d2t = torch.load(d2t, weights_only=True)
-            print_rank_0(f"Loaded draft vocab cache from {d2t}.")
+        # Load draft vocab cache
+        mtsp.plugins.HFEagleModel.load_draft_vocab_cache(model, recipe.data.draft_vocab_cache)
     elif isinstance(recipe, ModelOptDFlashRecipe):
         # Re-validate with tokenizer to resolve dflash_mask_token_id and enforce its presence.
-        dflash_cfg = DFlashConfig.model_validate(
+        dflash_cfg: dict = DFlashConfig.model_validate(
             recipe.dflash.model_dump(), context={"tokenizer": tokenizer}
         ).model_dump()
         mtsp.convert(model, [("dflash", dflash_cfg)])
```

modelopt/torch/speculative/plugins/hf_eagle.py (16 additions & 3 deletions)
```diff
@@ -17,6 +17,7 @@

 import contextlib
 import copy
+import os
 from typing import Any

 import torch
@@ -25,6 +26,8 @@
 from transformers.models.llama.modeling_llama import LlamaDecoderLayer
 from transformers.utils import ModelOutput

+from modelopt.torch.utils import print_rank_0
+
 from ...export.plugins.hf_spec_export import EagleExporter, SpeculativeDecodingExporter
 from ..eagle.conversion import EagleDMRegistry
 from ..eagle.eagle_model import EagleModel
@@ -88,7 +91,7 @@ def _nvtx_range(self, name):

             return nvtx.range(name)
         except Exception as e:
-            print(f"Failed to create NVTX range {name}: {e}")
+            print_rank_0(f"Failed to create NVTX range {name}: {e}")
         return contextlib.nullcontext()

     def _find_base_model_parts(self):
@@ -105,7 +108,7 @@ def _find_base_model_parts(self):
             try:
                 submodule = self.get_submodule(path)
                 assert isinstance(submodule, torch.nn.Module)
-                print(f"Found {name} at {path}")
+                print_rank_0(f"Found {name} at {path}")
                 found_submodule = True
                 setattr(self, name, path)
                 break
@@ -128,7 +131,7 @@ def _activate_torch_compile(self):
             try:
                 setattr(self, name, torch.compile(getattr(self, name), dynamic=False, **kwargs))
             except Exception:  # noqa: PERF203
-                print(f"Disabling torch.compile for {name} due to compilation error.")
+                print_rank_0(f"Disabling torch.compile for {name} due to compilation error.")

     def get_dummy_inputs(self) -> dict:
         """Construct dummy inputs for export forward pass."""
@@ -250,6 +253,16 @@ def _preservation_loss(
         )
         return -loss.sum(dim=-1).mean() * self.eagle_base_lora_preservation_loss_weight

+    @staticmethod
+    def load_draft_vocab_cache(model, d2t_path: str) -> None:
+        """Load the draft vocab cache from the given path."""
+        if d2t_path is None or model.eagle_config.draft_vocab_size >= model.eagle_config.vocab_size:
+            return
+        if not os.path.isfile(d2t_path):
+            raise FileNotFoundError(f"Draft vocab cache provided but not found: {d2t_path}")
+        model.eagle_module.d2t = torch.load(d2t_path, weights_only=True)
+        print_rank_0(f"Loaded draft vocab cache from {d2t_path}.")
+
     def modify(
         self,
         config,
```
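
The recurring change in this file swaps bare `print` calls for `print_rank_0`, so that messages are emitted once rather than once per process in multi-GPU runs. A minimal sketch of what such a helper typically does (the real implementation lives in `modelopt.torch.utils` and may differ in detail):

```python
# Illustrative rank-0-only print helper; not the actual modelopt.torch.utils code.
import torch.distributed as dist


def print_rank_0(*args, **kwargs) -> None:
    """Print only on global rank 0 to avoid duplicated logs in distributed runs."""
    if not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0:
        print(*args, **kwargs)
```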
