Skip to content

Commit f488231

Browse files
committed
reorg files
Signed-off-by: h-guo18 <67671475+h-guo18@users.noreply.github.com>
1 parent: 26ae8da · commit: f488231

13 files changed

Lines changed: 777 additions & 750 deletions

File tree

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ repos:
9999
modelopt/torch/quantization/plugins/attention.py|
100100
modelopt/torch/sparsity/attention_sparsity/methods/vsa_utils.py|
101101
modelopt/torch/speculative/eagle/utils.py|
102-
modelopt/torch/speculative/plugins/transformers.py|
102+
modelopt/torch/speculative/plugins/hf_medusa.py|
103103
modelopt/torch/utils/plugins/megatron_mmlu.py|
104104
examples/chained_optimizations/bert_prune_distill_quantize.py|
105105
examples/deepseek/quantize_to_nvfp4.py|

examples/speculative_decoding/eagle_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def patched_templated_attn(*args, **kwargs):
358358
original_op = args[2]
359359

360360
# This patch is only enabled for eagle model by context manager, not base model.
361-
patch_enbabled = modelopt.torch.speculative.plugins.transformers.ENABLE_CP_TTT_PATCH
361+
patch_enbabled = modelopt.torch.speculative.plugins.hf_eagle.ENABLE_CP_TTT_PATCH
362362

363363
if patch_enbabled and original_op != torch.ops.aten._scaled_dot_product_cudnn_attention:
364364
raise ValueError(f"CP TTT only supports cudnn attention now. Got: {original_op}")

examples/speculative_decoding/scripts/ar_validate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from transformers import AutoTokenizer
2828

2929
import modelopt.torch.opt as mto
30-
from modelopt.torch.speculative.plugins.transformers import HFARValidation
30+
from modelopt.torch.speculative.plugins.hf_eagle import HFARValidation
3131
from modelopt.torch.speculative.utils import load_vlm_or_llm
3232

3333
mto.enable_huggingface_checkpointing()

modelopt/torch/export/plugins/hf_spec_export.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@ def _export_config(self):
171171
template_config = deepcopy(template_config)
172172

173173
def _get_config_from_draft_or_base(key: str, model: nn.Module):
174-
if getattr(model._draft_model_config, key, None) is not None:
175-
return getattr(model._draft_model_config, key)
174+
if getattr(model.eagle_config, key, None) is not None:
175+
return getattr(model.eagle_config, key)
176176
elif getattr(model.config, key, None) is not None:
177177
return getattr(model.config, key)
178178
else:

modelopt/torch/speculative/eagle/default_config.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@
3737
"use_aux_hidden_state": False,
3838
"eagle_aux_hidden_state_layer_ids": [],
3939
"use_mtp_layernorm": False,
40-
"parallel_draft_step": 1,
41-
"parallel_draft_heads_num_layers": 1,
4240
"has_lm_head": False,
4341
"head_dim": 128,
4442
}
@@ -107,7 +105,5 @@
107105
"use_aux_hidden_state": True,
108106
"eagle_aux_hidden_state_layer_ids": [],
109107
"use_mtp_layernorm": False,
110-
"parallel_draft_step": 1,
111-
"parallel_draft_heads_num_layers": 1,
112108
"has_lm_head": False,
113109
}

modelopt/torch/speculative/plugins/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
Please check out the source code of this module for examples of how plugins work and how you can
1919
write your own one. Currently, we support plugins for
2020
21-
- :meth:`transformers<modelopt.torch.speculative.plugins.transformers>`
21+
- :meth:`hf_eagle<modelopt.torch.speculative.plugins.hf_eagle>`
2222
"""
2323

2424
from modelopt.torch.utils import import_plugin
@@ -31,4 +31,5 @@
3131

3232
with import_plugin("transformers"):
3333
from .hf_dflash import *
34-
from .transformers import *
34+
from .hf_eagle import *
35+
from .hf_medusa import *

0 commit comments

Comments (0)