1616"""Configurations for speculative decoding modes."""
1717
1818from copy import deepcopy
19- from typing import Any
2019
21- from pydantic import ValidationInfo , model_validator
20+ from pydantic import model_validator
2221
2322from modelopt .torch .opt .config import ModeloptBaseConfig , ModeloptField
2423
@@ -102,10 +101,12 @@ class DFlashConfig(ModeloptBaseConfig):
102101 default = True , description = "Whether to report eval accuracy."
103102 )
104103
105- dflash_mask_token_id : int = ModeloptField (
104+ dflash_mask_token_id : int | None = ModeloptField (
106105 default = None ,
107- description = "Token ID used for masked (unknown) positions. "
108- "Set explicitly or auto-detected from tokenizer.mask_token_id in main.py." ,
106+ description = (
107+ "Token ID used for masked (unknown) positions. Set explicitly in the recipe YAML, "
108+ "or left unset to fall back to ``tokenizer.mask_token_id`` at training time."
109+ ),
109110 )
110111
111112 dflash_architecture_config : dict = ModeloptField (
@@ -117,37 +118,6 @@ class DFlashConfig(ModeloptBaseConfig):
117118 description = "Whether to use torch.compile on DFlash forward/loss methods." ,
118119 )
119120
120- @model_validator (mode = "before" )
121- @classmethod
122- def _resolve_mask_token_id (cls , data : Any , info : ValidationInfo ) -> Any :
123- """Auto-detect ``dflash_mask_token_id`` from tokenizer when provided in context."""
124- if not isinstance (data , dict ) or data .get ("dflash_mask_token_id" ) is not None :
125- return data
126- ctx = info .context if info .context else {}
127- tokenizer = ctx .get ("tokenizer" )
128- if tokenizer is not None and getattr (tokenizer , "mask_token_id" , None ) is not None :
129- data ["dflash_mask_token_id" ] = tokenizer .mask_token_id
130- return data
131-
132- @model_validator (mode = "after" )
133- def _check_mask_token_id (self , info : ValidationInfo ) -> "DFlashConfig" :
134- """Require ``dflash_mask_token_id`` once a tokenizer is available.
135-
136- Skipped when no tokenizer is in context (e.g., recipe-load time before the tokenizer
137- is constructed). The caller is expected to re-validate with ``context={"tokenizer": ...}``
138- once the tokenizer is loaded; that pass enforces the requirement.
139- """
140- ctx = info .context if info .context else {}
141- if ctx .get ("tokenizer" ) is None :
142- return self
143- if self .dflash_mask_token_id is None :
144- raise ValueError (
145- "dflash_mask_token_id is required. Set it in the config YAML "
146- "(dflash.dflash_mask_token_id=TOKEN_ID) or ensure the tokenizer "
147- "has a mask_token_id attribute."
148- )
149- return self
150-
151121
152122class MedusaConfig (ModeloptBaseConfig ):
153123 """Medusa config."""