fastgen: delete unused wan22 plugin

jingyu-ml · claude · jingyu-ml · commit ba93c30e4ee9 · 2026-06-01T14:32:48.000-07:00
wan22.py shipped the Wan 2.2 teacher feature-capture helpers, but with the
Wan example config and recipe already removed, the plugin is never
exercised: the Qwen-Image plugin provides its own attach_feature_capture,
and the DMD feature-capture path is duck-typed on ``_fastgen_captured``.

Remove the module, drop its (only) import from plugins/__init__.py, and
repoint the docstring / error-message references at the qwen_image plugin.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Jingyu Xin <jingyux@nvidia.com>
diff --git a/modelopt/torch/fastgen/__init__.py b/modelopt/torch/fastgen/__init__.py
@@ -31,7 +31,7 @@
 
     # If GAN is enabled, expose intermediate teacher features to the discriminator.
     if cfg.gan_loss_weight_gen > 0:
-        mtf.plugins.wan22.attach_feature_capture(teacher, feature_indices=[15, 22, 29])
+        mtf.plugins.qwen_image.attach_feature_capture(teacher, feature_indices=[30])
 
     pipeline = mtf.DMDPipeline(student, teacher, fake_score, cfg, discriminator=disc)
 
@@ -62,7 +62,7 @@
 from .pipeline import DistillationPipeline
 
 # isort: off
-# Plugins must be imported after the core exports so the wan22 hooks can reference
+# Plugins must be imported after the core exports so the plugin hooks can reference
 # DMDPipeline if needed in the future; also matches the ordering used by
 # modelopt.torch.distill.
 from . import plugins
diff --git a/modelopt/torch/fastgen/discriminators.py b/modelopt/torch/fastgen/discriminators.py
@@ -22,7 +22,7 @@
 a list of spatial feature tensors ``[B, C, H, W]`` and returns concatenated
 logits ``[B, num_heads]``. The model-specific work of producing those tensors
 (installing forward hooks, reshaping packed-token streams into spatial maps)
-lives in the per-model plugins (``plugins/qwen_image.py``, ``plugins/wan22.py``).
+lives in the per-model plugins (``plugins/qwen_image.py``).
 """
 
 from __future__ import annotations
diff --git a/modelopt/torch/fastgen/loader.py b/modelopt/torch/fastgen/loader.py
@@ -15,7 +15,7 @@
 
 """YAML-driven configuration loading for fastgen distillation pipelines.
 
-YAML is the first-class entry point for DMD-on-Wan configurations — the fastgen library
+YAML is the first-class entry point for DMD configurations — the fastgen library
 does not expect callers to hand-build Python dicts. Typical usage::
 
     from modelopt.torch.fastgen import DMDConfig, load_dmd_config
diff --git a/modelopt/torch/fastgen/methods/dmd.py b/modelopt/torch/fastgen/methods/dmd.py
@@ -64,7 +64,7 @@
 
 
 # ---------------------------------------------------------------------------- #
-#  Feature capture helper (duck-typed so tests can bypass the wan22 plugin)    #
+#  Feature capture helper (duck-typed so tests can bypass the capture plugin)  #
 # ---------------------------------------------------------------------------- #
 
 
@@ -76,8 +76,7 @@ def _drain_if_hooked(module: nn.Module) -> list[torch.Tensor] | None:
     call sites can drain unconditionally after every teacher forward — this prevents
     the buffer from growing across steps when hooks are attached but the GAN branch is
     disabled (e.g. an ablation). Callers that need the strict "did you forget to attach
-    hooks?" failure mode should call :func:`_require_hooked` on the result, or use
-    :func:`modelopt.torch.fastgen.plugins.wan22.pop_captured_features` directly.
+    hooks?" failure mode should call :func:`_require_hooked` on the result.
     """
     captured = getattr(module, "_fastgen_captured", None)
     if captured is None:
@@ -106,7 +105,7 @@ def _require_hooked(
         raise RuntimeError(
             f"Feature-capture hooks are required on the teacher ({which} branch): "
             "teacher._fastgen_captured is missing. Call "
-            "modelopt.torch.fastgen.plugins.wan22.attach_feature_capture(teacher, ...) "
+            "modelopt.torch.fastgen.plugins.qwen_image.attach_feature_capture(teacher, ...) "
             "before running this loss."
         )
     return features
@@ -127,7 +126,7 @@ class DMDPipeline(DistillationPipeline):
             object with a ``.sample`` attribute.
         teacher: Frozen reference module with the same call signature. If ``discriminator``
             is provided, feature-capture hooks must be attached to ``teacher`` before
-            calling ``compute_*_loss`` — see :func:`modelopt.torch.fastgen.plugins.wan22.attach_feature_capture`.
+            calling ``compute_*_loss`` — see :func:`modelopt.torch.fastgen.plugins.qwen_image.attach_feature_capture`.
         fake_score: Trainable auxiliary module (same signature as teacher/student). Used to
             approximate the student's generated distribution for the VSD gradient.
         config: :class:`~modelopt.torch.fastgen.config.DMDConfig` with the hyperparameters.
diff --git a/modelopt/torch/fastgen/plugins/__init__.py b/modelopt/torch/fastgen/plugins/__init__.py
@@ -15,16 +15,13 @@
 
 """Optional plugins for the fastgen subpackage (gated via ``import_plugin``).
 
-``wan22`` holds the forward-hook helpers for exposing intermediate teacher activations
-to the DMD2 GAN discriminator on Wan 2.2 models. The module itself only depends on
-``torch`` at runtime, but we still gate the import so environments that choose not to
-install any optional fastgen dependencies see a clean package import.
+``qwen_image`` holds the Qwen-Image pipeline plus the forward-hook helpers that expose
+intermediate teacher activations to the DMD2 GAN discriminator. The import is gated so
+environments that choose not to install the optional fastgen dependencies still see a
+clean package import.
 """
 
 from modelopt.torch.utils import import_plugin
 
-with import_plugin("wan22"):
-    from .wan22 import *
-
 with import_plugin("qwen_image"):
     from .qwen_image import *
diff --git a/modelopt/torch/fastgen/plugins/qwen_image.py b/modelopt/torch/fastgen/plugins/qwen_image.py
@@ -257,10 +257,9 @@ def _call_model(
 #  GAN feature capture                                                          #
 # ---------------------------------------------------------------------------- #
 
-# Attribute names match :mod:`modelopt.torch.fastgen.plugins.wan22` so the shared
+# These attribute names are what the shared
 # :func:`~modelopt.torch.fastgen.methods.dmd._drain_if_hooked` /
-# :func:`~modelopt.torch.fastgen.methods.dmd._require_hooked` helpers work
-# without modification.
+# :func:`~modelopt.torch.fastgen.methods.dmd._require_hooked` helpers look for.
 _CAPTURED_ATTR = "_fastgen_captured"
 _HANDLES_ATTR = "_fastgen_capture_handles"
 _INDICES_ATTR = "_fastgen_capture_indices"
diff --git a/modelopt/torch/fastgen/plugins/wan22.py b/modelopt/torch/fastgen/plugins/wan22.py