Skip to content

Commit 330c5f6

Browse files
committed
use InputParam/OutputParam templates and fix ruff
1 parent e439012 commit 330c5f6

6 files changed

Lines changed: 130 additions & 129 deletions

File tree

src/diffusers/modular_pipelines/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@
124124
HeliosPyramidDistilledModularPipeline,
125125
HeliosPyramidModularPipeline,
126126
)
127+
from .hunyuan_video1_5 import (
128+
HunyuanVideo15Blocks,
129+
HunyuanVideo15Image2VideoBlocks,
130+
HunyuanVideo15ModularPipeline,
131+
)
127132
from .modular_pipeline import (
128133
AutoPipelineBlocks,
129134
BlockState,
@@ -145,7 +150,6 @@
145150
QwenImageLayeredModularPipeline,
146151
QwenImageModularPipeline,
147152
)
148-
from .hunyuan_video1_5 import HunyuanVideo15Blocks, HunyuanVideo15Image2VideoBlocks, HunyuanVideo15ModularPipeline
149153
from .stable_diffusion_xl import StableDiffusionXLAutoBlocks, StableDiffusionXLModularPipeline
150154
from .wan import (
151155
Wan22Blocks,

src/diffusers/modular_pipelines/hunyuan_video1_5/before_denoise.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ def expected_components(self) -> list[ComponentSpec]:
7676
@property
7777
def inputs(self) -> list[InputParam]:
7878
return [
79-
InputParam("num_videos_per_prompt", default=1),
80-
InputParam("prompt_embeds", required=True, type_hint=torch.Tensor),
81-
InputParam("batch_size", type_hint=int),
79+
InputParam.template("num_images_per_prompt", name="num_videos_per_prompt"),
80+
InputParam.template("prompt_embeds"),
81+
InputParam.template("batch_size", default=None),
8282
]
8383

8484
@property
@@ -111,8 +111,8 @@ def description(self) -> str:
111111
@property
112112
def inputs(self) -> list[InputParam]:
113113
return [
114-
InputParam("num_inference_steps", default=50),
115-
InputParam("sigmas"),
114+
InputParam.template("num_inference_steps"),
115+
InputParam.template("sigmas"),
116116
]
117117

118118
@property
@@ -150,20 +150,20 @@ def description(self) -> str:
150150
@property
151151
def inputs(self) -> list[InputParam]:
152152
return [
153-
InputParam("height", type_hint=int),
154-
InputParam("width", type_hint=int),
153+
InputParam.template("height"),
154+
InputParam.template("width"),
155155
InputParam("num_frames", type_hint=int, default=121),
156-
InputParam("latents", type_hint=torch.Tensor | None),
157-
InputParam("num_videos_per_prompt", type_hint=int, default=1),
158-
InputParam("generator"),
159-
InputParam("batch_size", required=True, type_hint=int),
160-
InputParam("dtype", type_hint=torch.dtype),
156+
InputParam.template("latents"),
157+
InputParam.template("num_images_per_prompt", name="num_videos_per_prompt"),
158+
InputParam.template("generator"),
159+
InputParam.template("batch_size", required=True, default=None),
160+
InputParam.template("dtype", default=None),
161161
]
162162

163163
@property
164164
def intermediate_outputs(self) -> list[OutputParam]:
165165
return [
166-
OutputParam("latents", type_hint=torch.Tensor),
166+
OutputParam.template("latents"),
167167
OutputParam("cond_latents_concat", type_hint=torch.Tensor),
168168
OutputParam("mask_concat", type_hint=torch.Tensor),
169169
OutputParam("image_embeds", type_hint=torch.Tensor),
@@ -265,19 +265,19 @@ def expected_components(self) -> list[ComponentSpec]:
265265
@property
266266
def inputs(self) -> list[InputParam]:
267267
return [
268-
InputParam("image", required=True),
268+
InputParam.template("image"),
269269
InputParam("num_frames", type_hint=int, default=121),
270-
InputParam("latents", type_hint=torch.Tensor | None),
271-
InputParam("num_videos_per_prompt", type_hint=int, default=1),
272-
InputParam("generator"),
273-
InputParam("batch_size", required=True, type_hint=int),
274-
InputParam("dtype", type_hint=torch.dtype),
270+
InputParam.template("latents"),
271+
InputParam.template("num_images_per_prompt", name="num_videos_per_prompt"),
272+
InputParam.template("generator"),
273+
InputParam.template("batch_size", required=True, default=None),
274+
InputParam.template("dtype", default=None),
275275
]
276276

277277
@property
278278
def intermediate_outputs(self) -> list[OutputParam]:
279279
return [
280-
OutputParam("latents", type_hint=torch.Tensor),
280+
OutputParam.template("latents"),
281281
OutputParam("cond_latents_concat", type_hint=torch.Tensor),
282282
OutputParam("mask_concat", type_hint=torch.Tensor),
283283
OutputParam("image_embeds", type_hint=torch.Tensor),

src/diffusers/modular_pipelines/hunyuan_video1_5/decoders.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Any
1615

17-
import numpy as np
18-
import PIL
1916
import torch
2017

2118
from ...configuration_utils import FrozenDict
@@ -49,20 +46,16 @@ def description(self) -> str:
4946
return "Step that decodes the denoised latents into videos"
5047

5148
@property
52-
def inputs(self) -> list[tuple[str, Any]]:
49+
def inputs(self) -> list[InputParam]:
5350
return [
54-
InputParam("latents", required=True, type_hint=torch.Tensor),
55-
InputParam("output_type", default="np", type_hint=str),
51+
InputParam.template("latents", required=True),
52+
InputParam.template("output_type", default="np"),
5653
]
5754

5855
@property
5956
def intermediate_outputs(self) -> list[OutputParam]:
6057
return [
61-
OutputParam(
62-
"videos",
63-
type_hint=list[list[PIL.Image.Image]] | list[torch.Tensor] | list[np.ndarray],
64-
description="The generated videos",
65-
)
58+
OutputParam.template("videos"),
6659
]
6760

6861
# Copied from pipeline_hunyuan_video1_5.py lines 823-829

src/diffusers/modular_pipelines/hunyuan_video1_5/denoise.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def description(self) -> str:
4343
@property
4444
def inputs(self) -> list[InputParam]:
4545
return [
46-
InputParam("latents", required=True, type_hint=torch.Tensor),
46+
InputParam.template("latents", required=True),
4747
InputParam("cond_latents_concat", required=True, type_hint=torch.Tensor),
4848
InputParam("mask_concat", required=True, type_hint=torch.Tensor),
4949
]
@@ -92,8 +92,8 @@ def description(self) -> str:
9292
@property
9393
def inputs(self) -> list[InputParam]:
9494
inputs = [
95-
InputParam("attention_kwargs"),
96-
InputParam("num_inference_steps", required=True, type_hint=int),
95+
InputParam.template("attention_kwargs"),
96+
InputParam.template("num_inference_steps", required=True, default=None),
9797
InputParam("image_embeds", type_hint=torch.Tensor),
9898
]
9999
for value in self._guider_input_fields.values():
@@ -194,8 +194,8 @@ def loop_expected_components(self) -> list[ComponentSpec]:
194194
@property
195195
def loop_inputs(self) -> list[InputParam]:
196196
return [
197-
InputParam("timesteps", required=True, type_hint=torch.Tensor),
198-
InputParam("num_inference_steps", required=True, type_hint=int),
197+
InputParam.template("timesteps", required=True),
198+
InputParam.template("num_inference_steps", required=True, default=None),
199199
]
200200

201201
@torch.no_grad()
@@ -273,10 +273,10 @@ def description(self) -> str:
273273
@property
274274
def inputs(self) -> list[InputParam]:
275275
inputs = [
276-
InputParam("attention_kwargs"),
277-
InputParam("num_inference_steps", required=True, type_hint=int),
276+
InputParam.template("attention_kwargs"),
277+
InputParam.template("num_inference_steps", required=True, default=None),
278278
InputParam("image_embeds", type_hint=torch.Tensor),
279-
InputParam("timesteps", required=True, type_hint=torch.Tensor),
279+
InputParam.template("timesteps", required=True),
280280
]
281281
for value in self._guider_input_fields.values():
282282
if isinstance(value, tuple):

src/diffusers/modular_pipelines/hunyuan_video1_5/encoders.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -158,26 +158,26 @@ def expected_components(self) -> list[ComponentSpec]:
158158
@property
159159
def inputs(self) -> list[InputParam]:
160160
return [
161-
InputParam("prompt"),
162-
InputParam("negative_prompt"),
163-
InputParam("prompt_embeds", type_hint=torch.Tensor),
164-
InputParam("prompt_embeds_mask", type_hint=torch.Tensor),
165-
InputParam("negative_prompt_embeds", type_hint=torch.Tensor),
166-
InputParam("negative_prompt_embeds_mask", type_hint=torch.Tensor),
161+
InputParam.template("prompt", required=False),
162+
InputParam.template("negative_prompt"),
163+
InputParam.template("prompt_embeds", required=False),
164+
InputParam.template("prompt_embeds_mask", required=False),
165+
InputParam.template("negative_prompt_embeds"),
166+
InputParam.template("negative_prompt_embeds_mask"),
167167
InputParam("prompt_embeds_2", type_hint=torch.Tensor),
168168
InputParam("prompt_embeds_mask_2", type_hint=torch.Tensor),
169169
InputParam("negative_prompt_embeds_2", type_hint=torch.Tensor),
170170
InputParam("negative_prompt_embeds_mask_2", type_hint=torch.Tensor),
171-
InputParam("num_videos_per_prompt", type_hint=int, default=1),
171+
InputParam.template("num_images_per_prompt", name="num_videos_per_prompt"),
172172
]
173173

174174
@property
175175
def intermediate_outputs(self) -> list[OutputParam]:
176176
return [
177-
OutputParam("prompt_embeds", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),
178-
OutputParam("prompt_embeds_mask", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),
179-
OutputParam("negative_prompt_embeds", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),
180-
OutputParam("negative_prompt_embeds_mask", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),
177+
OutputParam.template("prompt_embeds"),
178+
OutputParam.template("prompt_embeds_mask"),
179+
OutputParam.template("negative_prompt_embeds"),
180+
OutputParam.template("negative_prompt_embeds_mask"),
181181
OutputParam("prompt_embeds_2", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),
182182
OutputParam("prompt_embeds_mask_2", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),
183183
OutputParam("negative_prompt_embeds_2", type_hint=torch.Tensor, kwargs_type="denoiser_input_fields"),

0 commit comments

Comments (0)