Skip to content

Commit e35962b

Browse files
committed
add tests for qwenimage modular.
1 parent 8f80dda commit e35962b

6 files changed

Lines changed: 97 additions & 11 deletions

File tree

src/diffusers/modular_pipelines/qwenimage/before_denoise.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def expected_components(self) -> List[ComponentSpec]:
132132
@property
133133
def inputs(self) -> List[InputParam]:
134134
return [
135+
InputParam("latents"),
135136
InputParam(name="height"),
136137
InputParam(name="width"),
137138
InputParam(name="num_images_per_prompt", default=1),
@@ -196,11 +197,11 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
196197
f"You have passed a list of generators of length {len(block_state.generator)}, but requested an effective batch"
197198
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
198199
)
199-
200-
block_state.latents = randn_tensor(
201-
shape, generator=block_state.generator, device=device, dtype=block_state.dtype
202-
)
203-
block_state.latents = components.pachifier.pack_latents(block_state.latents)
200+
if block_state.latents is None:
201+
block_state.latents = randn_tensor(
202+
shape, generator=block_state.generator, device=device, dtype=block_state.dtype
203+
)
204+
block_state.latents = components.pachifier.pack_latents(block_state.latents)
204205

205206
self.set_block_state(state, block_state)
206207
return components, state
@@ -549,7 +550,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
549550
block_state.width // components.vae_scale_factor // 2,
550551
)
551552
]
552-
* block_state.batch_size
553+
for _ in range(block_state.batch_size)
553554
]
554555
block_state.txt_seq_lens = (
555556
block_state.prompt_embeds_mask.sum(dim=1).tolist() if block_state.prompt_embeds_mask is not None else None

src/diffusers/modular_pipelines/qwenimage/decoders.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
7474
block_state = self.get_block_state(state)
7575

7676
# YiYi Notes: remove support for output_type = "latents", we can just skip decode/encode step in modular
77+
vae_scale_factor = 2 ** len(components.vae.temperal_downsample)
7778
block_state.latents = components.pachifier.unpack_latents(
78-
block_state.latents, block_state.height, block_state.width
79+
block_state.latents, block_state.height, block_state.width, vae_scale_factor=vae_scale_factor
7980
)
8081
block_state.latents = block_state.latents.to(components.vae.dtype)
8182

src/diffusers/modular_pipelines/qwenimage/encoders.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
503503
block_state.prompt_embeds = block_state.prompt_embeds[:, : block_state.max_sequence_length]
504504
block_state.prompt_embeds_mask = block_state.prompt_embeds_mask[:, : block_state.max_sequence_length]
505505

506+
block_state.negative_prompt_embeds = None
507+
block_state.negative_prompt_embeds_mask = None
506508
if components.requires_unconditional_embeds:
507509
negative_prompt = block_state.negative_prompt or ""
508510
block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds(

src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,7 @@ class QwenImagePachifier(ConfigMixin):
2626
config_name = "config.json"
2727

2828
@register_to_config
29-
def __init__(
30-
self,
31-
patch_size: int = 2,
32-
):
29+
def __init__(self, patch_size: int = 2):
3330
super().__init__()
3431

3532
def pack_latents(self, latents):

tests/modular_pipelines/qwen/__init__.py

Whitespace-only changes.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# coding=utf-8
2+
# Copyright 2025 HuggingFace Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import unittest
17+
18+
import numpy as np
19+
import torch
20+
21+
from diffusers import ClassifierFreeGuidance
22+
from diffusers.modular_pipelines import QwenImageAutoBlocks, QwenImageModularPipeline
23+
24+
from ...testing_utils import torch_device
25+
from ..test_modular_pipelines_common import ModularPipelineTesterMixin
26+
27+
28+
class QwenImagexModularTests:
    """Shared fixtures for the QwenImage modular pipeline tests.

    Holds the pipeline/block classes under test, the repo the pipeline is
    initialised from, and helpers that build a pipeline and a small set of
    call arguments. Intended to be mixed into a concrete test case.
    """

    pipeline_class = QwenImageModularPipeline
    pipeline_blocks_class = QwenImageAutoBlocks
    repo = "hf-internal-testing/tiny-qwenimage-modular"

    # NOTE(review): presumably read by ModularPipelineTesterMixin to decide
    # which call arguments (and which batched arguments) to exercise - confirm
    # against the mixin.
    params = frozenset(
        [
            "prompt",
            "height",
            "width",
            "negative_prompt",
            "attention_kwargs",
            "image",
            "mask_image",
        ]
    )
    batch_params = frozenset(
        [
            "prompt",
            "negative_prompt",
            "image",
            "mask_image",
        ]
    )

    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
        """Build a pipeline from ``pipeline_blocks_class`` and load its components.

        Args:
            components_manager: Forwarded as-is to ``init_pipeline``.
            torch_dtype: Forwarded to ``load_components``.

        Returns:
            The pipeline object, with components loaded and its progress bar
            configured with ``disable=None``.
        """
        blocks = self.pipeline_blocks_class()
        pipe = blocks.init_pipeline(self.repo, components_manager=components_manager)
        pipe.load_components(torch_dtype=torch_dtype)
        pipe.set_progress_bar_config(disable=None)
        return pipe

    def get_dummy_inputs(self, device, seed=0):
        """Return a small, seeded set of keyword arguments for a pipeline call.

        Args:
            device: Target device; only its string form is inspected here.
            seed: Seed applied to the random generator placed in the inputs.

        Returns:
            A dict of call arguments (prompt, negative prompt, generator,
            step count, 32x32 size, sequence length cap and output type).
        """
        # On "mps" the global generator is seeded and used instead of a
        # device-bound one.
        on_mps = str(device).startswith("mps")
        generator = torch.manual_seed(seed) if on_mps else torch.Generator(device=device).manual_seed(seed)

        return {
            "prompt": "dance monkey",
            "negative_prompt": "bad quality",
            "generator": generator,
            "num_inference_steps": 2,
            "height": 32,
            "width": 32,
            "max_sequence_length": 16,
            "output_type": "np",
        }
58+
59+
60+
class QwenImageModularGuiderTests:
    """Guider-related checks, meant to be mixed into a concrete pipeline test case.

    Relies on the host class providing ``get_pipeline`` and ``get_dummy_inputs``.
    """

    def test_guider_cfg(self):
        """Check that changing the CFG guidance scale changes the generated images.

        The pipeline is run twice with freshly built dummy inputs: first with a
        guidance scale of 1.0 (used as the baseline), then with 7.5. Both
        outputs must share a shape and differ by more than 1e-2 somewhere.
        """
        pipe = self.get_pipeline().to(torch_device)

        results = []
        # Order matters: baseline (1.0) first, then the stronger guidance (7.5).
        for scale in (1.0, 7.5):
            pipe.update_components(guider=ClassifierFreeGuidance(guidance_scale=scale))
            call_kwargs = self.get_dummy_inputs(torch_device)
            results.append(pipe(**call_kwargs, output="images"))
        out_no_cfg, out_cfg = results

        assert out_cfg.shape == out_no_cfg.shape
        max_diff = np.abs(out_cfg - out_no_cfg).max()
        assert max_diff > 1e-2, "Output with CFG must be different from normal inference"
79+
80+
81+
class QwenImageModularPipelineFastTests(
    QwenImagexModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase
):
    """Concrete fast test case for the QwenImage modular pipeline.

    Combines the QwenImage fixtures, the guider checks and the common modular
    pipeline tester mixin with ``unittest.TestCase``. No constructor override
    is needed: construction is handled by the next ``__init__`` in the MRO,
    which is what a pass-through ``super().__init__(*args, **kwargs)`` did.
    """

0 commit comments

Comments
 (0)