
Commit afdda57

zhtmike and sayakpaul authored

Fix the attention mask in ulysses SP for QwenImage (#13278)

* fix mask in SP
* change the modification to qwen specific
* drop xfail since qwen-image mask is fixed

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

1 parent 5fc2bd2 commit afdda57

File tree

2 files changed: +1 −1 lines changed

src/diffusers/models/transformers/transformer_qwenimage.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -933,6 +933,7 @@ def forward(
         batch_size, image_seq_len = hidden_states.shape[:2]
         image_mask = torch.ones((batch_size, image_seq_len), dtype=torch.bool, device=hidden_states.device)
         joint_attention_mask = torch.cat([encoder_hidden_states_mask, image_mask], dim=1)
+        joint_attention_mask = joint_attention_mask[:, None, None, :]
         block_attention_kwargs["attention_mask"] = joint_attention_mask

         for index_block, block in enumerate(self.transformer_blocks):
```
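The one-line fix inserts singleton head and query dimensions into the 2D token mask so it broadcasts against 4D attention scores. A minimal NumPy sketch of that broadcasting, with illustrative sizes that are not diffusers' actual values:

```python
import numpy as np

# Illustrative sizes only (hypothetical, not taken from QwenImage)
batch_size, text_len, image_len = 2, 7, 16

# 2D joint mask over text + image tokens: shape (batch, text_len + image_len)
encoder_hidden_states_mask = np.ones((batch_size, text_len), dtype=bool)
image_mask = np.ones((batch_size, image_len), dtype=bool)
joint_attention_mask = np.concatenate([encoder_hidden_states_mask, image_mask], axis=1)
assert joint_attention_mask.shape == (2, 23)

# The fix: add singleton head and query dims so the mask broadcasts
# against attention scores of shape (batch, num_heads, q_len, kv_len)
joint_attention_mask = joint_attention_mask[:, None, None, :]
assert joint_attention_mask.shape == (2, 1, 1, 23)
```

Without the extra dimensions, a `(batch, kv_len)` mask cannot line up with `(batch, num_heads, q_len, kv_len)` scores, which is what the Ulysses sequence-parallel path tripped over.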

tests/models/testing_utils/parallelism.py

Lines changed: 0 additions & 1 deletion

```diff
@@ -200,7 +200,6 @@ def test_context_parallel_inference(self, cp_type, batch_size: int = 1):
                 f"Context parallel inference failed: {return_dict.get('error', 'Unknown error')}"
             )

-    @pytest.mark.xfail(reason="Context parallel may not support batch_size > 1")
     @pytest.mark.parametrize("cp_type", ["ulysses_degree", "ring_degree"], ids=["ulysses", "ring"])
     def test_context_parallel_batch_inputs(self, cp_type):
         self.test_context_parallel_inference(cp_type, batch_size=2)
```
