Skip to content

Commit cf29904

Browse files
Merge branch 'main' into feat/sd3-modular-pipeline
2 parents 5995a34 + c8c8401 commit cf29904

10 files changed

Lines changed: 1529 additions & 11 deletions

File tree

docs/source/en/optimization/speed-memory-optims.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ The table below provides a comparison of optimization strategy combinations and
3333

3434
This guide will show you how to compile and offload a quantized model with [bitsandbytes](../quantization/bitsandbytes#torchcompile). Make sure you are using [PyTorch nightly](https://pytorch.org/get-started/locally/) and the latest version of bitsandbytes.
3535

36+
While we use bitsandbytes in this example, other quantization backends such as [TorchAO](../quantization/torchao.md) also support these features.
37+
3638
```bash
3739
pip install -U bitsandbytes
3840
```

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@
535535
"EasyAnimateInpaintPipeline",
536536
"EasyAnimatePipeline",
537537
"ErnieImagePipeline",
538+
"Flux2KleinInpaintPipeline",
538539
"Flux2KleinKVPipeline",
539540
"Flux2KleinPipeline",
540541
"Flux2Pipeline",
@@ -1321,6 +1322,7 @@
13211322
EasyAnimateInpaintPipeline,
13221323
EasyAnimatePipeline,
13231324
ErnieImagePipeline,
1325+
Flux2KleinInpaintPipeline,
13241326
Flux2KleinKVPipeline,
13251327
Flux2KleinPipeline,
13261328
Flux2Pipeline,

src/diffusers/loaders/lora_conversion_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2331,6 +2331,20 @@ def _convert_non_diffusers_flux2_lora_to_diffusers(state_dict):
23312331
temp_state_dict[new_key] = v
23322332
original_state_dict = temp_state_dict
23332333

2334+
# Some Flux2 checkpoints skip the ai-toolkit `single_blocks` / `double_blocks`
2335+
# layout and already store expanded diffusers block names. Accept those
2336+
# directly, and normalize the legacy `sformer_blocks` alias used by some exports.
2337+
possible_expanded_block_prefixes = {
2338+
"single_transformer_blocks.": "single_transformer_blocks.",
2339+
"transformer_blocks.": "transformer_blocks.",
2340+
"sformer_blocks.": "transformer_blocks.",
2341+
}
2342+
for key in list(original_state_dict.keys()):
2343+
for source_prefix, target_prefix in possible_expanded_block_prefixes.items():
2344+
if key.startswith(source_prefix):
2345+
converted_state_dict[target_prefix + key[len(source_prefix) :]] = original_state_dict.pop(key)
2346+
break
2347+
23342348
num_double_layers = 0
23352349
num_single_layers = 0
23362350
for key in original_state_dict.keys():
@@ -2421,6 +2435,8 @@ def _convert_non_diffusers_flux2_lora_to_diffusers(state_dict):
24212435
"txt_in": "context_embedder",
24222436
"time_in.in_layer": "time_guidance_embed.timestep_embedder.linear_1",
24232437
"time_in.out_layer": "time_guidance_embed.timestep_embedder.linear_2",
2438+
"guidance_in.in_layer": "time_guidance_embed.guidance_embedder.linear_1",
2439+
"guidance_in.out_layer": "time_guidance_embed.guidance_embedder.linear_2",
24242440
"final_layer.linear": "proj_out",
24252441
"final_layer.adaLN_modulation.1": "norm_out.linear",
24262442
"single_stream_modulation.lin": "single_stream_modulation.linear",

src/diffusers/models/attention_dispatch.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,17 +1521,16 @@ def _maybe_modify_attn_mask_npu(query: torch.Tensor, key: torch.Tensor, attn_mas
15211521
if attn_mask is not None and torch.all(attn_mask != 0):
15221522
attn_mask = None
15231523

1524-
# Reshape Attention Mask: [batch_size, seq_len_k] -> [batch_size, 1, sqe_len_q, seq_len_k]
1524+
# Reshape Attention Mask: [batch_size, seq_len_k] or [batch_size, 1, 1, seq_len_k] -> [batch_size, 1, seq_len_q, seq_len_k]
15251525
# https://www.hiascend.com/document/detail/zh/Pytorch/730/apiref/torchnpuCustomsapi/docs/context/torch_npu-npu_fusion_attention.md
1526-
if (
1527-
attn_mask is not None
1528-
and attn_mask.ndim == 2
1529-
and attn_mask.shape[0] == query.shape[0]
1530-
and attn_mask.shape[1] == key.shape[1]
1531-
):
1532-
B, Sq, Skv = attn_mask.shape[0], query.shape[1], key.shape[1]
1526+
if attn_mask is not None:
1527+
if attn_mask.ndim == 2 and attn_mask.shape[0] == query.shape[0] and attn_mask.shape[1] == key.shape[1]:
1528+
batch_size, seq_len_q, seq_len_kv = attn_mask.shape[0], query.shape[1], key.shape[1]
1529+
attn_mask = attn_mask.unsqueeze(1).expand(batch_size, seq_len_q, seq_len_kv).unsqueeze(1).contiguous()
1530+
elif attn_mask.ndim == 4 and attn_mask.shape[1:3] == (1, 1):
1531+
attn_mask = attn_mask.expand(-1, -1, query.shape[1], -1).contiguous()
1532+
15331533
attn_mask = ~attn_mask.to(torch.bool)
1534-
attn_mask = attn_mask.unsqueeze(1).expand(B, Sq, Skv).unsqueeze(1).contiguous()
15351534

15361535
return attn_mask
15371536

src/diffusers/pipelines/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,12 @@
160160
]
161161
_import_structure["bria"] = ["BriaPipeline"]
162162
_import_structure["bria_fibo"] = ["BriaFiboPipeline", "BriaFiboEditPipeline"]
163-
_import_structure["flux2"] = ["Flux2Pipeline", "Flux2KleinPipeline", "Flux2KleinKVPipeline"]
163+
_import_structure["flux2"] = [
164+
"Flux2Pipeline",
165+
"Flux2KleinPipeline",
166+
"Flux2KleinInpaintPipeline",
167+
"Flux2KleinKVPipeline",
168+
]
164169
_import_structure["flux"] = [
165170
"FluxControlPipeline",
166171
"FluxControlInpaintPipeline",
@@ -697,7 +702,7 @@
697702
FluxPriorReduxPipeline,
698703
ReduxImageEncoder,
699704
)
700-
from .flux2 import Flux2KleinKVPipeline, Flux2KleinPipeline, Flux2Pipeline
705+
from .flux2 import Flux2KleinInpaintPipeline, Flux2KleinKVPipeline, Flux2KleinPipeline, Flux2Pipeline
701706
from .glm_image import GlmImagePipeline
702707
from .helios import HeliosPipeline, HeliosPyramidPipeline
703708
from .hidream_image import HiDreamImagePipeline

src/diffusers/pipelines/flux2/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
else:
2525
_import_structure["pipeline_flux2"] = ["Flux2Pipeline"]
2626
_import_structure["pipeline_flux2_klein"] = ["Flux2KleinPipeline"]
27+
_import_structure["pipeline_flux2_klein_inpaint"] = ["Flux2KleinInpaintPipeline"]
2728
_import_structure["pipeline_flux2_klein_kv"] = ["Flux2KleinKVPipeline"]
2829
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
2930
try:
@@ -34,6 +35,7 @@
3435
else:
3536
from .pipeline_flux2 import Flux2Pipeline
3637
from .pipeline_flux2_klein import Flux2KleinPipeline
38+
from .pipeline_flux2_klein_inpaint import Flux2KleinInpaintPipeline
3739
from .pipeline_flux2_klein_kv import Flux2KleinKVPipeline
3840
else:
3941
import sys

src/diffusers/pipelines/flux2/image_processor.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,12 @@ class Flux2ImageProcessor(VaeImageProcessor):
3535
VAE latent channels.
3636
do_normalize (`bool`, *optional*, defaults to `True`):
3737
Whether to normalize the image to [-1,1].
38+
do_binarize (`bool`, *optional*, defaults to `False`):
39+
Whether to binarize the image to 0/1.
3840
do_convert_rgb (`bool`, *optional*, defaults to `True`):
3941
Whether to convert the images to RGB format.
42+
do_convert_grayscale (`bool`, *optional*, defaults to `False`):
43+
Whether to convert the images to grayscale format.
4044
"""
4145

4246
@register_to_config
@@ -46,14 +50,18 @@ def __init__(
4650
vae_scale_factor: int = 16,
4751
vae_latent_channels: int = 32,
4852
do_normalize: bool = True,
53+
do_binarize: bool = False,
4954
do_convert_rgb: bool = True,
55+
do_convert_grayscale: bool = False,
5056
):
5157
super().__init__(
5258
do_resize=do_resize,
5359
vae_scale_factor=vae_scale_factor,
5460
vae_latent_channels=vae_latent_channels,
5561
do_normalize=do_normalize,
62+
do_binarize=do_binarize,
5663
do_convert_rgb=do_convert_rgb,
64+
do_convert_grayscale=do_convert_grayscale,
5765
)
5866

5967
@staticmethod

0 commit comments

Comments
 (0)