Skip to content

Commit cf29904

Browse files
Merge branch 'main' into feat/sd3-modular-pipeline
2 parents 5995a34 + c8c8401 commit cf29904

10 files changed

Lines changed: 1529 additions & 11 deletions

File tree

docs/source/en/optimization/speed-memory-optims.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ The table below provides a comparison of optimization strategy combinations and
3333

3434
This guide will show you how to compile and offload a quantized model with [bitsandbytes](../quantization/bitsandbytes#torchcompile). Make sure you are using [PyTorch nightly](https://pytorch.org/get-started/locally/) and the latest version of bitsandbytes.
3535

36+
While we use bitsandbytes in this example, other quantization backends such as [TorchAO](../quantization/torchao.md) also support these features.
37+
3638
```bash
3739
pip install -U bitsandbytes
3840
```

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@
535535
"EasyAnimateInpaintPipeline",
536536
"EasyAnimatePipeline",
537537
"ErnieImagePipeline",
538+
"Flux2KleinInpaintPipeline",
538539
"Flux2KleinKVPipeline",
539540
"Flux2KleinPipeline",
540541
"Flux2Pipeline",
@@ -1321,6 +1322,7 @@
13211322
EasyAnimateInpaintPipeline,
13221323
EasyAnimatePipeline,
13231324
ErnieImagePipeline,
1325+
Flux2KleinInpaintPipeline,
13241326
Flux2KleinKVPipeline,
13251327
Flux2KleinPipeline,
13261328
Flux2Pipeline,

src/diffusers/loaders/lora_conversion_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2331,6 +2331,20 @@ def _convert_non_diffusers_flux2_lora_to_diffusers(state_dict):
23312331
temp_state_dict[new_key] = v
23322332
original_state_dict = temp_state_dict
23332333

2334+
# Some Flux2 checkpoints skip the ai-toolkit `single_blocks` / `double_blocks`
2335+
# layout and already store expanded diffusers block names. Accept those
2336+
# directly, and normalize the legacy `sformer_blocks` alias used by some exports.
2337+
possible_expanded_block_prefixes = {
2338+
"single_transformer_blocks.": "single_transformer_blocks.",
2339+
"transformer_blocks.": "transformer_blocks.",
2340+
"sformer_blocks.": "transformer_blocks.",
2341+
}
2342+
for key in list(original_state_dict.keys()):
2343+
for source_prefix, target_prefix in possible_expanded_block_prefixes.items():
2344+
if key.startswith(source_prefix):
2345+
converted_state_dict[target_prefix + key[len(source_prefix) :]] = original_state_dict.pop(key)
2346+
break
2347+
23342348
num_double_layers = 0
23352349
num_single_layers = 0
23362350
for key in original_state_dict.keys():
@@ -2421,6 +2435,8 @@ def _convert_non_diffusers_flux2_lora_to_diffusers(state_dict):
24212435
"txt_in": "context_embedder",
24222436
"time_in.in_layer": "time_guidance_embed.timestep_embedder.linear_1",
24232437
"time_in.out_layer": "time_guidance_embed.timestep_embedder.linear_2",
2438+
"guidance_in.in_layer": "time_guidance_embed.guidance_embedder.linear_1",
2439+
"guidance_in.out_layer": "time_guidance_embed.guidance_embedder.linear_2",
24242440
"final_layer.linear": "proj_out",
24252441
"final_layer.adaLN_modulation.1": "norm_out.linear",
24262442
"single_stream_modulation.lin": "single_stream_modulation.linear",

src/diffusers/models/attention_dispatch.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,17 +1521,16 @@ def _maybe_modify_attn_mask_npu(query: torch.Tensor, key: torch.Tensor, attn_mas
15211521
if attn_mask is not None and torch.all(attn_mask != 0):
15221522
attn_mask = None
15231523

1524-
# Reshape Attention Mask: [batch_size, seq_len_k] -> [batch_size, 1, sqe_len_q, seq_len_k]
1524+
# Reshape Attention Mask: [batch_size, seq_len_k] or [batch_size, 1, 1, seq_len_k] -> [batch_size, 1, seq_len_q, seq_len_k]
15251525
# https://www.hiascend.com/document/detail/zh/Pytorch/730/apiref/torchnpuCustomsapi/docs/context/torch_npu-npu_fusion_attention.md
1526-
if (
1527-
attn_mask is not None
1528-
and attn_mask.ndim == 2
1529-
and attn_mask.shape[0] == query.shape[0]
1530-
and attn_mask.shape[1] == key.shape[1]
1531-
):
1532-
B, Sq, Skv = attn_mask.shape[0], query.shape[1], key.shape[1]
1526+
if attn_mask is not None:
1527+
if attn_mask.ndim == 2 and attn_mask.shape[0] == query.shape[0] and attn_mask.shape[1] == key.shape[1]:
1528+
batch_size, seq_len_q, seq_len_kv = attn_mask.shape[0], query.shape[1], key.shape[1]
1529+
attn_mask = attn_mask.unsqueeze(1).expand(batch_size, seq_len_q, seq_len_kv).unsqueeze(1).contiguous()
1530+
elif attn_mask.ndim == 4 and attn_mask.shape[1:3] == (1, 1):
1531+
attn_mask = attn_mask.expand(-1, -1, query.shape[1], -1).contiguous()
1532+
15331533
attn_mask = ~attn_mask.to(torch.bool)
1534-
attn_mask = attn_mask.unsqueeze(1).expand(B, Sq, Skv).unsqueeze(1).contiguous()
15351534

15361535
return attn_mask
15371536

src/diffusers/pipelines/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,12 @@
160160
]
161161
_import_structure["bria"] = ["BriaPipeline"]
162162
_import_structure["bria_fibo"] = ["BriaFiboPipeline", "BriaFiboEditPipeline"]
163-
_import_structure["flux2"] = ["Flux2Pipeline", "Flux2KleinPipeline", "Flux2KleinKVPipeline"]
163+
_import_structure["flux2"] = [
164+
"Flux2Pipeline",
165+
"Flux2KleinPipeline",
166+
"Flux2KleinInpaintPipeline",
167+
"Flux2KleinKVPipeline",
168+
]
164169
_import_structure["flux"] = [
165170
"FluxControlPipeline",
166171
"FluxControlInpaintPipeline",
@@ -697,7 +702,7 @@
697702
FluxPriorReduxPipeline,
698703
ReduxImageEncoder,
699704
)
700-
from .flux2 import Flux2KleinKVPipeline, Flux2KleinPipeline, Flux2Pipeline
705+
from .flux2 import Flux2KleinInpaintPipeline, Flux2KleinKVPipeline, Flux2KleinPipeline, Flux2Pipeline
701706
from .glm_image import GlmImagePipeline
702707
from .helios import HeliosPipeline, HeliosPyramidPipeline
703708
from .hidream_image import HiDreamImagePipeline

src/diffusers/pipelines/flux2/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
else:
2525
_import_structure["pipeline_flux2"] = ["Flux2Pipeline"]
2626
_import_structure["pipeline_flux2_klein"] = ["Flux2KleinPipeline"]
27+
_import_structure["pipeline_flux2_klein_inpaint"] = ["Flux2KleinInpaintPipeline"]
2728
_import_structure["pipeline_flux2_klein_kv"] = ["Flux2KleinKVPipeline"]
2829
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
2930
try:
@@ -34,6 +35,7 @@
3435
else:
3536
from .pipeline_flux2 import Flux2Pipeline
3637
from .pipeline_flux2_klein import Flux2KleinPipeline
38+
from .pipeline_flux2_klein_inpaint import Flux2KleinInpaintPipeline
3739
from .pipeline_flux2_klein_kv import Flux2KleinKVPipeline
3840
else:
3941
import sys

src/diffusers/pipelines/flux2/image_processor.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,12 @@ class Flux2ImageProcessor(VaeImageProcessor):
3535
VAE latent channels.
3636
do_normalize (`bool`, *optional*, defaults to `True`):
3737
Whether to normalize the image to [-1,1].
38+
do_binarize (`bool`, *optional*, defaults to `False`):
39+
Whether to binarize the image to 0/1.
3840
do_convert_rgb (`bool`, *optional*, defaults to `True`):
3941
Whether to convert the images to RGB format.
42+
do_convert_grayscale (`bool`, *optional*, defaults to `False`):
43+
Whether to convert the images to grayscale format.
4044
"""
4145

4246
@register_to_config
@@ -46,14 +50,18 @@ def __init__(
4650
vae_scale_factor: int = 16,
4751
vae_latent_channels: int = 32,
4852
do_normalize: bool = True,
53+
do_binarize: bool = False,
4954
do_convert_rgb: bool = True,
55+
do_convert_grayscale: bool = False,
5056
):
5157
super().__init__(
5258
do_resize=do_resize,
5359
vae_scale_factor=vae_scale_factor,
5460
vae_latent_channels=vae_latent_channels,
5561
do_normalize=do_normalize,
62+
do_binarize=do_binarize,
5663
do_convert_rgb=do_convert_rgb,
64+
do_convert_grayscale=do_convert_grayscale,
5765
)
5866

5967
@staticmethod

0 commit comments

Comments
 (0)