Commit dfbd485

Merge branch 'main' into enable-cp-kernels

2 parents 9bd8361 + 5e48f46

94 files changed: 4,893 additions and 232 deletions

Some content (including some file names) is hidden by default in this large commit.
docs/source/en/_toctree.yml

Lines changed: 5 additions & 1 deletion
```diff
@@ -365,6 +365,8 @@
       title: HunyuanVideoTransformer3DModel
     - local: api/models/latte_transformer3d
       title: LatteTransformer3DModel
+    - local: api/models/longcat_image_transformer2d
+      title: LongCatImageTransformer2DModel
     - local: api/models/ltx_video_transformer3d
       title: LTXVideoTransformer3DModel
     - local: api/models/lumina2_transformer2d
@@ -402,7 +404,7 @@
     - local: api/models/wan_transformer_3d
       title: WanTransformer3DModel
     - local: api/models/z_image_transformer2d
-      title: ZImageTransformer2DModel
+      title: ZImageTransformer2DModel
     title: Transformers
   - sections:
     - local: api/models/stable_cascade_unet
@@ -563,6 +565,8 @@
       title: Latent Diffusion
     - local: api/pipelines/ledits_pp
       title: LEDITS++
+    - local: api/pipelines/longcat_image
+      title: LongCat-Image
     - local: api/pipelines/lumina2
       title: Lumina 2.0
     - local: api/pipelines/lumina
```

docs/source/en/api/models/controlnet.md

Lines changed: 15 additions & 0 deletions
````diff
@@ -33,6 +33,21 @@ url = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/m
 pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet)
 ```
 
+## Loading from Control LoRA
+
+Control-LoRA was introduced by Stability AI in [stabilityai/control-lora](https://huggingface.co/stabilityai/control-lora), applying low-rank, parameter-efficient fine-tuning to ControlNet. This offers a more efficient and compact way to bring model control to a wider range of consumer GPUs.
+
+```py
+import torch
+from diffusers import ControlNetModel, UNet2DConditionModel
+
+lora_id = "stabilityai/control-lora"
+lora_filename = "control-LoRAs-rank128/control-lora-canny-rank128.safetensors"
+
+unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", torch_dtype=torch.bfloat16).to("cuda")
+controlnet = ControlNetModel.from_unet(unet).to(device="cuda", dtype=torch.bfloat16)
+controlnet.load_lora_adapter(lora_id, weight_name=lora_filename, prefix=None, controlnet_config=controlnet.config)
+```
+
 ## ControlNetModel
 
 [[autodoc]] ControlNetModel
````
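An aside on the Control-LoRA section above: the low-rank idea is easy to see outside of diffusers. A rank-128 adapter stores two thin matrices instead of a full weight delta, which is where the memory savings come from. A NumPy toy sketch (all names and the layer size are hypothetical, chosen for illustration only):

```python
import numpy as np

rng = np.random.default_rng(0)
d_out, d_in, rank = 640, 640, 128  # rank 128, as in the checkpoint name above

W = rng.standard_normal((d_out, d_in)).astype(np.float32)          # frozen base weight
B = rng.standard_normal((d_out, rank)).astype(np.float32) * 0.01   # "up" projection
A = rng.standard_normal((rank, d_in)).astype(np.float32) * 0.01    # "down" projection

x = rng.standard_normal((d_in,)).astype(np.float32)

# Adapted forward pass: base output plus the low-rank correction B @ (A @ x),
# equivalent to using the merged weight W + B @ A.
y = W @ x + B @ (A @ x)

# The adapter stores rank * (d_out + d_in) parameters instead of d_out * d_in
# for a full weight delta.
full_params = d_out * d_in
lora_params = rank * (d_out + d_in)
print(lora_params / full_params)  # 0.4 of the full delta at rank 128 for a 640x640 layer
```

At lower ranks the ratio shrinks further, which is why rank-128 and rank-256 variants of the same control can ship as small files.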
Lines changed: 25 additions & 0 deletions
````diff
@@ -0,0 +1,25 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# LongCatImageTransformer2DModel
+
+The model can be loaded with the following code snippet.
+
+```python
+import torch
+from diffusers import LongCatImageTransformer2DModel
+
+transformer = LongCatImageTransformer2DModel.from_pretrained("meituan-longcat/LongCat-Image", subfolder="transformer", torch_dtype=torch.bfloat16)
+```
+
+## LongCatImageTransformer2DModel
+
+[[autodoc]] LongCatImageTransformer2DModel
````
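As a quick sizing check for the load call above: LongCat-Image is described elsewhere in this commit as a 6B-parameter model, and bfloat16 stores 2 bytes per parameter, so the transformer weights alone occupy roughly:

```python
# Back-of-the-envelope weight memory for a 6B-parameter model in bfloat16.
# This counts weights only, not activations, the text encoder, or the VAE.
params = 6e9
bytes_per_param = 2  # bfloat16
weight_gib = params * bytes_per_param / 1024**3
print(round(weight_gib, 1))  # about 11.2 GiB for the weights alone
```

This is why `torch_dtype=torch.bfloat16` (rather than float32, which would double the figure) is the practical default for consumer GPUs.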
Lines changed: 114 additions & 0 deletions
````diff
@@ -0,0 +1,114 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# LongCat-Image
+
+<div class="flex flex-wrap space-x-1">
+  <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+</div>
+
+LongCat-Image is a pioneering open-source, bilingual (Chinese-English) foundation model for image generation, designed to address core challenges prevalent in current leading models: multilingual text rendering, photorealism, deployment efficiency, and developer accessibility.
+
+### Key Features
+- 🌟 **Exceptional Efficiency and Performance**: With only **6B parameters**, LongCat-Image surpasses numerous open-source models several times its size across multiple benchmarks, demonstrating the potential of efficient model design.
+- 🌟 **Superior Editing Performance**: The LongCat-Image-Edit model achieves state-of-the-art performance among open-source models, delivering leading instruction following and image quality with superior visual consistency.
+- 🌟 **Powerful Chinese Text Rendering**: LongCat-Image renders common Chinese characters with greater accuracy and stability than existing SOTA open-source models and achieves industry-leading coverage of the Chinese dictionary.
+- 🌟 **Remarkable Photorealism**: Through an innovative data strategy and training framework, LongCat-Image achieves remarkable photorealism in generated images.
+- 🌟 **Comprehensive Open-Source Ecosystem**: A complete toolchain is provided, from intermediate checkpoints to full training code, significantly lowering the barrier for further research and development.
+
+For more details, refer to the [***LongCat-Image Technical Report***](https://arxiv.org/abs/2412.11963).
+
+## Usage Example
+
+```py
+import torch
+from diffusers import LongCatImagePipeline
+
+pipe = LongCatImagePipeline.from_pretrained("meituan-longcat/LongCat-Image", torch_dtype=torch.bfloat16)
+pipe.to('cuda')
+# pipe.enable_model_cpu_offload()
+
+# Chinese prompt (the model is bilingual): a young Asian woman in a yellow knit
+# sweater with a white necklace, hands on her knees, against a rough brick wall
+# in warm afternoon light; a serene medium-distance portrait with soft lighting.
+prompt = '一个年轻的亚裔女性,身穿黄色针织衫,搭配白色项链。她的双手放在膝盖上,表情恬静。背景是一堵粗糙的砖墙,午后的阳光温暖地洒在她身上,营造出一种宁静而温馨的氛围。镜头采用中距离视角,突出她的神态和服饰的细节。光线柔和地打在她的脸上,强调她的五官和饰品的质感,增加画面的层次感与亲和力。整个画面构图简洁,砖墙的纹理与阳光的光影效果相得益彰,突显出人物的优雅与从容。'
+image = pipe(
+    prompt,
+    height=768,
+    width=1344,
+    guidance_scale=4.0,
+    num_inference_steps=50,
+    num_images_per_prompt=1,
+    generator=torch.Generator("cpu").manual_seed(43),
+    enable_cfg_renorm=True,
+    enable_prompt_rewrite=True,
+).images[0]
+image.save('./longcat_image_t2i_example.png')
+```
+
+This pipeline was contributed by the LongCat-Image team. The original codebase can be found [here](https://github.com/meituan-longcat/LongCat-Image).
+
+Available models:
+<div style="overflow-x: auto; margin-bottom: 16px;">
+  <table style="border-collapse: collapse; width: 100%;">
+    <thead>
+      <tr>
+        <th style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de; background-color: #f6f8fa;">Models</th>
+        <th style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de; background-color: #f6f8fa;">Type</th>
+        <th style="padding: 8px; border: 1px solid #d0d7de; background-color: #f6f8fa;">Description</th>
+        <th style="padding: 8px; border: 1px solid #d0d7de; background-color: #f6f8fa;">Download Link</th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr>
+        <td style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de;">LongCat&#8209;Image</td>
+        <td style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de;">Text&#8209;to&#8209;Image</td>
+        <td style="padding: 8px; border: 1px solid #d0d7de;">Final release. The standard model for out&#8209;of&#8209;the&#8209;box inference.</td>
+        <td style="padding: 8px; border: 1px solid #d0d7de;">
+          <span style="white-space: nowrap;">🤗&nbsp;<a href="https://huggingface.co/meituan-longcat/LongCat-Image">Hugging&nbsp;Face</a></span>
+        </td>
+      </tr>
+      <tr>
+        <td style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de;">LongCat&#8209;Image&#8209;Dev</td>
+        <td style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de;">Text&#8209;to&#8209;Image</td>
+        <td style="padding: 8px; border: 1px solid #d0d7de;">Development. Mid-training checkpoint, suitable for fine-tuning.</td>
+        <td style="padding: 8px; border: 1px solid #d0d7de;">
+          <span style="white-space: nowrap;">🤗&nbsp;<a href="https://huggingface.co/meituan-longcat/LongCat-Image-Dev">Hugging&nbsp;Face</a></span>
+        </td>
+      </tr>
+      <tr>
+        <td style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de;">LongCat&#8209;Image&#8209;Edit</td>
+        <td style="white-space: nowrap; padding: 8px; border: 1px solid #d0d7de;">Image Editing</td>
+        <td style="padding: 8px; border: 1px solid #d0d7de;">Specialized model for image editing.</td>
+        <td style="padding: 8px; border: 1px solid #d0d7de;">
+          <span style="white-space: nowrap;">🤗&nbsp;<a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit">Hugging&nbsp;Face</a></span>
+        </td>
+      </tr>
+    </tbody>
+  </table>
+</div>
+
+## LongCatImagePipeline
+
+[[autodoc]] LongCatImagePipeline
+  - all
+  - __call__
+
+## LongCatImagePipelineOutput
+
+[[autodoc]] pipelines.longcat_image.pipeline_output.LongCatImagePipelineOutput
````
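A note on two of the call arguments in the usage example above: `guidance_scale` applies classifier-free guidance, and `enable_cfg_renorm` suggests the guided prediction is additionally rescaled. One common renorm formulation, shown here as an assumption rather than LongCat-Image's exact behavior, rescales the guided output back to the conditional prediction's norm to tame over-saturation at high guidance scales:

```python
import numpy as np

def cfg(uncond, cond, scale):
    # Classifier-free guidance: extrapolate from the unconditional prediction
    # toward the conditional one by `scale`.
    return uncond + scale * (cond - uncond)

def cfg_renorm(uncond, cond, scale):
    # A common "CFG renorm" variant (assumed here, not necessarily what
    # enable_cfg_renorm does): rescale the guided prediction so its norm
    # matches the conditional prediction's norm.
    guided = cfg(uncond, cond, scale)
    return guided * (np.linalg.norm(cond) / np.linalg.norm(guided))

rng = np.random.default_rng(0)
uncond = rng.standard_normal(16).astype(np.float32)
cond = rng.standard_normal(16).astype(np.float32)

out = cfg_renorm(uncond, cond, scale=4.0)
# After renorm, the guided prediction keeps the conditional prediction's norm.
print(np.isclose(np.linalg.norm(out), np.linalg.norm(cond)))
```

At `scale=1.0` plain CFG reduces to the conditional prediction, so renorm is a no-op there; the rescaling only matters at the larger scales (like 4.0 above) where the extrapolated vector grows.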

docs/source/en/training/distributed_inference.md

Lines changed: 68 additions & 25 deletions
````diff
@@ -237,6 +237,8 @@ By selectively loading and unloading the models you need at a given stage and sh
 
 Use [`~ModelMixin.set_attention_backend`] to switch to a more optimized attention backend. Refer to this [table](../optimization/attention_backends#available-backends) for a complete list of available backends.
 
+Most attention backends are compatible with context parallelism. Open an [issue](https://github.com/huggingface/diffusers/issues/new) if a backend is not compatible.
+
 ### Ring Attention
 
 Key (K) and value (V) representations communicate between devices using [Ring Attention](https://huggingface.co/papers/2310.01889). This ensures each split sees every other token's K/V. Each GPU computes attention for its local K/V and passes it to the next GPU in the ring. No single GPU holds the full sequence, which reduces communication latency.
@@ -245,38 +247,58 @@ Pass a [`ContextParallelConfig`] to the `parallel_config` argument of the transf
 
 ```py
 import torch
-from diffusers import AutoModel, QwenImagePipeline, ContextParallelConfig
-
-try:
-    torch.distributed.init_process_group("nccl")
-    rank = torch.distributed.get_rank()
-    device = torch.device("cuda", rank % torch.cuda.device_count())
+from torch import distributed as dist
+from diffusers import DiffusionPipeline, ContextParallelConfig
+
+def setup_distributed():
+    if not dist.is_initialized():
+        dist.init_process_group(backend="nccl")
+    rank = dist.get_rank()
+    device = torch.device(f"cuda:{rank}")
     torch.cuda.set_device(device)
-
-    transformer = AutoModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer", torch_dtype=torch.bfloat16, parallel_config=ContextParallelConfig(ring_degree=2))
-    pipeline = QwenImagePipeline.from_pretrained("Qwen/Qwen-Image", transformer=transformer, torch_dtype=torch.bfloat16, device_map="cuda")
-    pipeline.transformer.set_attention_backend("flash")
+    return device
+
+def main():
+    device = setup_distributed()
+    world_size = dist.get_world_size()
+
+    pipeline = DiffusionPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, device_map=device
+    )
+    pipeline.transformer.set_attention_backend("_native_cudnn")
+
+    cp_config = ContextParallelConfig(ring_degree=world_size)
+    pipeline.transformer.enable_parallelism(config=cp_config)
 
     prompt = """
    cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
    highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
    """
 
     # Must specify generator so all ranks start with same latents (or pass your own)
     generator = torch.Generator().manual_seed(42)
-    image = pipeline(prompt, num_inference_steps=50, generator=generator).images[0]
-
-    if rank == 0:
-        image.save("output.png")
-
-except Exception as e:
-    print(f"An error occurred: {e}")
-    torch.distributed.breakpoint()
-    raise
-
-finally:
-    if torch.distributed.is_initialized():
-        torch.distributed.destroy_process_group()
+    image = pipeline(
+        prompt,
+        guidance_scale=3.5,
+        num_inference_steps=50,
+        generator=generator,
+    ).images[0]
+
+    if dist.get_rank() == 0:
+        image.save("output.png")
+
+    if dist.is_initialized():
+        dist.destroy_process_group()
+
+
+if __name__ == "__main__":
+    main()
+```
+
+Run the script above with a PyTorch-compatible distributed launcher, such as [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html). Set `--nproc-per-node` to the number of available GPUs.
+
+```shell
+torchrun --nproc-per-node 2 above_script.py
 ```
 
 ### Ulysses Attention
@@ -288,5 +310,26 @@ finally:
 Pass the [`ContextParallelConfig`] to [`~ModelMixin.enable_parallelism`].
 
 ```py
+# Set the degree according to the number of GPUs available.
 pipeline.transformer.enable_parallelism(config=ContextParallelConfig(ulysses_degree=2))
-```
+```
+
+### parallel_config
+
+Pass `parallel_config` during model initialization to enable context parallelism.
+
+```py
+CKPT_ID = "black-forest-labs/FLUX.1-dev"
+
+cp_config = ContextParallelConfig(ring_degree=2)
+transformer = AutoModel.from_pretrained(
+    CKPT_ID,
+    subfolder="transformer",
+    torch_dtype=torch.bfloat16,
+    parallel_config=cp_config
+)
+
+pipeline = DiffusionPipeline.from_pretrained(
+    CKPT_ID, transformer=transformer, torch_dtype=torch.bfloat16,
+).to(device)
+```
````
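The Ring Attention exchange described in this file can be checked on a single process: split the sequence into per-device blocks (one per unit of `ring_degree`) and rotate the K/V blocks around a ring while each rank accumulates an online softmax over the blocks it has seen. A NumPy sketch of the idea (illustrative only, not the diffusers kernel):

```python
import numpy as np

def attention(q, k, v):
    # Standard softmax attention, used as the reference.
    scores = q @ k.T / np.sqrt(q.shape[-1])
    w = np.exp(scores - scores.max(axis=-1, keepdims=True))
    return (w / w.sum(axis=-1, keepdims=True)) @ v

def ring_attention(q_blocks, k_blocks, v_blocks):
    # Each "device" i keeps its Q block; K/V blocks rotate around the ring.
    # A running max (m), normalizer (l), and accumulator implement the
    # online softmax, so no device ever materializes the full sequence.
    n = len(q_blocks)
    outputs = []
    for i in range(n):
        q = q_blocks[i]
        m = np.full((q.shape[0], 1), -np.inf)
        l = np.zeros((q.shape[0], 1))
        acc = np.zeros((q.shape[0], v_blocks[0].shape[1]))
        for step in range(n):
            j = (i + step) % n  # block arriving from the ring this step
            scores = q @ k_blocks[j].T / np.sqrt(q.shape[-1])
            m_new = np.maximum(m, scores.max(axis=-1, keepdims=True))
            correction = np.exp(m - m_new)  # rescale previous partial sums
            p = np.exp(scores - m_new)
            l = l * correction + p.sum(axis=-1, keepdims=True)
            acc = acc * correction + p @ v_blocks[j]
            m = m_new
        outputs.append(acc / l)
    return np.concatenate(outputs, axis=0)

rng = np.random.default_rng(0)
seq, dim, world = 8, 4, 2  # world plays the role of ring_degree
q, k, v = (rng.standard_normal((seq, dim)) for _ in range(3))
split = lambda x: np.split(x, world, axis=0)

out_ring = ring_attention(split(q), split(k), split(v))
out_full = attention(q, k, v)
print(np.allclose(out_ring, out_full))  # the ring result matches full attention
```

This is why the result is exact rather than approximate: the rotation plus online-softmax bookkeeping reproduces full attention while each rank only ever holds one K/V block at a time.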

examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -94,7 +94,7 @@
 import wandb
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.36.0.dev0")
+check_min_version("0.37.0.dev0")
 
 logger = get_logger(__name__)
```
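The gate being bumped above compares PEP 440 version strings, where a `.dev0` pre-release sorts *before* the final release it precedes. A toy sketch of that ordering (hypothetical helpers, not diffusers' actual `check_min_version` implementation):

```python
def parse_version(version):
    # Toy PEP 440-ish parser covering only "X.Y.Z" and "X.Y.Z.devN":
    # a dev pre-release sorts before its final release.
    parts = version.split(".")
    dev = None
    if parts[-1].startswith("dev"):
        dev = int(parts[-1][len("dev"):])
        parts = parts[:-1]
    release = tuple(int(p) for p in parts)
    # Middle element: 0 for final releases, -1 for dev pre-releases,
    # so tuple comparison orders dev builds just below the final.
    return (release, 0 if dev is None else -1, dev if dev is not None else 0)

def check_min_version_sketch(installed, required):
    # Hypothetical equivalent of the check: raise if installed < required.
    if parse_version(installed) < parse_version(required):
        raise ImportError(f"diffusers>={required} is required, found {installed}")

# A source install of 0.37.0.dev0 satisfies a 0.37.0.dev0 requirement,
# while a released 0.36.0 would not.
check_min_version_sketch("0.37.0.dev0", "0.37.0.dev0")
print(parse_version("0.36.0") < parse_version("0.37.0.dev0") < parse_version("0.37.0"))  # True
```

This ordering is why the example scripts bump the string in lockstep with the library: after the version bump, only a source install at least as new as `0.37.0.dev0` passes the gate.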

examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -88,7 +88,7 @@
 
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.36.0.dev0")
+check_min_version("0.37.0.dev0")
 
 logger = get_logger(__name__)
```

examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -95,7 +95,7 @@
 import wandb
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.36.0.dev0")
+check_min_version("0.37.0.dev0")
 
 logger = get_logger(__name__)
```

examples/cogvideo/train_cogvideox_image_to_video_lora.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -61,7 +61,7 @@
 import wandb
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.36.0.dev0")
+check_min_version("0.37.0.dev0")
 
 logger = get_logger(__name__)
```

examples/cogvideo/train_cogvideox_lora.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -52,7 +52,7 @@
 import wandb
 
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
-check_min_version("0.36.0.dev0")
+check_min_version("0.37.0.dev0")
 
 logger = get_logger(__name__)
```
