huggingface
diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml
Lines changed: 1 addition & 0 deletions
diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
Lines changed: 1 addition & 0 deletions
diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml
Lines changed: 2 additions & 0 deletions
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
Lines changed: 4 additions & 0 deletions
diff --git a/docs/source/en/api/cache.md b/docs/source/en/api/cache.md
Lines changed: 7 additions & 1 deletion
diff --git a/docs/source/en/api/models/transformer_joyimage.md b/docs/source/en/api/models/transformer_joyimage.md
Lines changed: 29 additions & 0 deletions
diff --git a/docs/source/en/api/pipelines/joyimage_edit.md b/docs/source/en/api/pipelines/joyimage_edit.md
Lines changed: 85 additions & 0 deletions
diff --git a/docs/source/en/optimization/cache.md b/docs/source/en/optimization/cache.md
Lines changed: 0 additions & 2 deletions
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
Lines changed: 5 additions & 4 deletions
diff --git a/src/diffusers/modular_pipelines/ernie_image/encoders.py b/src/diffusers/modular_pipelines/ernie_image/encoders.py
Lines changed: 12 additions & 5 deletions
@@ -25,6 +25,7 @@ jobs:
       notebook_folder: diffusers_doc
       languages: en ko zh ja pt
       custom_container: diffusers/diffusers-doc-builder
+      pre_command: uv pip uninstall transformers huggingface_hub && UV_PRERELEASE=allow uv pip install -U transformers@git+https://github.com/huggingface/transformers.git
     secrets:
       token: ${{ secrets.HUGGINGFACE_PUSH }}
       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
@@ -50,3 +50,4 @@ jobs:
       package: diffusers
       languages: en ko zh ja pt
       custom_container: diffusers/diffusers-doc-builder
+      pre_command: uv pip uninstall transformers huggingface_hub && UV_PRERELEASE=allow uv pip install -U transformers@git+https://github.com/huggingface/transformers.git
@@ -194,6 +194,8 @@ jobs:
     - name: Install dependencies
       run: |
         uv pip install -e ".[quality]"
+        uv pip uninstall transformers huggingface_hub && UV_PRERELEASE=allow uv pip install -U transformers@git+https://github.com/huggingface/transformers.git
+        uv pip uninstall tokenizers && uv pip install "tokenizers<=0.23.0"
 
     - name: Environment
       run: |
 
@@ -372,6 +372,8 @@
         title: HunyuanVideo15Transformer3DModel
       - local: api/models/hunyuan_video_transformer_3d
         title: HunyuanVideoTransformer3DModel
+      - local: api/models/transformer_joyimage
+        title: JoyImageEditTransformer3DModel
       - local: api/models/latte_transformer3d
         title: LatteTransformer3DModel
       - local: api/models/longcat_image_transformer2d
@@ -560,6 +562,8 @@
         title: HunyuanImage2.1
       - local: api/pipelines/pix2pix
         title: InstructPix2Pix
+      - local: api/pipelines/joyimage_edit
+        title: JoyImage Edit
       - local: api/pipelines/kandinsky
         title: Kandinsky 2.1
       - local: api/pipelines/kandinsky_v22
 
@@ -35,8 +35,14 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
 
 [[autodoc]] apply_first_block_cache
 
-### TaylorSeerCacheConfig
+## TaylorSeerCacheConfig
 
 [[autodoc]] TaylorSeerCacheConfig
 
 [[autodoc]] apply_taylorseer_cache
+
+## MagCacheConfig
+
+[[autodoc]] MagCacheConfig
+
+[[autodoc]] apply_mag_cache
@@ -0,0 +1,29 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# JoyImageEditTransformer3DModel
+
+The model can be loaded with the following code snippet.
+
+```python
+import torch
+from diffusers import JoyImageEditTransformer3DModel
+transformer = JoyImageEditTransformer3DModel.from_pretrained("jdopensource/JoyAI-Image-Edit-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
+```
+
+## JoyImageEditTransformer3DModel
+
+[[autodoc]] JoyImageEditTransformer3DModel
+
+## Transformer2DModelOutput
+
+[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
@@ -0,0 +1,85 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# JoyAI-Image-Edit
+
+[JoyAI-Image](https://github.com/jd-opensource/JoyAI-Image) is a unified multimodal foundation model for image understanding, text-to-image generation, and instruction-guided image editing. It combines an 8B Multimodal Large Language Model (MLLM) with a 16B Multimodal Diffusion Transformer (MMDiT). A central principle of JoyAI-Image is the closed-loop collaboration between understanding, generation, and editing.
+
+JoyAI-Image-Edit supports general image editing as well as spatial editing capabilities including object move, object rotation, and camera control.
+
+| Model | Description | Download |
+|:-----:|:-----------:|:--------:|
+| JoyAI-Image-Edit | Instruction-guided image editing with precise and controllable spatial manipulation | [Hugging Face](https://huggingface.co/jdopensource/JoyAI-Image-Edit-Diffusers) |
+
+```python
+import torch
+from diffusers import JoyImageEditPipeline
+from diffusers.utils import load_image
+
+pipeline = JoyImageEditPipeline.from_pretrained(
+    "jdopensource/JoyAI-Image-Edit-Diffusers", torch_dtype=torch.bfloat16
+)
+pipeline.to("cuda")
+
+image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg")
+prompt = "Add wings to the astronaut."
+
+output = pipeline(
+    image=image,
+    prompt=prompt,
+    num_inference_steps=40,
+    guidance_scale=4.0,
+    generator=torch.Generator("cuda").manual_seed(0),
+).images[0]
+output.save("joyimage_edit_output.png")
+```
+
+## Spatial editing
+
+JoyAI-Image supports three spatial editing prompt patterns: **Object Move**, **Object Rotation**, and **Camera Control**. For best results, follow the prompt templates below as closely as possible. For more information, refer to [SpatialEdit](https://github.com/EasonXiao-888/SpatialEdit).
+
+### Object Move
+
+Move a target object into a specified region marked by a red box in the input image.
+
+```text
+Move the <object> into the red box and finally remove the red box.
+```
+
+### Object Rotation
+
+Rotate an object to a specific canonical view. Supported `<view>` values: `front`, `right`, `left`, `rear`, `front right`, `front left`, `rear right`, `rear left`.
+
+```text
+Rotate the <object> to show the <view> side view.
+```
+
+### Camera Control
+
+Change the camera viewpoint while keeping the 3D scene unchanged.
+
+```text
+Move the camera.
+- Camera rotation: Yaw {y_rotation}°, Pitch {p_rotation}°.
+- Camera zoom: in/out/unchanged.
+- Keep the 3D scene static; only change the viewpoint.
+```
+
+## JoyImageEditPipeline
+
+[[autodoc]] JoyImageEditPipeline
+  - all
+  - __call__
+
+## JoyImageEditPipelineOutput
+
+[[autodoc]] pipelines.joyimage.pipeline_output.JoyImageEditPipelineOutput
@@ -118,8 +118,6 @@ pipe.transformer.enable_cache(config)
 
 MagCache relies on **Magnitude Ratios** (`mag_ratios`), which describe this decay curve. These ratios are specific to the model checkpoint and scheduler.
 
-### Usage
-
 To use MagCache, you typically follow a two-step process: **Calibration** and **Inference**.
 
 1.  **Calibration**: Run inference once with `calibrate=True`. The hook will measure the residual magnitudes and print the calculated ratios to the console.
 
@@ -22,6 +22,7 @@
     is_torchao_available,
     is_torchsde_available,
     is_transformers_available,
+    is_transformers_flax_compatible,
     is_transformers_version,
 )
 
@@ -861,7 +862,6 @@
     _import_structure["models.modeling_flax_utils"] = ["FlaxModelMixin"]
     _import_structure["models.unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
     _import_structure["models.vae_flax"] = ["FlaxAutoencoderKL"]
-    _import_structure["pipelines"].extend(["FlaxDiffusionPipeline"])
     _import_structure["schedulers"].extend(
         [
             "FlaxDDIMScheduler",
@@ -878,7 +878,7 @@
 
 
 try:
-    if not (is_flax_available() and is_transformers_available()):
+    if not (is_flax_available() and is_transformers_available() and is_transformers_flax_compatible()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
     from .utils import dummy_flax_and_transformers_objects  # noqa F403
@@ -891,6 +891,7 @@
 else:
     _import_structure["pipelines"].extend(
         [
+            "FlaxDiffusionPipeline",
             "FlaxStableDiffusionControlNetPipeline",
             "FlaxStableDiffusionImg2ImgPipeline",
             "FlaxStableDiffusionInpaintPipeline",
@@ -1620,7 +1621,6 @@
         from .models.modeling_flax_utils import FlaxModelMixin
         from .models.unets.unet_2d_condition_flax import FlaxUNet2DConditionModel
         from .models.vae_flax import FlaxAutoencoderKL
-        from .pipelines import FlaxDiffusionPipeline
         from .schedulers import (
             FlaxDDIMScheduler,
             FlaxDDPMScheduler,
@@ -1634,12 +1634,13 @@
         )
 
     try:
-        if not (is_flax_available() and is_transformers_available()):
+        if not (is_flax_available() and is_transformers_available() and is_transformers_flax_compatible()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
         from .utils.dummy_flax_and_transformers_objects import *  # noqa F403
     else:
         from .pipelines import (
+            FlaxDiffusionPipeline,
             FlaxStableDiffusionControlNetPipeline,
             FlaxStableDiffusionImg2ImgPipeline,
             FlaxStableDiffusionInpaintPipeline,
 
@@ -15,16 +15,23 @@
 import json
 
 import torch
-from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoTokenizer, Mistral3Model
 
 from ...configuration_utils import FrozenDict
 from ...guiders import ClassifierFreeGuidance
 from ...utils import logging
+from ...utils.import_utils import is_transformers_version
 from ..modular_pipeline import ModularPipelineBlocks, PipelineState
 from ..modular_pipeline_utils import ComponentSpec, InputParam, OutputParam
 from .modular_pipeline import ErnieImageModularPipeline
 
 
+if is_transformers_version("<", "5.0.0"):
+    raise ImportError("`ErnieImageModularPipeline` requires `transformers>=5.0.0` for `Ministral3ForCausalLM`.")
+
+from transformers import Ministral3ForCausalLM  # noqa: E402
+
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -38,7 +45,7 @@ def description(self) -> str:
     @property
     def expected_components(self) -> list[ComponentSpec]:
         return [
-            ComponentSpec("pe", AutoModelForCausalLM),
+            ComponentSpec("pe", Ministral3ForCausalLM),
             ComponentSpec("pe_tokenizer", AutoTokenizer),
         ]
 
@@ -83,7 +90,7 @@ def intermediate_outputs(self) -> list[OutputParam]:
 
     @staticmethod
     def _enhance_prompt(
-        pe: AutoModelForCausalLM,
+        pe: Ministral3ForCausalLM,
         pe_tokenizer: AutoTokenizer,
         prompt: str,
         device: torch.device,
@@ -160,7 +167,7 @@ def description(self) -> str:
     @property
     def expected_components(self) -> list[ComponentSpec]:
         return [
-            ComponentSpec("text_encoder", AutoModel),
+            ComponentSpec("text_encoder", Mistral3Model),
             ComponentSpec("tokenizer", AutoTokenizer),
             ComponentSpec(
                 "guider",
@@ -200,7 +207,7 @@ def intermediate_outputs(self) -> list[OutputParam]:
 
     @staticmethod
     def _encode(
-        text_encoder: AutoModel,
+        text_encoder: Mistral3Model,
         tokenizer: AutoTokenizer,
         prompt: list[str],
         device: torch.device,