Skip to content

Commit 94e653d

Browse files
Add documentation and improve test stability for ZImageInpaintPipeline
- Add torch.empty fix for x_pad_token and cap_pad_token in test - Add # Copied from annotations for encode_prompt methods - Add documentation with a usage example and an autodoc directive
1 parent d747284 commit 94e653d

3 files changed

Lines changed: 49 additions & 0 deletions

File tree

docs/source/en/api/pipelines/z_image.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,41 @@ image = pipe(
5353
image.save("zimage_img2img.png")
5454
```
5555

56+
## Inpainting
57+
58+
Use [`ZImageInpaintPipeline`] to inpaint specific regions of an image based on a text prompt and mask.
59+
60+
```python
61+
import torch
62+
import numpy as np
63+
from PIL import Image
64+
from diffusers import ZImageInpaintPipeline
65+
from diffusers.utils import load_image
66+
67+
pipe = ZImageInpaintPipeline.from_pretrained("Tongyi-MAI/Z-Image-Turbo", torch_dtype=torch.bfloat16)
68+
pipe.to("cuda")
69+
70+
url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
71+
init_image = load_image(url).resize((1024, 1024))
72+
73+
# Create a mask (white = inpaint, black = preserve)
74+
mask = np.zeros((1024, 1024), dtype=np.uint8)
75+
mask[256:768, 256:768] = 255 # Inpaint center region
76+
mask_image = Image.fromarray(mask)
77+
78+
prompt = "A beautiful lake with mountains in the background"
79+
image = pipe(
80+
prompt,
81+
image=init_image,
82+
mask_image=mask_image,
83+
strength=1.0,
84+
num_inference_steps=9,
85+
guidance_scale=0.0,
86+
generator=torch.Generator("cuda").manual_seed(42),
87+
).images[0]
88+
image.save("zimage_inpaint.png")
89+
```
90+
5691
## ZImagePipeline
5792

5893
[[autodoc]] ZImagePipeline
@@ -64,3 +99,9 @@ image.save("zimage_img2img.png")
6499
[[autodoc]] ZImageImg2ImgPipeline
65100
- all
66101
- __call__
102+
103+
## ZImageInpaintPipeline
104+
105+
[[autodoc]] ZImageInpaintPipeline
106+
- all
107+
- __call__

src/diffusers/pipelines/z_image/pipeline_z_image_inpaint.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ def __init__(
204204
do_convert_grayscale=True,
205205
)
206206

207+
# Copied from diffusers.pipelines.z_image.pipeline_z_image.ZImagePipeline.encode_prompt
207208
def encode_prompt(
208209
self,
209210
prompt: Union[str, List[str]],
@@ -238,6 +239,7 @@ def encode_prompt(
238239
negative_prompt_embeds = []
239240
return prompt_embeds, negative_prompt_embeds
240241

242+
# Copied from diffusers.pipelines.z_image.pipeline_z_image.ZImagePipeline._encode_prompt
241243
def _encode_prompt(
242244
self,
243245
prompt: Union[str, List[str]],

tests/pipelines/z_image/test_z_image_inpaint.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@ def get_dummy_components(self):
109109
axes_dims=[8, 4, 4],
110110
axes_lens=[256, 32, 32],
111111
)
112+
# `x_pad_token` and `cap_pad_token` are initialized with `torch.empty`.
113+
# This can cause NaN values in our testing environment. Pinning them to ones
114+
# helps prevent that issue.
115+
with torch.no_grad():
116+
transformer.x_pad_token.copy_(torch.ones_like(transformer.x_pad_token.data))
117+
transformer.cap_pad_token.copy_(torch.ones_like(transformer.cap_pad_token.data))
112118

113119
torch.manual_seed(0)
114120
vae = AutoencoderKL(

0 commit comments

Comments
 (0)