
Commit 7f19d76

pthombre and claude committed

feat: add Flux and QwenImage T2I nightly CI tests

Extend the diffusion nightly CI pipeline to support text-to-image models (Flux and QwenImage) alongside the existing text-to-video models (Wan, HunyuanVideo). The image CI smoke tests use the diffusers/tuxemon dataset.

Changes:
- Add MEDIA_TYPE branching in the launcher for image vs video stages
- Add tuxemon dataset download/extraction with JSONL captions
- Add image preprocessing and .png inference verification paths
- Add ci: sections to flux_t2i_flow.yaml and qwen_image_t2i_flow.yaml
- Register QwenImagePipeline in the generate.py output type mapping

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Pranav Prashant Thombre <pthombre@nvidia.com>

1 parent 36cc5f2 commit 7f19d76

5 files changed: 105 additions & 28 deletions

examples/diffusion/finetune/flux_t2i_flow.yaml

Lines changed: 4 additions & 0 deletions
@@ -79,3 +79,7 @@ dist_env:
   init_method: "env://"
 
 seed: 42
+
+ci:
+  recipe_owner: pthombre
+  time: "00:30:00"

examples/diffusion/finetune/qwen_image_t2i_flow.yaml

Lines changed: 4 additions & 0 deletions
@@ -78,3 +78,7 @@ dist_env:
   init_method: "env://"
 
 seed: 42
+
+ci:
+  recipe_owner: pthombre
+  time: "00:30:00"

examples/diffusion/generate/generate.py

Lines changed: 1 addition & 0 deletions
@@ -53,6 +53,7 @@
 # Pipeline class name -> output type mapping
 _PIPELINE_OUTPUT_TYPES = {
     "FluxPipeline": "image",
+    "QwenImagePipeline": "image",
     "WanPipeline": "video",
     "HunyuanVideoPipeline": "video",
     "HunyuanVideo15Pipeline": "video",

tests/ci_tests/configs/diffusion_finetune/nightly_recipes.yml

Lines changed: 2 additions & 0 deletions
@@ -16,3 +16,5 @@ examples_dir: diffusion/finetune
 configs:
   - wan2_1_t2v_flow.yaml
   - hunyuan_t2v_flow.yaml
+  - flux_t2i_flow.yaml
+  - qwen_image_t2i_flow.yaml
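
The harness that consumes this recipe list is not part of the diff, but roughly, a nightly runner would iterate the configs and invoke the launcher once per recipe. A sketch under stated assumptions: the YAML keys (examples_dir, configs) come from the file above, while passing CONFIG_PATH through the environment and the exact launcher invocation are guesses at how the CI wiring works:

# Hypothetical nightly-runner sketch; only the YAML field names are from this commit.
import os
import subprocess

import yaml  # pyyaml

with open("tests/ci_tests/configs/diffusion_finetune/nightly_recipes.yml") as f:
    recipes = yaml.safe_load(f)

for config in recipes["configs"]:
    config_path = f"examples/{recipes['examples_dir']}/{config}"
    # The launcher reads $CONFIG_PATH; how CI actually supplies it is assumed here.
    subprocess.run(
        ["bash", "tests/ci_tests/scripts/diffusion_finetune_launcher.sh"],
        env={**os.environ, "CONFIG_PATH": config_path},
        check=True,  # fail the nightly job on the first broken recipe
    )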

tests/ci_tests/scripts/diffusion_finetune_launcher.sh

Lines changed: 94 additions & 28 deletions
@@ -30,51 +30,100 @@ cd /opt/Automodel
 RECIPE_NAME=$(basename "$CONFIG_PATH" .yaml)
 case "$RECIPE_NAME" in
     wan2_1_t2v_flow*)
+        MEDIA_TYPE="video"
         PROCESSOR="wan"
         GENERATE_CONFIG="examples/diffusion/generate/configs/generate_wan.yaml"
         MODEL_NAME="Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
         INFER_NUM_FRAMES=9
         PREPROCESS_EXTRA_ARGS=""
         ;;
     hunyuan_t2v_flow*)
+        MEDIA_TYPE="video"
         PROCESSOR="hunyuan"
         GENERATE_CONFIG="examples/diffusion/generate/configs/generate_hunyuan.yaml"
         MODEL_NAME="hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v"
         INFER_NUM_FRAMES=5
         PREPROCESS_EXTRA_ARGS="--target_frames 13"
         ;;
+    flux_t2i_flow*)
+        MEDIA_TYPE="image"
+        PROCESSOR="flux"
+        GENERATE_CONFIG="examples/diffusion/generate/configs/generate_flux.yaml"
+        MODEL_NAME="black-forest-labs/FLUX.1-dev"
+        PREPROCESS_EXTRA_ARGS=""
+        ;;
+    qwen_image_t2i_flow*)
+        MEDIA_TYPE="image"
+        PROCESSOR="qwen_image"
+        GENERATE_CONFIG="examples/diffusion/generate/configs/generate_qwen_image.yaml"
+        MODEL_NAME="Qwen/Qwen-Image"
+        PREPROCESS_EXTRA_ARGS=""
+        ;;
     *)
         echo "ERROR: Unknown recipe '$RECIPE_NAME'. Add a case to diffusion_finetune_launcher.sh."
        exit 1
         ;;
 esac
-echo "[config] Recipe=$RECIPE_NAME Processor=$PROCESSOR Model=$MODEL_NAME"
+echo "[config] Recipe=$RECIPE_NAME MediaType=$MEDIA_TYPE Processor=$PROCESSOR Model=$MODEL_NAME"
 
 # ============================================
-# Stage 1: Download dissolve dataset
+# Stage 1: Download dataset
 # ============================================
 echo "============================================"
-echo "[data] Downloading dissolve dataset..."
+echo "[data] Downloading dataset..."
 echo "============================================"
-uv run --extra diffusion python -c "
+if [ "$MEDIA_TYPE" = "image" ]; then
+    uv run --extra diffusion python -c "
+from datasets import load_dataset
+from pathlib import Path
+import json
+
+ds = load_dataset('diffusers/tuxemon', split='train')
+out_dir = Path('$DATA_DIR/raw')
+out_dir.mkdir(parents=True, exist_ok=True)
+
+jsonl_entries = []
+for i, row in enumerate(ds):
+    fname = f'tuxemon_sample_{i:04d}.png'
+    row['image'].save(out_dir / fname)
+    jsonl_entries.append({'file_name': fname, 'internvl': row['gpt4_turbo_caption']})
+
+jsonl_path = out_dir / 'tuxemon_internvl.json'
+with open(jsonl_path, 'w') as jf:
+    for entry in jsonl_entries:
+        jf.write(json.dumps(entry) + '\n')
+
+print(f'Extracted {len(ds)} images to {out_dir}')
+"
+else
+    uv run --extra diffusion python -c "
 from huggingface_hub import snapshot_download
 snapshot_download('modal-labs/dissolve', repo_type='dataset', local_dir='$DATA_DIR/raw')
 print('Dataset downloaded successfully')
 "
+fi
 
 # ============================================
-# Stage 2: Preprocess videos to latents
+# Stage 2: Preprocess to latents
 # ============================================
 echo "============================================"
-echo "[preprocess] Converting videos to latents..."
+echo "[preprocess] Converting ${MEDIA_TYPE}s to latents..."
 echo "============================================"
-uv run --extra diffusion python -m tools.diffusion.preprocessing_multiprocess video \
-    --video_dir "$DATA_DIR/raw" \
-    --output_dir "$DATA_DIR/cache" \
-    --processor "$PROCESSOR" \
-    --resolution_preset 512p \
-    --caption_format sidecar \
-    $PREPROCESS_EXTRA_ARGS
+if [ "$MEDIA_TYPE" = "image" ]; then
+    uv run --extra diffusion python -m tools.diffusion.preprocessing_multiprocess image \
+        --image_dir "$DATA_DIR/raw" \
+        --output_dir "$DATA_DIR/cache" \
+        --processor "$PROCESSOR" \
+        $PREPROCESS_EXTRA_ARGS
+else
+    uv run --extra diffusion python -m tools.diffusion.preprocessing_multiprocess video \
+        --video_dir "$DATA_DIR/raw" \
+        --output_dir "$DATA_DIR/cache" \
+        --processor "$PROCESSOR" \
+        --resolution_preset 512p \
+        --caption_format sidecar \
+        $PREPROCESS_EXTRA_ARGS
+fi
 
 # ============================================
 # Stage 3: Finetune
@@ -107,20 +156,37 @@ echo "[inference] Running inference smoke test..."
 echo "============================================"
 CKPT_STEP_DIR=$(ls -d $CKPT_DIR/epoch_*_step_* | sort -t_ -k4 -n | tail -1)
 
-uv run --extra diffusion python examples/diffusion/generate/generate.py \
-    --config "$GENERATE_CONFIG" \
-    --model.pretrained_model_name_or_path "$MODEL_NAME" \
-    --model.checkpoint "$CKPT_STEP_DIR" \
-    --inference.num_inference_steps 5 \
-    --inference.pipeline_kwargs.num_frames "$INFER_NUM_FRAMES" \
-    --output.output_dir "$INFER_DIR" \
-    --vae.enable_slicing true \
-    --vae.enable_tiling true
-
-# Verify output
-if ls $INFER_DIR/sample_*.mp4 1>/dev/null 2>&1; then
-    echo "[inference] SUCCESS: Output video(s) generated"
+if [ "$MEDIA_TYPE" = "image" ]; then
+    uv run --extra diffusion python examples/diffusion/generate/generate.py \
+        --config "$GENERATE_CONFIG" \
+        --model.pretrained_model_name_or_path "$MODEL_NAME" \
+        --model.checkpoint "$CKPT_STEP_DIR" \
+        --inference.num_inference_steps 5 \
+        --output.output_dir "$INFER_DIR" \
+        --vae.enable_slicing true \
+        --vae.enable_tiling true
+
+    if ls $INFER_DIR/sample_*.png 1>/dev/null 2>&1; then
+        echo "[inference] SUCCESS: Output image(s) generated"
+    else
+        echo "[inference] FAILURE: No output images found"
+        exit 1
+    fi
 else
-    echo "[inference] FAILURE: No output videos found"
-    exit 1
+    uv run --extra diffusion python examples/diffusion/generate/generate.py \
+        --config "$GENERATE_CONFIG" \
+        --model.pretrained_model_name_or_path "$MODEL_NAME" \
+        --model.checkpoint "$CKPT_STEP_DIR" \
+        --inference.num_inference_steps 5 \
+        --inference.pipeline_kwargs.num_frames "$INFER_NUM_FRAMES" \
+        --output.output_dir "$INFER_DIR" \
+        --vae.enable_slicing true \
+        --vae.enable_tiling true
+
+    if ls $INFER_DIR/sample_*.mp4 1>/dev/null 2>&1; then
+        echo "[inference] SUCCESS: Output video(s) generated"
+    else
+        echo "[inference] FAILURE: No output videos found"
+        exit 1
+    fi
 fi
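
One detail worth calling out from Stage 1: the image path writes captions as JSON Lines (one object per line, despite the .json extension), keyed by file_name and internvl. A small reader sketch follows; the path and keys come from the launcher above, but that the image preprocessor consumes exactly this layout is an assumption:

# Hypothetical reader for the JSONL caption file produced in Stage 1.
import json
from pathlib import Path

def load_captions(raw_dir: str) -> dict:
    """Return {image filename -> caption} from the tuxemon_internvl.json sidecar."""
    captions = {}
    with open(Path(raw_dir) / "tuxemon_internvl.json") as jf:
        for line in jf:
            entry = json.loads(line)
            captions[entry["file_name"]] = entry["internvl"]
    return captions

# e.g. load_captions("data/raw")["tuxemon_sample_0000.png"] -> its GPT-4 Turbo caption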
