@@ -30,51 +30,100 @@ cd /opt/Automodel
# --------------------------------------------------------------------------
# Recipe selection.
#
# The recipe name is the config filename without its .yaml suffix. Each known
# recipe prefix selects the settings consumed by the later stages:
#   MEDIA_TYPE            - "video" or "image"; picks the download,
#                           preprocessing and inference code paths.
#   PROCESSOR             - value passed to the preprocessing tool's --processor.
#   GENERATE_CONFIG       - config file handed to generate.py.
#   MODEL_NAME            - pretrained model name passed to generate.py.
#   INFER_NUM_FRAMES      - video recipes only; number of frames requested at
#                           inference time.
#   PREPROCESS_EXTRA_ARGS - extra, whitespace-separated preprocessing flags.
# --------------------------------------------------------------------------
RECIPE_NAME=$(basename "$CONFIG_PATH" .yaml)
case "$RECIPE_NAME" in
    wan2_1_t2v_flow*)
        MEDIA_TYPE="video"
        PROCESSOR="wan"
        GENERATE_CONFIG="examples/diffusion/generate/configs/generate_wan.yaml"
        MODEL_NAME="Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
        INFER_NUM_FRAMES=9
        PREPROCESS_EXTRA_ARGS=""
        ;;
    hunyuan_t2v_flow*)
        MEDIA_TYPE="video"
        PROCESSOR="hunyuan"
        GENERATE_CONFIG="examples/diffusion/generate/configs/generate_hunyuan.yaml"
        MODEL_NAME="hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v"
        INFER_NUM_FRAMES=5
        PREPROCESS_EXTRA_ARGS="--target_frames 13"
        ;;
    flux_t2i_flow*)
        # Image recipes do not set INFER_NUM_FRAMES.
        MEDIA_TYPE="image"
        PROCESSOR="flux"
        GENERATE_CONFIG="examples/diffusion/generate/configs/generate_flux.yaml"
        MODEL_NAME="black-forest-labs/FLUX.1-dev"
        PREPROCESS_EXTRA_ARGS=""
        ;;
    qwen_image_t2i_flow*)
        MEDIA_TYPE="image"
        PROCESSOR="qwen_image"
        GENERATE_CONFIG="examples/diffusion/generate/configs/generate_qwen_image.yaml"
        MODEL_NAME="Qwen/Qwen-Image"
        PREPROCESS_EXTRA_ARGS=""
        ;;
    *)
        # Diagnostics go to stderr so they are not mixed into captured stdout.
        echo "ERROR: Unknown recipe '$RECIPE_NAME'. Add a case to diffusion_finetune_launcher.sh." >&2
        exit 1
        ;;
esac
echo "[config] Recipe=$RECIPE_NAME MediaType=$MEDIA_TYPE Processor=$PROCESSOR Model=$MODEL_NAME"
5268
# ============================================
# Stage 1: Download dataset
# ============================================
# Populates "$DATA_DIR/raw":
#   image recipes - saves every row of the diffusers/tuxemon train split as a
#                   PNG and writes one JSON object per line (file_name +
#                   caption) to tuxemon_internvl.json.
#   video recipes - snapshots the modal-labs/dissolve dataset repository.
#
# The target directory is handed to Python as a command-line argument
# (sys.argv[1]) and the Python source is single-quoted, so the shell never
# expands anything inside it and unusual characters in DATA_DIR cannot alter
# the program text.
echo "============================================"
echo "[data] Downloading dataset..."
echo "============================================"
if [ "$MEDIA_TYPE" = "image" ]; then
    uv run --extra diffusion python -c '
import json
import sys
from pathlib import Path

from datasets import load_dataset

ds = load_dataset("diffusers/tuxemon", split="train")
out_dir = Path(sys.argv[1])
out_dir.mkdir(parents=True, exist_ok=True)

jsonl_entries = []
for i, row in enumerate(ds):
    fname = f"tuxemon_sample_{i:04d}.png"
    row["image"].save(out_dir / fname)
    jsonl_entries.append({"file_name": fname, "internvl": row["gpt4_turbo_caption"]})

# One JSON document per line, despite the .json extension.
jsonl_path = out_dir / "tuxemon_internvl.json"
with open(jsonl_path, "w") as jf:
    for entry in jsonl_entries:
        jf.write(json.dumps(entry) + "\n")

print(f"Extracted {len(ds)} images to {out_dir}")
' "$DATA_DIR/raw"
else
    uv run --extra diffusion python -c '
import sys

from huggingface_hub import snapshot_download

snapshot_download("modal-labs/dissolve", repo_type="dataset", local_dir=sys.argv[1])
print("Dataset downloaded successfully")
' "$DATA_DIR/raw"
fi
64105
# ============================================
# Stage 2: Preprocess to latents
# ============================================
# Runs the preprocessing tool over "$DATA_DIR/raw" and writes its output to
# "$DATA_DIR/cache". The subcommand and its input-directory flag depend on
# MEDIA_TYPE; the video path additionally pins the resolution preset and the
# caption format.
echo "============================================"
echo "[preprocess] Converting ${MEDIA_TYPE}s to latents..."
echo "============================================"
case "$MEDIA_TYPE" in
    image)
        # shellcheck disable=SC2086 -- extra args are deliberately word-split
        uv run --extra diffusion python -m tools.diffusion.preprocessing_multiprocess image \
            --image_dir "$DATA_DIR/raw" \
            --output_dir "$DATA_DIR/cache" \
            --processor "$PROCESSOR" \
            $PREPROCESS_EXTRA_ARGS
        ;;
    *)
        # shellcheck disable=SC2086 -- extra args are deliberately word-split
        uv run --extra diffusion python -m tools.diffusion.preprocessing_multiprocess video \
            --video_dir "$DATA_DIR/raw" \
            --output_dir "$DATA_DIR/cache" \
            --processor "$PROCESSOR" \
            --resolution_preset 512p \
            --caption_format sidecar \
            $PREPROCESS_EXTRA_ARGS
        ;;
esac
78127
79128# ============================================
80129# Stage 3: Finetune
@@ -107,20 +156,37 @@ echo "[inference] Running inference smoke test..."
echo "============================================"

# Pick the checkpoint directory with the highest step number.
# The step is taken from the text after the final "_step_" in each path, so
# underscores elsewhere in CKPT_DIR cannot shift the field being compared.
# Entries whose suffix is not purely numeric are ignored.
CKPT_STEP_DIR=""
latest_step=-1
for ckpt_candidate in "$CKPT_DIR"/epoch_*_step_*; do
    [ -e "$ckpt_candidate" ] || continue   # unmatched glob stays literal
    candidate_step=${ckpt_candidate##*_step_}
    case "$candidate_step" in
        '' | *[!0-9]*) continue ;;
    esac
    # -ge keeps the last candidate on ties, like the previous `tail -1`.
    if [ "$candidate_step" -ge "$latest_step" ]; then
        latest_step=$candidate_step
        CKPT_STEP_DIR=$ckpt_candidate
    fi
done
if [ -z "$CKPT_STEP_DIR" ]; then
    echo "[inference] FAILURE: No checkpoint found under $CKPT_DIR" >&2
    exit 1
fi

# Run generation. Only the video path passes num_frames; the expected output
# extension and the wording of the result message follow the media type.
if [ "$MEDIA_TYPE" = "image" ]; then
    uv run --extra diffusion python examples/diffusion/generate/generate.py \
        --config "$GENERATE_CONFIG" \
        --model.pretrained_model_name_or_path "$MODEL_NAME" \
        --model.checkpoint "$CKPT_STEP_DIR" \
        --inference.num_inference_steps 5 \
        --output.output_dir "$INFER_DIR" \
        --vae.enable_slicing true \
        --vae.enable_tiling true
    output_ext="png"
    output_label="image"
else
    uv run --extra diffusion python examples/diffusion/generate/generate.py \
        --config "$GENERATE_CONFIG" \
        --model.pretrained_model_name_or_path "$MODEL_NAME" \
        --model.checkpoint "$CKPT_STEP_DIR" \
        --inference.num_inference_steps 5 \
        --inference.pipeline_kwargs.num_frames "$INFER_NUM_FRAMES" \
        --output.output_dir "$INFER_DIR" \
        --vae.enable_slicing true \
        --vae.enable_tiling true
    output_ext="mp4"
    output_label="video"
fi

# Verify output: at least one sample_*.<ext> file must exist in INFER_DIR.
found_output=""
for sample_file in "$INFER_DIR"/sample_*."$output_ext"; do
    if [ -e "$sample_file" ]; then
        found_output=1
        break
    fi
done
if [ -n "$found_output" ]; then
    echo "[inference] SUCCESS: Output ${output_label}(s) generated"
else
    echo "[inference] FAILURE: No output ${output_label}s found"
    exit 1
fi
0 commit comments