Skip to content

Commit 3bad2ff

Browse files
committed
feat: add KV caching support for Wan and VACE models, fix pyconfig getattr and fix jax tree map
1 parent 79cd005 commit 3bad2ff

19 files changed

Lines changed: 1278 additions & 170 deletions

src/maxdiffusion/configs/base_wan_14b.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,7 @@ use_cfg_cache: False
355355
# Batch positive and negative prompts in text encoder to save compute.
356356
use_batched_text_encoder: False
357357

358+
use_kv_cache: False
358359
use_magcache: False
359360
magcache_thresh: 0.12
360361
magcache_K: 2

src/maxdiffusion/configs/base_wan_1_3b.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,7 @@ flow_shift: 3.0
301301

302302
# Diffusion CFG cache (FasterCache-style, WAN 2.1 T2V only)
303303
use_cfg_cache: False
304+
use_kv_cache: False
304305

305306
# Batch positive and negative prompts in text encoder to save compute.
306307
use_batched_text_encoder: False

src/maxdiffusion/configs/base_wan_27b.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -331,7 +331,7 @@ use_cfg_cache: False
331331
# Batch positive and negative prompts in text encoder to save compute.
332332
use_batched_text_encoder: False
333333

334-
334+
use_kv_cache: False
335335
# SenCache: Sensitivity-Aware Caching (arXiv:2602.24208) — skip forward pass
336336
# when predicted output change (based on accumulated latent/timestep drift) is small
337337
use_sen_cache: False

src/maxdiffusion/configs/base_wan_i2v_14b.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,7 @@ profiler_steps: 10
302302
enable_jax_named_scopes: False
303303

304304
# Generation parameters
305-
prompt: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot." #LoRA prompt "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. Appearing behind him is a giant, translucent, pink spiritual manifestation (faxiang) that is synchronized with the man's action and pose."
305+
prompt: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. They are raising their left arm for a thumbs up. High quality, ultrarealistic detail and breath-taking movie-like camera shot." #LoRA prompt "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. Appearing behind him is a giant, translucent, pink spiritual manifestation (faxiang) that is synchronized with the man's action and pose."
306306
prompt_2: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot." #LoRA prompt "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. Appearing behind him is a giant, translucent, pink spiritual manifestation (faxiang) that is synchronized with the man's action and pose."
307307
negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
308308
do_classifier_free_guidance: True
@@ -318,7 +318,7 @@ use_cfg_cache: False
318318
# Batch positive and negative prompts in text encoder to save compute.
319319
use_batched_text_encoder: False
320320

321-
321+
use_kv_cache: False
322322
# SenCache: Sensitivity-Aware Caching (arXiv:2602.24208)
323323
use_sen_cache: False
324324
use_magcache: False

src/maxdiffusion/configs/base_wan_i2v_27b.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ profiler_steps: 10
303303
enable_jax_named_scopes: False
304304

305305
# Generation parameters
306-
prompt: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot." #LoRA prompt "orbit 180 around an astronaut on the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
306+
prompt: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. They are raising their left arm for a thumbs up. High quality, ultrarealistic detail and breath-taking movie-like camera shot." #LoRA prompt "orbit 180 around an astronaut on the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
307307
prompt_2: "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot." #LoRA prompt "orbit 180 around an astronaut on the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
308308
negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
309309
do_classifier_free_guidance: True
@@ -330,7 +330,7 @@ use_cfg_cache: False
330330
# Batch positive and negative prompts in text encoder to save compute.
331331
use_batched_text_encoder: False
332332

333-
333+
use_kv_cache: False
334334
# SenCache: Sensitivity-Aware Caching (arXiv:2602.24208)
335335
use_sen_cache: False
336336

src/maxdiffusion/generate_wan.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,7 @@ def call_pipeline(config, pipeline, prompt, negative_prompt):
104104
magcache_thresh=config.magcache_thresh,
105105
magcache_K=config.magcache_K,
106106
retention_ratio=config.retention_ratio,
107+
use_kv_cache=config.use_kv_cache,
107108
)
108109
elif model_key == WAN2_2:
109110
return pipeline(
@@ -118,6 +119,7 @@ def call_pipeline(config, pipeline, prompt, negative_prompt):
118119
guidance_scale_high=config.guidance_scale_high,
119120
use_cfg_cache=config.use_cfg_cache,
120121
use_sen_cache=config.use_sen_cache,
122+
use_kv_cache=config.use_kv_cache,
121123
)
122124
else:
123125
raise ValueError(f"Unsupported model_name for I2V in config: {model_key}")
@@ -136,6 +138,7 @@ def call_pipeline(config, pipeline, prompt, negative_prompt):
136138
magcache_thresh=config.magcache_thresh,
137139
magcache_K=config.magcache_K,
138140
retention_ratio=config.retention_ratio,
141+
use_kv_cache=config.use_kv_cache,
139142
)
140143
elif model_key == WAN2_2:
141144
return pipeline(
@@ -149,9 +152,10 @@ def call_pipeline(config, pipeline, prompt, negative_prompt):
149152
guidance_scale_high=config.guidance_scale_high,
150153
use_cfg_cache=config.use_cfg_cache,
151154
use_sen_cache=config.use_sen_cache,
155+
use_kv_cache=config.use_kv_cache,
152156
)
153157
else:
154-
raise ValueError(f"Unsupported model_name for T2Vin config: {model_key}")
158+
raise ValueError(f"Unsupported model_name for T2V in config: {model_key}")
155159

156160

157161
def inference_generate_video(config, pipeline, filename_prefix=""):

0 commit comments

Comments (0)