File tree Expand file tree Collapse file tree
backends/mlx/examples/llm Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -216,11 +216,15 @@ def _export_with_custom_components(
216216 torch_dtype = torch_dtype_map .get (dtype , torch .bfloat16 )
217217
218218 effective_use_custom_sdpa = use_custom_sdpa
219+ effective_use_custom_kv_cache = use_custom_kv_cache
219220 if model_id == _GEMMA4_MODEL_ID and use_custom_sdpa :
220221 logger .info (
221222 "Disabling custom SDPA for Gemma 4 while keeping the custom cache path"
222223 )
223224 effective_use_custom_sdpa = False
225+ if model_id == _GEMMA4_MODEL_ID and use_custom_kv_cache :
226+ logger .info ("Disabling custom KV cache for Gemma 4" )
227+ effective_use_custom_kv_cache = False
224228
225229 if effective_use_custom_sdpa :
226230 from executorch .backends .mlx .llm .hf_attention import register_mlx_attention
@@ -288,7 +292,7 @@ def _export_with_custom_components(
288292 max_cache_len = effective_cache_len ,
289293 )
290294
291- if use_custom_kv_cache :
295+ if effective_use_custom_kv_cache :
292296 from executorch .backends .mlx .llm .source_transformation import (
293297 replace_hf_cache_with_mlx ,
294298 )
You can’t perform that action at this time.
0 commit comments