Skip to content

Commit 9d3f841

Browse files
committed
Disable custom cache path for Gemma 4 MLX export
1 parent 41e3a51 commit 9d3f841

1 file changed

Lines changed: 5 additions & 1 deletion

File tree

backends/mlx/examples/llm/export_llm_hf.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -216,11 +216,15 @@ def _export_with_custom_components(
216216
torch_dtype = torch_dtype_map.get(dtype, torch.bfloat16)
217217

218218
effective_use_custom_sdpa = use_custom_sdpa
219+
effective_use_custom_kv_cache = use_custom_kv_cache
219220
if model_id == _GEMMA4_MODEL_ID and use_custom_sdpa:
220221
logger.info(
221222
"Disabling custom SDPA for Gemma 4 while keeping the custom cache path"
222223
)
223224
effective_use_custom_sdpa = False
225+
if model_id == _GEMMA4_MODEL_ID and use_custom_kv_cache:
226+
logger.info("Disabling custom KV cache for Gemma 4")
227+
effective_use_custom_kv_cache = False
224228

225229
if effective_use_custom_sdpa:
226230
from executorch.backends.mlx.llm.hf_attention import register_mlx_attention
@@ -288,7 +292,7 @@ def _export_with_custom_components(
288292
max_cache_len=effective_cache_len,
289293
)
290294

291-
if use_custom_kv_cache:
295+
if effective_use_custom_kv_cache:
292296
from executorch.backends.mlx.llm.source_transformation import (
293297
replace_hf_cache_with_mlx,
294298
)

0 commit comments

Comments
 (0)