Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,10 +673,10 @@ def drafting_loop_wrapper(model):

sm_version = get_sm_version()
if kv_cache_config.enable_block_reuse and sm_version not in [
- 90, 100, 103, 120
+ 90, 100, 103, 120, 121
]:
logger.warning(
- f"KV cache reuse for MLA can only be enabled on SM90/SM100/SM103/SM120, "
+ f"KV cache reuse for MLA can only be enabled on SM90/SM100/SM103/SM120/SM121, "
f"disable enable_block_reuse for SM{sm_version}")
kv_cache_config.enable_block_reuse = False
_set_model_engines_cache_reuse([model_engine, draft_model_engine],
Expand All @@ -693,9 +693,9 @@ def drafting_loop_wrapper(model):
kv_cache_config.enable_block_reuse = False
_set_model_engines_cache_reuse([model_engine, draft_model_engine],
False)
- if enable_chunked_context and sm_version not in [90, 100, 103, 120]:
+ if enable_chunked_context and sm_version not in [90, 100, 103, 120, 121]:
logger.warning(
- "Chunked Prefill for MLA can only be enabled on SM90/SM100/SM103/SM120, "
+ "Chunked Prefill for MLA can only be enabled on SM90/SM100/SM103/SM120/SM121, "
f"disable enable_chunked_context for SM{sm_version}")
enable_chunked_context = False
model_engine.attn_runtime_features.chunked_prefill = False
Expand Down