Skip to content

Commit 800c7ee

Browse files
committed
graceful fallback to eager
Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com>
1 parent 4be3013 commit 800c7ee

1 file changed

Lines changed: 8 additions & 0 deletions

File tree

tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -818,6 +818,14 @@ def maybe_get_cuda_graph(
818 818
if not self._capture_allowed:
819 819
return None, None
820 820

821+
if "multi_item_part_lens" in inputs:
822+
# See model_engine.py for more details
823+
logger.warning_once(
824+
"Encoder CUDA graph does not support multi-item scoring; "
825+
"falling back to eager.",
826+
key="encoder_cuda_graph_multi_item_scoring_warning")
827+
return None, None
828+
821 829
if attn_metadata.has_cross_sub_metadata:
822 830
logger.warning_once(
823 831
"Encoder CUDA graph does not support cross-attention metadata; "

0 commit comments

Comments
 (0)