We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4be3013, commit 800c7ee (Copy full SHA for 800c7ee)
1 file changed
tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py
@@ -818,6 +818,14 @@ def maybe_get_cuda_graph(
818
if not self._capture_allowed:
819
return None, None
820
821
+ if "multi_item_part_lens" in inputs:
822
+ # See model_engine.py for more details
823
+ logger.warning_once(
824
+ "Encoder CUDA graph does not support multi-item scoring; "
825
+ "falling back to eager.",
826
+ key="encoder_cuda_graph_multi_item_scoring_warning")
827
+ return None, None
828
+
829
if attn_metadata.has_cross_sub_metadata:
830
logger.warning_once(
831
"Encoder CUDA graph does not support cross-attention metadata; "
0 commit comments