[None][fix] reuse Qwen VL disagg prompt expansion for embeddings

yechank-nvidia · 2ez4bz · commit 9572dc65beb5 · 2026-06-12T10:26:57.000-07:00
Signed-off-by: yechank <161688079+yechank-nvidia@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/models/modeling_qwen2vl.py b/tensorrt_llm/_torch/models/modeling_qwen2vl.py
@@ -517,16 +517,18 @@ def _attach_multimodal_embeddings_impl(
                     f"Image embedding {index} must be rank 2, got shape {tuple(image_embedding.shape)}"
                 )
 
-        get_prompt_token_ids = getattr(self, "get_prompt_token_ids", None)
-        if not callable(get_prompt_token_ids):
+        build_disagg_prefill_multimodal_inputs = getattr(
+            self, "build_disagg_prefill_multimodal_inputs", None)
+        if not callable(build_disagg_prefill_multimodal_inputs):
             raise NotImplementedError(
                 f"{type(self).__name__} does not support external multimodal embeddings"
             )
 
         mm_handles = [{
             "tensor_size": tuple(image_embedding.shape)
         } for image_embedding in image_embeddings]
-        prompt_token_ids, _, _ = get_prompt_token_ids(inputs, mm_handles)
+        prompt_token_ids = build_disagg_prefill_multimodal_inputs(
+            inputs, mm_handles).prompt_token_ids
 
         mrope_input_ids = torch.tensor(prompt_token_ids,
                                        dtype=torch.long).unsqueeze(0)
diff --git a/tensorrt_llm/_torch/models/modeling_qwen3vl.py b/tensorrt_llm/_torch/models/modeling_qwen3vl.py
@@ -169,6 +169,7 @@ class Qwen3VLInputProcessorBase(Qwen2VLInputProcessorBase):
     separate timestamp tokens, so each frame is its own (1, h, w) block rather
     than a ``tokens_per_second``-scaled stretch.
     """
+
     def __init__(
         self,
         model_path: str,