Skip to content

Commit 9572dc6

Browse files
yechank-nvidia authored and 2ez4bz committed
[None][fix] reuse Qwen VL disagg prompt expansion for embeddings
Signed-off-by: yechank <161688079+yechank-nvidia@users.noreply.github.com>
1 parent 2a9e003 commit 9572dc6

2 files changed

Lines changed: 6 additions & 3 deletions

File tree

tensorrt_llm/_torch/models/modeling_qwen2vl.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -517,16 +517,18 @@ def _attach_multimodal_embeddings_impl(
517517
f"Image embedding {index} must be rank 2, got shape {tuple(image_embedding.shape)}"
518518
)
519519

520-
get_prompt_token_ids = getattr(self, "get_prompt_token_ids", None)
521-
if not callable(get_prompt_token_ids):
520+
build_disagg_prefill_multimodal_inputs = getattr(
521+
self, "build_disagg_prefill_multimodal_inputs", None)
522+
if not callable(build_disagg_prefill_multimodal_inputs):
522523
raise NotImplementedError(
523524
f"{type(self).__name__} does not support external multimodal embeddings"
524525
)
525526

526527
mm_handles = [{
527528
"tensor_size": tuple(image_embedding.shape)
528529
} for image_embedding in image_embeddings]
529-
prompt_token_ids, _, _ = get_prompt_token_ids(inputs, mm_handles)
530+
prompt_token_ids = build_disagg_prefill_multimodal_inputs(
531+
inputs, mm_handles).prompt_token_ids
530532

531533
mrope_input_ids = torch.tensor(prompt_token_ids,
532534
dtype=torch.long).unsqueeze(0)

tensorrt_llm/_torch/models/modeling_qwen3vl.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,7 @@ class Qwen3VLInputProcessorBase(Qwen2VLInputProcessorBase):
169169
separate timestamp tokens, so each frame is its own (1, h, w) block rather
170170
than a ``tokens_per_second``-scaled stretch.
171171
"""
172+
172173
def __init__(
173174
self,
174175
model_path: str,

0 commit comments

Comments
 (0)