File tree Expand file tree Collapse file tree
tensorrt_llm/_torch/models Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -827,12 +827,13 @@ def forward(
827827
828828 # Vision RoPE backend (FlashInfer path) gates on `position_ids is
829829 # not None`; supply trivial 0..seq_len-1 positions on device so
830- # the gate clears when `head_dim % 64 == 0`. For Qwen3-VL
831- # (head_dim=72) the gate misses and we fall through to the
832- # PyTorch path, which broadcasts cos/sin over the chunked q/k.
833- # Slicing the pre-allocated `_rope_position_ids_buffer` is a
834- # view -- no per-iter alloc and no host->device copy.
830+ # the gate clears when `head_dim % 64 == 0`. Grow the pre-allocated
831+ # buffer on demand so packed multi-video batches longer than it still fit.
835832 seq_len = hidden_states .shape [0 ]
833+ if seq_len > self ._rope_position_ids_buffer .numel ():
834+ self ._rope_position_ids_buffer = torch .arange (
835+ seq_len , dtype = torch .int32 , device = self .device
836+ )
836837 rope_position_ids = self ._rope_position_ids_buffer [:seq_len ]
837838 position_embeddings = (cos , sin )
838839
You can’t perform that action at this time.
0 commit comments