@@ -996,7 +996,10 @@ def _parse_text_data(
996996 if data is None :
997997 return TextProcessorItems (None )
998998
999- if self ._is_empty (data ):
999+ # _is_empty was removed in vLLM >=0.18; handle emptiness inline
1000+ if isinstance (data , str ) and not data :
1001+ return None
1002+ if isinstance (data , list ) and len (data ) == 0 :
10001003 return None
10011004
10021005 # Text data should be a string or list of strings
@@ -1030,15 +1033,11 @@ def create_encoder_prompt(
10301033 prompt : str | list [int ],
10311034 mm_data : MultiModalDataDict ,
10321035 ) -> str | list [int ]:
1033- if not prompt :
1034- return [0 ]
1035- tokenizer = self .info .get_tokenizer ()
1036- tokens = tokenizer (
1037- prompt ,
1038- add_special_tokens = False ,
1039- return_tensors = "pt" ,
1040- )["input_ids" ].flatten ()
1041- return tokens .tolist ()
1036+ # In vLLM >=0.18, `prompt` here is the DECODER prompt text, not the
1037+ # encoder text. The encoder content lives in mm_data ("text" key).
1038+ # Always return [0] as a single placeholder token; _get_prompt_updates
1039+ # will replace it with the correct number of encoder token slots.
1040+ return [0 ]
10421041
10431042 def create_decoder_prompt (
10441043 self ,
@@ -1079,14 +1078,20 @@ def _call_hf_processor(
10791078 )
10801079 result ["encoder_input_ids" ] = encoder_tokenized ["input_ids" ]
10811080
1082- # Always tokenize the prompt (for decoder or as dummy)
1083- # This will be popped by the base class
1084- prompt_tokenized = tokenizer (
1085- prompt if prompt else "" ,
1086- return_tensors = "pt" ,
1087- ** tok_kwargs ,
1088- )
1089- result ["input_ids" ] = prompt_tokenized ["input_ids" ]
1081+ # Always produce input_ids for the decoder prompt.
1082+ # In vLLM >=0.18 the rendering pipeline may call _call_hf_processor
1083+ # with an already-tokenized prompt (a list of ints) instead of a str.
1084+ # Handle both cases.
1085+ import torch as _torch
1086+ if isinstance (prompt , (list , tuple )) and len (prompt ) > 0 and isinstance (prompt [0 ], int ):
1087+ result ["input_ids" ] = _torch .tensor ([prompt ])
1088+ else :
1089+ prompt_tokenized = tokenizer (
1090+ prompt if prompt else "" ,
1091+ return_tensors = "pt" ,
1092+ ** tok_kwargs ,
1093+ )
1094+ result ["input_ids" ] = prompt_tokenized ["input_ids" ]
10901095
10911096 return BatchFeature (result )
10921097
0 commit comments