@@ -996,10 +996,7 @@ def _parse_text_data(
996996 if data is None :
997997 return TextProcessorItems (None )
998998
999- # _is_empty was removed in vLLM >=0.18; handle emptiness inline
1000- if isinstance (data , str ) and not data :
1001- return None
1002- if isinstance (data , list ) and len (data ) == 0 :
999+ if self ._is_empty (data ):
10031000 return None
10041001
10051002 # Text data should be a string or list of strings
@@ -1033,11 +1030,15 @@ def create_encoder_prompt(
10331030 prompt : str | list [int ],
10341031 mm_data : MultiModalDataDict ,
10351032 ) -> str | list [int ]:
1036- # In vLLM >=0.18, `prompt` here is the DECODER prompt text, not the
1037- # encoder text. The encoder content lives in mm_data ("text" key).
1038- # Always return [0] as a single placeholder token; _get_prompt_updates
1039- # will replace it with the correct number of encoder token slots.
1040- return [0 ]
1033+ if not prompt :
1034+ return [0 ]
1035+ tokenizer = self .info .get_tokenizer ()
1036+ tokens = tokenizer (
1037+ prompt ,
1038+ add_special_tokens = False ,
1039+ return_tensors = "pt" ,
1040+ )["input_ids" ].flatten ()
1041+ return tokens .tolist ()
10411042
10421043 def create_decoder_prompt (
10431044 self ,
@@ -1078,21 +1079,14 @@ def _call_hf_processor(
10781079 )
10791080 result ["encoder_input_ids" ] = encoder_tokenized ["input_ids" ]
10801081
1081- # Always produce input_ids for the decoder prompt.
1082- # In vLLM >=0.18 the rendering pipeline may call _call_hf_processor
1083- # with an already-tokenized prompt (a list of ints) instead of a str.
1084- # Handle both cases.
1085- import torch as _torch
1086- if isinstance (prompt , (list , tuple )) and len (prompt ) > 0 and isinstance (prompt [0 ], int ):
1087- # Already token IDs — wrap without re-tokenizing
1088- result ["input_ids" ] = _torch .tensor ([prompt ])
1089- else :
1090- prompt_tokenized = tokenizer (
1091- prompt if prompt else "" ,
1092- return_tensors = "pt" ,
1093- ** tok_kwargs ,
1094- )
1095- result ["input_ids" ] = prompt_tokenized ["input_ids" ]
1082+ # Always tokenize the prompt (for decoder or as dummy)
1083+ # This will be popped by the base class
1084+ prompt_tokenized = tokenizer (
1085+ prompt if prompt else "" ,
1086+ return_tensors = "pt" ,
1087+ ** tok_kwargs ,
1088+ )
1089+ result ["input_ids" ] = prompt_tokenized ["input_ids" ]
10961090
10971091 return BatchFeature (result )
10981092
0 commit comments