@@ -945,6 +945,21 @@ def get_data_parser(self) -> MultiModalDataParser:
945945 return TextDataParser ()
946946
947947
948+ # vLLM >=0.18 moved tokenization defaults from a global enc-dec override
949+ # (InputPreprocessor._get_tokenization_kw) into per-model ProcessingInfo.
950+ # The old code forced add_special_tokens=False for every is_encoder_decoder
951+ # model; replicate that here so the renderer does not inject extra BOS/EOS
952+ # into the decoder prompt. On vLLM <0.18 the method does not exist on the
953+ # base class and is not needed (the global override handles it).
954+ if hasattr (BaseProcessingInfo , "get_default_tok_params" ):
955+ # Explicit two-arg super(): defined outside the class body, so no __class__ cell.
956+ def _bart_get_default_tok_params (self ):
957+ return super (BartProcessingInfo , self ).get_default_tok_params () \
958+ .with_kwargs (add_special_tokens = False )
959+ # Installed only when the base class defines the hook (hasattr check above).
960+ BartProcessingInfo .get_default_tok_params = _bart_get_default_tok_params # type: ignore[attr-defined]
961+
962+
948963class BartDummyInputsBuilder (BaseDummyInputsBuilder [BartProcessingInfo ]):
949964 """Builds dummy inputs for profiling BART models."""
950965
@@ -993,14 +1008,9 @@ def _parse_text_data(
9931008 data : ModalityData [str ],
9941009 ) -> ModalityDataItems [Any , Any ] | None :
9951010 """Parse text data for BART."""
996- if data is None :
997- return TextProcessorItems (None )
998-
9991011 # _is_empty was removed in vLLM >=0.18; handle emptiness inline
1000- if isinstance (data , str ) and not data :
1001- return None
1002- if isinstance (data , list ) and len (data ) == 0 :
1003- return None
1012+ if data is None or not len (data ):
1013+ return TextProcessorItems (None )
10041014
10051015 # Text data should be a string or list of strings
10061016 if isinstance (data , str ) or is_list_of (data , str ):
@@ -1033,10 +1043,22 @@ def create_encoder_prompt(
10331043 prompt : str | list [int ],
10341044 mm_data : MultiModalDataDict ,
10351045 ) -> str | list [int ]:
1036- # In vLLM >=0.18, `prompt` here is the DECODER prompt text, not the
1037- # encoder text. The encoder content lives in mm_data ("text" key).
1038- # Always return [0] as a single placeholder token; _get_prompt_updates
1039- # will replace it with the correct number of encoder token slots.
1046+ # vLLM compatibility:
1047+ # - Legacy (<0.18): prompt is encoder text (str) — tokenize directly.
1048+ # - Modern (>=0.18): prompt is decoder token IDs or empty str from
1049+ # profiling — return a single [0] placeholder that _get_prompt_updates
1050+ # will expand to the real encoder token count. The placeholder IDs
1051+ # are structural (KV-cache sizing); the actual encoder computation
1052+ # uses encoder_input_ids from mm_kwargs.
1053+ if isinstance (prompt , str ) and prompt :
1054+ tokenizer = self .info .get_tokenizer ()
1055+ tokens = tokenizer (
1056+ prompt ,
1057+ add_special_tokens = False ,
1058+ return_tensors = "pt" ,
1059+ )["input_ids" ].flatten ()
1060+ return tokens .tolist ()
1061+
10401062 return [0 ]
10411063
10421064 def create_decoder_prompt (
@@ -1055,10 +1077,16 @@ def _call_hf_processor(
10551077 tok_kwargs : Mapping [str , object ],
10561078 ):
10571079 """
1058- BART doesn't have a HuggingFace Processor - it only has a tokenizer.
1059- We tokenize both the prompt (decoder) and encoder text from mm_data.
1080+ BART doesn't have a HuggingFace Processor — it only has a tokenizer.
1081+
1082+ Produces two sets of token IDs:
1083+ - ``encoder_input_ids``: tokenized encoder text from ``mm_data["texts"]``
1084+ - ``input_ids``: tokenized decoder prompt (used by the base class to
1085+ build ``prompt_token_ids``)
1086+
1087+ Encoder text is always tokenized with ``add_special_tokens=False`` to
1088+ match v0.16 behaviour and stay consistent with ``_get_prompt_updates``.
10601089 """
1061- # tok_kwargs["add_special_tokens"] = False
10621090 from transformers .feature_extraction_utils import BatchFeature
10631091
10641092 tokenizer = self .info .get_tokenizer ()
@@ -1068,13 +1096,13 @@ def _call_hf_processor(
10681096 result = {}
10691097
10701098 if has_encoder_data :
1071- # Tokenize the encoder text from mm_data
10721099 encoder_texts = mm_data ["texts" ]
10731100 encoder_text = encoder_texts [0 ] if encoder_texts else ""
1101+ # Tokenize the encoder text from mm_data
10741102 encoder_tokenized = tokenizer (
10751103 encoder_text ,
10761104 return_tensors = "pt" ,
1077- ** tok_kwargs ,
1105+ add_special_tokens = False ,
10781106 )
10791107 result ["encoder_input_ids" ] = encoder_tokenized ["input_ids" ]
10801108
@@ -1109,6 +1137,13 @@ def _get_prompt_updates(
11091137 hf_processor_mm_kwargs : Mapping [str , object ],
11101138 out_mm_kwargs : MultiModalKwargsItems ,
11111139 ) -> Sequence [PromptUpdate ]:
1140+ """Replace the single [0] encoder placeholder with N placeholder
1141+ tokens, where N equals the tokenized length of the encoder text.
1142+
1143+ The token count must use ``add_special_tokens=False`` to stay
1144+ consistent with ``_call_hf_processor`` (which tokenizes the encoder
1145+ text the same way).
1146+ """
11121147 from vllm .multimodal .processing import PromptReplacement
11131148
11141149 # Get the number of text items to determine token count
0 commit comments