Remove BART add_special_tokens fix

carlesonielfa · carlesonielfa · commit 0a00dbbf16ef · 2026-03-13T10:31:29.000+01:00
Signed-off-by: Carles Onielfa <carlesonielfa@gmail.com>
diff --git a/vllm_bart_plugin/bart.py b/vllm_bart_plugin/bart.py
@@ -1045,9 +1045,6 @@ def _call_hf_processor(
         has_encoder_data = mm_data is not None and "texts" in mm_data
         result = {}
 
-        # vLLM may pass add_special_tokens in tok_kwargs; we set it ourselves
-        tok_kwargs = {k: v for k, v in tok_kwargs.items() if k != "add_special_tokens"}
-
         if has_encoder_data:
             # Tokenize the encoder text from mm_data
             encoder_texts = mm_data["texts"]
@@ -1159,8 +1156,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             config.vocab_size, config.d_model, embed_scale=embed_scale
         )
         # Bias added to logits after lm_head, matching HuggingFace approach
-        self.register_buffer("final_logits_bias",
-                             torch.zeros((1, config.vocab_size)))
+        self.register_buffer("final_logits_bias", torch.zeros((1, config.vocab_size)))
         self.logits_processor = LogitsProcessor(
             self.unpadded_vocab_size, config.vocab_size
         )