From 99f89d4ebe05f0914871821feba2a0de0fc167b8 Mon Sep 17 00:00:00 2001
From: Carles Onielfa <carlesonielfa@gmail.com>
Date: Fri, 13 Mar 2026 10:33:29 +0100
Subject: [PATCH] Prevent `add_special_tokens` overwrite for BART

Signed-off-by: Carles Onielfa <carlesonielfa@gmail.com>
---
 vllm_bart_plugin/bart.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm_bart_plugin/bart.py b/vllm_bart_plugin/bart.py
index 08dd93d..90d30d1 100644
--- a/vllm_bart_plugin/bart.py
+++ b/vllm_bart_plugin/bart.py
@@ -1041,6 +1041,9 @@ def _call_hf_processor(
         has_encoder_data = mm_data is not None and "texts" in mm_data
         result = {}
 
+        # vLLM may pass add_special_tokens in tok_kwargs; drop it here because we set it ourselves
+        tok_kwargs = {k: v for k, v in tok_kwargs.items() if k != "add_special_tokens"}
+
         if has_encoder_data:
             # Tokenize the encoder text from mm_data
             encoder_texts = mm_data["texts"]
@@ -1379,4 +1382,4 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         for key in self.keys_to_ignore_on_load_missing:
             loaded_params.add(key)
 
-        return loaded_params
+        return loaded_params
\ No newline at end of file