Trim v0.18 fixes from NLLB feature branch

dschulmeist · dschulmeist · commit 4f0c1521ed52 · 2026-04-16T11:23:29.000+02:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "vllm-bart-plugin"
-version = "0.3.4"
+version = "0.3.3"
 description = "BART, Florence-2, and NLLB/M2M-100 (translation) model plugin for vLLM"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/vllm_bart_plugin/bart.py b/vllm_bart_plugin/bart.py
@@ -996,10 +996,7 @@ def _parse_text_data(
         if data is None:
             return TextProcessorItems(None)
 
-        # _is_empty was removed in vLLM >=0.18; handle emptiness inline
-        if isinstance(data, str) and not data:
-            return None
-        if isinstance(data, list) and len(data) == 0:
+        if self._is_empty(data):
             return None
 
         # Text data should be a string or list of strings
@@ -1033,11 +1030,15 @@ def create_encoder_prompt(
         prompt: str | list[int],
         mm_data: MultiModalDataDict,
     ) -> str | list[int]:
-        # In vLLM >=0.18, `prompt` here is the DECODER prompt text, not the
-        # encoder text.  The encoder content lives in mm_data ("text" key).
-        # Always return [0] as a single placeholder token; _get_prompt_updates
-        # will replace it with the correct number of encoder token slots.
-        return [0]
+        if not prompt:
+            return [0]
+        tokenizer = self.info.get_tokenizer()
+        tokens = tokenizer(
+            prompt,
+            add_special_tokens=False,
+            return_tensors="pt",
+        )["input_ids"].flatten()
+        return tokens.tolist()
 
     def create_decoder_prompt(
         self,
@@ -1078,21 +1079,14 @@ def _call_hf_processor(
             )
             result["encoder_input_ids"] = encoder_tokenized["input_ids"]
 
-        # Always produce input_ids for the decoder prompt.
-        # In vLLM >=0.18 the rendering pipeline may call _call_hf_processor
-        # with an already-tokenized prompt (a list of ints) instead of a str.
-        # Handle both cases.
-        import torch as _torch
-        if isinstance(prompt, (list, tuple)) and len(prompt) > 0 and isinstance(prompt[0], int):
-            # Already token IDs — wrap without re-tokenizing
-            result["input_ids"] = _torch.tensor([prompt])
-        else:
-            prompt_tokenized = tokenizer(
-                prompt if prompt else "",
-                return_tensors="pt",
-                **tok_kwargs,
-            )
-            result["input_ids"] = prompt_tokenized["input_ids"]
+        # Always tokenize the prompt (as the decoder input, or as a dummy).
+        # The resulting "input_ids" entry is popped by the base class.
+        prompt_tokenized = tokenizer(
+            prompt if prompt else "",
+            return_tensors="pt",
+            **tok_kwargs,
+        )
+        result["input_ids"] = prompt_tokenized["input_ids"]
 
         return BatchFeature(result)