Skip to content

Commit 4f0c152

Browse files
committed
Trim v0.18 fixes from NLLB feature branch
1 parent 11df39e commit 4f0c152

2 files changed

Lines changed: 19 additions & 25 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "vllm-bart-plugin"
7-
version = "0.3.4"
7+
version = "0.3.3"
88
description = "BART, Florence-2, and NLLB/M2M-100 (translation) model plugin for vLLM"
99
readme = "README.md"
1010
requires-python = ">=3.10"

vllm_bart_plugin/bart.py

Lines changed: 18 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -996,10 +996,7 @@ def _parse_text_data(
996996
if data is None:
997997
return TextProcessorItems(None)
998998

999-
# _is_empty was removed in vLLM >=0.18; handle emptiness inline
1000-
if isinstance(data, str) and not data:
1001-
return None
1002-
if isinstance(data, list) and len(data) == 0:
999+
if self._is_empty(data):
10031000
return None
10041001

10051002
# Text data should be a string or list of strings
@@ -1033,11 +1030,15 @@ def create_encoder_prompt(
10331030
prompt: str | list[int],
10341031
mm_data: MultiModalDataDict,
10351032
) -> str | list[int]:
1036-
# In vLLM >=0.18, `prompt` here is the DECODER prompt text, not the
1037-
# encoder text. The encoder content lives in mm_data ("text" key).
1038-
# Always return [0] as a single placeholder token; _get_prompt_updates
1039-
# will replace it with the correct number of encoder token slots.
1040-
return [0]
1033+
if not prompt:
1034+
return [0]
1035+
tokenizer = self.info.get_tokenizer()
1036+
tokens = tokenizer(
1037+
prompt,
1038+
add_special_tokens=False,
1039+
return_tensors="pt",
1040+
)["input_ids"].flatten()
1041+
return tokens.tolist()
10411042

10421043
def create_decoder_prompt(
10431044
self,
@@ -1078,21 +1079,14 @@ def _call_hf_processor(
10781079
)
10791080
result["encoder_input_ids"] = encoder_tokenized["input_ids"]
10801081

1081-
# Always produce input_ids for the decoder prompt.
1082-
# In vLLM >=0.18 the rendering pipeline may call _call_hf_processor
1083-
# with an already-tokenized prompt (a list of ints) instead of a str.
1084-
# Handle both cases.
1085-
import torch as _torch
1086-
if isinstance(prompt, (list, tuple)) and len(prompt) > 0 and isinstance(prompt[0], int):
1087-
# Already token IDs — wrap without re-tokenizing
1088-
result["input_ids"] = _torch.tensor([prompt])
1089-
else:
1090-
prompt_tokenized = tokenizer(
1091-
prompt if prompt else "",
1092-
return_tensors="pt",
1093-
**tok_kwargs,
1094-
)
1095-
result["input_ids"] = prompt_tokenized["input_ids"]
1082+
# Always tokenize the prompt (for decoder or as dummy)
1083+
# This will be popped by the base class
1084+
prompt_tokenized = tokenizer(
1085+
prompt if prompt else "",
1086+
return_tensors="pt",
1087+
**tok_kwargs,
1088+
)
1089+
result["input_ids"] = prompt_tokenized["input_ids"]
10961090

10971091
return BatchFeature(result)
10981092

0 commit comments

Comments
 (0)