Skip to content

Commit 4bc4975

Browse files
committed
vllm 0.16.0 support
Signed-off-by: Carles Onielfa <carlesonielfa@gmail.com>
1 parent 22f41e8 commit 4bc4975

5 files changed

Lines changed: 26 additions & 14 deletions

File tree

README.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ This plugin requires [uv](https://docs.astral.sh/uv/) for package management. If
1515
```bash
1616
curl -LsSf https://astral.sh/uv/install.sh | sh
1717
```
18+
### From Git
19+
20+
Install directly from the Git repository with pip:
21+
22+
```bash
23+
pip install git+https://github.com/vllm-project/bart-plugin
24+
```
1825

1926
### From Source
2027

@@ -186,11 +193,14 @@ Notes:
186193
```
187194
bart-plugin/
188195
├── vllm_bart_plugin/
189-
│ ├── __init__.py # Plugin registration
190-
│ └── bart.py # BART model implementation
191-
├── setup.py # Package configuration and entry points
192-
├── README.md # This file
193-
└── LICENSE # License file
196+
│ ├── __init__.py # Plugin registration
197+
│ ├── bart.py # BART model implementation
198+
│ └── florence2.py # Florence-2 model implementation
199+
├── setup.py # Package configuration and entry points
200+
├── README.md # This file
201+
├── LICENSE # License file
202+
├── example_bart_usage.py # Example usage script for BART
203+
└── example_florence2_usage.py # Example usage script for Florence-2
194204
```
195205

196206
### Running Tests

example_florence2_usage.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
def main():
1414
"""Run Florence-2 model examples."""
15-
model_name = "microsoft/Florence-2-large"
15+
model_name = "microsoft/Florence-2-large-ft"
1616
tokenizer_name = "Isotr0py/Florence-2-tokenizer"
1717

1818
llm = LLM(
@@ -60,4 +60,4 @@ def main():
6060

6161

6262
if __name__ == "__main__":
63-
main()
63+
main()

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "vllm-bart-plugin"
7-
version = "0.2.0"
7+
version = "0.3.0"
88
description = "BART model plugin for vLLM"
99
readme = "README.md"
1010
requires-python = ">=3.10"
@@ -26,9 +26,9 @@ classifiers = [
2626
]
2727

2828
dependencies = [
29-
"vllm>=0.14.0",
29+
"vllm>=0.16.0",
3030
"torch>=2.9.0",
31-
"transformers>=4.56.0,<5",
31+
"transformers>=4.56.0",
3232
]
3333

3434
[project.optional-dependencies]

vllm_bart_plugin/bart.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
from torch import nn
3030
from transformers import BartConfig
3131
from transformers.utils import logging
32-
from vllm.attention.layer import Attention, AttentionType
32+
from vllm.model_executor.layers.attention import Attention
33+
from vllm.v1.attention.backend import AttentionType
3334
from vllm.config import CacheConfig, VllmConfig
3435
from vllm.config.lora import LoRAConfig
3536
from vllm.config.multimodal import BaseDummyOptions
@@ -78,7 +79,7 @@
7879
EncDecMultiModalProcessor,
7980
PromptUpdate,
8081
)
81-
from vllm.multimodal.profiling import BaseDummyInputsBuilder
82+
from vllm.multimodal.processing.dummy_inputs import BaseDummyInputsBuilder
8283
from vllm.sequence import IntermediateTensors
8384
from vllm.utils.collection_utils import is_list_of
8485

vllm_bart_plugin/florence2.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
from transformers import BartConfig, BatchFeature, BartTokenizer, PretrainedConfig
1414
from transformers.utils import logging
1515

16-
from vllm.attention.layer import Attention, AttentionType
16+
from vllm.model_executor.layers.attention import Attention
17+
from vllm.v1.attention.backend import AttentionType
1718
from vllm.model_executor.layers.attention.cross_attention import CrossAttention
1819
from vllm.model_executor.layers.attention.mm_encoder_attention import MMEncoderAttention
1920
from vllm.config import CacheConfig, VllmConfig
@@ -53,7 +54,7 @@
5354
PromptInsertion,
5455
PromptIndexTargets,
5556
)
56-
from vllm.multimodal.profiling import BaseDummyInputsBuilder
57+
from vllm.multimodal.processing.dummy_inputs import BaseDummyInputsBuilder
5758
from vllm.sequence import IntermediateTensors
5859
from vllm.utils.collection_utils import is_list_of
5960

0 commit comments

Comments
 (0)