Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions example_t5gemma2_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
"""
Example usage of the vLLM BART plugin with T5Gemma2.

This script demonstrates how to use T5Gemma2 models with vLLM
after installing the BART plugin and the custom transformers fork.
"""
import vllm_bart_plugin
from vllm import LLM, SamplingParams
from vllm.assets.image import ImageAsset


def main():
    """Load the T5Gemma2 checkpoint, run three demo prompts, and print each result.

    The three requests sent in a single ``generate`` call are:

    1. a plain text-to-text prompt,
    2. an explicit encoder/decoder prompt, where the encoder text is passed
       under ``multi_modal_data["text"]`` and the decoder is seeded with
       ``"Machine"``,
    3. a text prompt accompanied by the ``stop_sign`` image asset.

    Sampling uses ``temperature=0.0`` and ``max_tokens=64`` for every request.
    """
    model_name = "google/t5gemma-2-270m-270m"
    print(f"Loading {model_name}...")

    llm = LLM(
        model=model_name,
        trust_remote_code=True,
        enforce_eager=True,
        max_model_len=1024,
    )
    params = SamplingParams(temperature=0.0, max_tokens=64)

    # Simple text-to-text inference
    text_request = {
        "prompt": "Translate English to French: The president of the United States is",
    }

    # Explicit encoder/decoder prompt
    encoder_request = {
        "prompt": "",
        "multi_modal_data": {
            "text": "Summarize: Machine learning is a field of study in artificial intelligence.",
        },
    }
    encoder_decoder_request = {
        "encoder_prompt": encoder_request,
        "decoder_prompt": "Machine",
    }

    # Multimodal inference example (if the model supports vision tasks via its SigLIP encoder)
    stop_sign_image = ImageAsset("stop_sign").pil_image
    image_request = {
        "prompt": "Describe this image in detail.",
        "multi_modal_data": {"image": stop_sign_image},
    }

    requests = [text_request, encoder_decoder_request, image_request]
    outputs = llm.generate(requests, sampling_params=params)

    # Only the first candidate of each request is shown; numbering is 1-based.
    for position, request_output in enumerate(outputs, start=1):
        first_candidate = request_output.outputs[0]
        print(f"\n--- Output {position} ---")
        print(first_candidate.text)


# Run the demo only when this file is executed as a script, so importing
# the module does not load the model.
if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions vllm_bart_plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def register_bart_model() -> None:
"Florence2ForConditionalGeneration",
"vllm_bart_plugin.florence2:Florence2ForConditionalGeneration",
)
ModelRegistry.register_model(
"T5Gemma2ForConditionalGeneration",
"vllm_bart_plugin.t5gemma2:T5Gemma2ForConditionalGeneration",
)

logger.info("Successfully registered BART model with vLLM")

Expand Down
2 changes: 1 addition & 1 deletion vllm_bart_plugin/bart.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from vllm.model_executor.layers.attention import Attention
from vllm.model_executor.layers.attention.cross_attention import CrossAttention
from vllm.model_executor.layers.attention.mm_encoder_attention import MMEncoderAttention
from vllm.multimodal.processing.dummy_inputs import BaseDummyInputsBuilder
from vllm.multimodal.processing import BaseDummyInputsBuilder
except ImportError:
# These were moved after vLLM 0.13; try the legacy path
from vllm.attention.backends.abstract import AttentionType
Expand Down
5 changes: 3 additions & 2 deletions vllm_bart_plugin/florence2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from transformers import BartConfig, BatchFeature, BartTokenizer, PretrainedConfig
from transformers.utils import logging

from vllm.attention.layer import Attention, AttentionType
from vllm.model_executor.layers.attention import Attention
from vllm.v1.attention.backend import AttentionType
try:
from vllm.model_executor.layers.attention.cross_attention import CrossAttention
from vllm.model_executor.layers.attention.mm_encoder_attention import MMEncoderAttention
Expand Down Expand Up @@ -58,7 +59,7 @@
PromptInsertion,
PromptIndexTargets,
)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.multimodal.processing import BaseDummyInputsBuilder
from vllm.sequence import IntermediateTensors
from vllm.utils.collection_utils import is_list_of

Expand Down
Loading