vllm-project · akh64bit · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026
diff --git a/example_t5gemma2_usage.py b/example_t5gemma2_usage.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+Example usage of the vLLM BART plugin with T5Gemma2.
+
+This script demonstrates how to use T5Gemma2 models with vLLM
+after installing the BART plugin and the custom transformers fork.
+"""
+import vllm_bart_plugin
+from vllm import LLM, SamplingParams
+from vllm.assets.image import ImageAsset
+
+
+def main():
+    """Send three example prompts to a T5Gemma2 model via vLLM and print each completion."""
+    checkpoint = "google/t5gemma-2-270m-270m"
+
+    print(f"Loading {checkpoint}...")
+    engine = LLM(
+        model=checkpoint,
+        trust_remote_code=True,
+        enforce_eager=True,
+        max_model_len=1024,
+    )
+    sampling = SamplingParams(temperature=0.0, max_tokens=64)
+
+    # Simple text-to-text inference
+    text_request = {
+        "prompt": "Translate English to French: The president of the United States is",
+    }
+    # Explicit encoder/decoder prompt
+    encoder_decoder_request = {
+        "encoder_prompt": {
+            "prompt": "",
+            "multi_modal_data": {
+                "text": "Summarize: Machine learning is a field of study in artificial intelligence.",
+            },
+        },
+        "decoder_prompt": "Machine",
+    }
+    # Multimodal inference example (if the model supports vision tasks via its SigLIP encoder)
+    image_request = {
+        "prompt": "Describe this image in detail.",
+        "multi_modal_data": {"image": ImageAsset("stop_sign").pil_image},
+    }
+
+    results = engine.generate(
+        [text_request, encoder_decoder_request, image_request],
+        sampling_params=sampling,
+    )
+
+    # Output headers are numbered starting from 1.
+    for number, result in enumerate(results, start=1):
+        completion = result.outputs[0].text
+        print(f"\n--- Output {number} ---")
+        print(completion)
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script, not on import
+    main()
diff --git a/vllm_bart_plugin/__init__.py b/vllm_bart_plugin/__init__.py
@@ -33,6 +33,10 @@ def register_bart_model() -> None:
             "Florence2ForConditionalGeneration",
             "vllm_bart_plugin.florence2:Florence2ForConditionalGeneration",
         )
+        ModelRegistry.register_model(
+            "T5Gemma2ForConditionalGeneration",
+            "vllm_bart_plugin.t5gemma2:T5Gemma2ForConditionalGeneration",
+        )
 
         logger.info("Successfully registered BART model with vLLM")
 

diff --git a/vllm_bart_plugin/bart.py b/vllm_bart_plugin/bart.py
@@ -42,7 +42,7 @@
     from vllm.model_executor.layers.attention import Attention
     from vllm.model_executor.layers.attention.cross_attention import CrossAttention
     from vllm.model_executor.layers.attention.mm_encoder_attention import MMEncoderAttention
-    from vllm.multimodal.processing.dummy_inputs import BaseDummyInputsBuilder
+    from vllm.multimodal.processing import BaseDummyInputsBuilder
 except ImportError:
     # These were moved after vLLM 0.13; try the legacy path
     from vllm.attention.backends.abstract import AttentionType

diff --git a/vllm_bart_plugin/florence2.py b/vllm_bart_plugin/florence2.py
@@ -13,7 +13,8 @@
 from transformers import BartConfig, BatchFeature, BartTokenizer, PretrainedConfig
 from transformers.utils import logging
 
-from vllm.attention.layer import Attention, AttentionType
+from vllm.model_executor.layers.attention import Attention
+from vllm.v1.attention.backend import AttentionType
 try:
     from vllm.model_executor.layers.attention.cross_attention import CrossAttention
     from vllm.model_executor.layers.attention.mm_encoder_attention import MMEncoderAttention
@@ -58,7 +59,7 @@
     PromptInsertion,
     PromptIndexTargets,
 )
-from vllm.multimodal.profiling import BaseDummyInputsBuilder
+from vllm.multimodal.processing import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils.collection_utils import is_list_of