vllm-project
diff --git a/‎example_nllb_usage.py‎
Lines changed: 105 additions & 0 deletions b/‎example_nllb_usage.py‎
Lines changed: 105 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 3 additions & 3 deletions b/‎pyproject.toml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎tests/conftest.py‎
Lines changed: 14 additions & 0 deletions b/‎tests/conftest.py‎
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,105 @@
+"""Example: NLLB translation with vLLM via the bart-plugin.
+
+Supported models (all use model_type=m2m_100):
+  facebook/nllb-200-distilled-600M   (~1.2 GB)
+  facebook/nllb-200-distilled-1.3B   (~2.6 GB)
+  facebook/nllb-200-3.3B             (~6.6 GB)
+
+Language codes follow the FLORES-200 format: <language>_<script>
+  English   → eng_Latn
+  French    → fra_Latn
+  German    → deu_Latn
+  Arabic    → arb_Arab
+  Chinese   → zho_Hans
+  Amharic   → amh_Ethi
+  Hindi     → hin_Deva
+  (200+ languages supported)
+
+Run:
+    python example_nllb_usage.py
+
+Required:
+    pip install vllm-bart-plugin
+"""
+
+import os
+
+# Turn off VLLM_ENABLE_V1_MULTIPROCESSING before `vllm` is imported below.
+# NOTE(review): presumably keeps the engine in-process — confirm against vLLM's env-var docs.
+os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
+
+from vllm import LLM, SamplingParams
+from vllm_bart_plugin.nllb import make_nllb_prompt
+
+# Smallest of the checkpoints listed in the module docstring.
+MODEL_NAME = "facebook/nllb-200-distilled-600M"
+
+# ---------------------------------------------------------------------------
+# Demo 1: English → multiple target languages
+# ---------------------------------------------------------------------------
+
+ENGLISH_TEXTS = [
+    "The United Nations was founded in 1945.",
+    "Machine translation has improved significantly in recent years.",
+    "Hello, how are you doing today?",
+]
+
+TARGET_LANGS = [  # (display name, FLORES-200 code) pairs
+    ("French",  "fra_Latn"),
+    ("German",  "deu_Latn"),
+    ("Spanish", "spa_Latn"),
+    ("Arabic",  "arb_Arab"),
+    ("Chinese", "zho_Hans"),
+]
+
+# ---------------------------------------------------------------------------
+# Demo 2: Non-English sources (translated into English or German)
+# ---------------------------------------------------------------------------
+
+NON_ENGLISH_TEXTS = [  # (src_lang, tgt_lang, text) triples
+    # Amharic (Ge'ez script) → English
+    ("amh_Ethi", "eng_Latn", "ሰላም፣ ዓለም! የተባበሩት መንግሥታት ድርጅት በ1945 ዓ.ም ተቋቋመ።"),
+    # French → German
+    ("fra_Latn", "deu_Latn", "La traduction automatique s'est beaucoup améliorée."),
+    # Hindi → English
+    ("hin_Deva", "eng_Latn", "संयुक्त राष्ट्र की स्थापना 1945 में हुई थी।"),
+]
+
+
+def main():
+    """Run both translation demos against MODEL_NAME and print the results.
+
+    Demo 1 translates every sentence in ENGLISH_TEXTS into each language of
+    TARGET_LANGS, issuing one batched generate() call per target language.
+    Demo 2 translates each (src_lang, tgt_lang, text) entry of
+    NON_ENGLISH_TEXTS with its own generate() call.
+    """
+    llm = LLM(
+        model=MODEL_NAME,
+        enforce_eager=True,
+        max_model_len=512,
+        gpu_memory_utilization=0.15,
+        dtype="float16",
+    )
+    # temperature=0.0 selects greedy decoding in vLLM; each output is capped
+    # at 60 generated tokens.
+    params = SamplingParams(temperature=0.0, max_tokens=60)
+
+    # --- Demo 1: English source -------------------------------------------
+    print("=" * 60)
+    print("Demo 1: English → multiple languages")
+    print("=" * 60)
+
+    for tgt_name, tgt_lang in TARGET_LANGS:
+        # One prompt per English sentence, all sharing the same target language.
+        prompts = [
+            make_nllb_prompt(text, src_lang="eng_Latn", tgt_lang=tgt_lang)
+            for text in ENGLISH_TEXTS
+        ]
+        outputs = llm.generate(prompts, sampling_params=params)
+        print(f"\n→ {tgt_name} ({tgt_lang})")
+        # NOTE(review): zip() relies on generate() returning outputs in prompt order — confirm.
+        for text, out in zip(ENGLISH_TEXTS, outputs):
+            print(f"  [EN] {text}")
+            # Label with the three-letter language part of the code, e.g. fra_Latn -> FRA.
+            print(f"  [{tgt_lang[:3].upper()}] {out.outputs[0].text}")
+
+    # --- Demo 2: Non-English sources --------------------------------------
+    print("\n" + "=" * 60)
+    print("Demo 2: Non-English sources")
+    print("=" * 60)
+
+    for src_lang, tgt_lang, text in NON_ENGLISH_TEXTS:
+        prompt = make_nllb_prompt(text, src_lang=src_lang, tgt_lang=tgt_lang)
+        # A single prompt is submitted, so take the first (only) result.
+        out = llm.generate([prompt], sampling_params=params)[0]
+        print(f"\n[{src_lang}] {text}")
+        print(f"[{tgt_lang}] {out.outputs[0].text}")
+
+
+if __name__ == "__main__":
+    main()
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "vllm-bart-plugin"
-version = "0.3.3"
-description = "BART model plugin for vLLM"
+version = "0.3.4"
+description = "BART, Florence-2, and NLLB/M2M-100 (translation) model plugin for vLLM"
 readme = "README.md"
 requires-python = ">=3.10"
 license = {text = "Apache-2.0"}
 authors = [
     {name = "Nicolò Lucchesi", email = "nick.lucche@redhat.com"}
 ]
-keywords = ["vllm", "bart", "language-model", "inference", "plugin"]
+keywords = ["vllm", "bart", "nllb", "m2m100", "translation", "language-model", "inference", "plugin"]
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
 
@@ -10,6 +10,20 @@ def cuda_available():
     return torch.cuda.is_available()
 
 
+@pytest.fixture
+def vllm_config_ctx():
+    """Yield a default VllmConfig that is set as the current vLLM config.
+
+    Required for tests that instantiate vLLM attention layers directly
+    (Attention, MMEncoderAttention, CrossAttention all call
+    get_current_vllm_config() during __init__).
+    """
+    from vllm.config import VllmConfig, set_current_vllm_config
+    vllm_config = VllmConfig()
+    with set_current_vllm_config(vllm_config):
+        yield vllm_config
+
+
 @pytest.fixture(scope="session")
 def device():
     """Get the device to use for tests."""