
Commit 4826e57

committed: llama-stack-client-completions-min.py
1 parent 98164c8 commit 4826e57

File tree: 1 file changed, 20 additions & 0 deletions
@@ -0,0 +1,20 @@

#!/usr/bin/python3

# export INFERENCE_MODEL=llama3.2:1b-instruct-fp16
## start ollama:
# ollama run $INFERENCE_MODEL
## start llama-stack:
# uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name ~/my-ollama-llama-stack --run

from llama_stack_client import LlamaStackClient

c = LlamaStackClient(base_url="http://localhost:8321")

r = c.chat.completions.create(
    model="gemini/models/gemini-2.5-pro",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a haiku about coding"},
    ],
)
print(r.choices[0].message.content)
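Note that the setup comments export INFERENCE_MODEL=llama3.2:1b-instruct-fp16 for ollama, while the script hard-codes a gemini model id in the create() call. A minimal sketch of how the two could be reconciled by reading the model from the environment with a fallback (pick_model is a hypothetical helper, not part of this commit; the fallback value is taken from the setup comment above):

```python
import os

def pick_model(default: str = "llama3.2:1b-instruct-fp16") -> str:
    # Use INFERENCE_MODEL if exported (as in the setup comments),
    # otherwise fall back to the given default.
    return os.environ.get("INFERENCE_MODEL", default)
```

With this, the create() call could use model=pick_model() so the same script works whichever model the environment selects.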

0 commit comments