Skip to content

Commit fa627c3

Browse files
committed
INFERENCE_MODEL
1 parent 74931d9 commit fa627c3

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

llm/client-openai-vector_stores-responses.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# ./llama-stack/docs/docs/getting_started/demo_script.py
44

5-
import io, requests
5+
import io, os, requests
66
from openai import OpenAI
77

88
c = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
@@ -14,7 +14,7 @@
1414
c.vector_stores.files.create(vs.id, file_id=fid)
1515

1616
resp = c.responses.create(
17-
model="vertexai/google/gemini-2.5-flash",
17+
model=os.getenv("INFERENCE_MODEL", "vertexai/google/gemini-2.5-flash"),
1818
input="What is the color of abc?",
1919
tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
2020
# include=["file_search_call.results"],

llm/llama-stack-client-completions-min.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
## start llama-stack:
77
# uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name ~/my-ollama-llama-stack --run
88

9+
import os
910
from llama_stack_client import LlamaStackClient
1011

1112
c = LlamaStackClient(base_url="http://localhost:8321")
1213

1314
r = c.chat.completions.create(
14-
model="gemini/models/gemini-2.5-pro",
15+
model=os.getenv("INFERENCE_MODEL", "gemini/models/gemini-2.5-pro"),
1516
messages=[
1617
{"role": "system", "content": "You are a helpful assistant."},
1718
{"role": "user", "content": "Write a haiku about coding"},

llm/llama-stack-client-vector_stores-agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/python3
22

3+
import os
34
from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient
45
import requests
56
from io import BytesIO
@@ -16,7 +17,7 @@
1617

1718
agent = Agent(
1819
c,
19-
model="vertexai/google/gemini-2.5-flash",
20+
model=os.getenv("INFERENCE_MODEL", "vertexai/google/gemini-2.5-flash"),
2021
instructions="You are a helpful assistant",
2122
tools=[
2223
{ "type": "file_search", "vector_store_ids": [vs.id], }

0 commit comments

Comments
 (0)