Skip to content

Commit 21480cd

Browse files
committed
fix(rlsapi): use chat completions API instead of Agent abstraction
The AsyncAgent.create_turn() method internally calls self.initialize(), which does not exist in llama-stack-client 0.3.5, causing an AttributeError. Switch to the direct chat.completions.create() API, which:

- Is simpler for stateless single-turn inference
- Avoids the broken Agent abstraction
- Has fewer moving parts (no session management needed)

Signed-off-by: Major Hayden <major@redhat.com>
1 parent 2adb747 commit 21480cd

1 file changed

Lines changed: 26 additions & 18 deletions

File tree

src/app/endpoints/rlsapi_v1.py

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
"""
66

77
import logging
8-
from typing import Annotated, Any, cast
8+
from typing import Annotated, Any
99

1010
from fastapi import APIRouter, Depends, HTTPException
1111
from llama_stack_client import APIConnectionError # type: ignore
12-
from llama_stack_client.types import UserMessage # type: ignore
13-
from llama_stack_client.types.alpha.agents.turn import Turn
12+
from llama_stack_client.types.chat.completion_create_params import (
13+
MessageOpenAISystemMessageParam,
14+
MessageOpenAIUserMessageParam,
15+
)
1416

1517
import constants
1618
from authentication import get_auth_dependency
@@ -27,9 +29,7 @@
2729
)
2830
from models.rlsapi.requests import RlsapiV1InferRequest
2931
from models.rlsapi.responses import RlsapiV1InferData, RlsapiV1InferResponse
30-
from utils.endpoints import get_temp_agent
3132
from utils.suid import get_suid
32-
from utils.types import content_to_str
3333

3434
logger = logging.getLogger(__name__)
3535
router = APIRouter(tags=["rlsapi-v1"])
@@ -82,8 +82,7 @@ def _get_default_model_id() -> str:
8282
async def retrieve_simple_response(question: str) -> str:
8383
"""Retrieve a simple response from the LLM for a stateless query.
8484
85-
Creates a temporary agent, sends a single turn with the user's question,
86-
and returns the LLM response text. No conversation persistence or tools.
85+
Uses direct chat completion API for simple stateless inference.
8786
8887
Args:
8988
question: The combined user input (question + context).
@@ -100,24 +99,33 @@ async def retrieve_simple_response(question: str) -> str:
10099

101100
logger.debug("Using model %s for rlsapi v1 inference", model_id)
102101

103-
agent, session_id, _ = await get_temp_agent(
104-
client, model_id, constants.DEFAULT_SYSTEM_PROMPT
102+
sys_msg: MessageOpenAISystemMessageParam = {
103+
"role": "system",
104+
"content": constants.DEFAULT_SYSTEM_PROMPT,
105+
}
106+
user_msg: MessageOpenAIUserMessageParam = {
107+
"role": "user",
108+
"content": question,
109+
}
110+
111+
response = await client.chat.completions.create(
112+
model=model_id,
113+
messages=[sys_msg, user_msg],
105114
)
106115

107-
response = await agent.create_turn(
108-
messages=[UserMessage(role="user", content=question).model_dump()],
109-
session_id=session_id,
110-
stream=False,
111-
)
112-
response = cast(Turn, response)
116+
if not response.choices:
117+
return ""
113118

114-
if getattr(response, "output_message", None) is None:
119+
choice = response.choices[0]
120+
message = getattr(choice, "message", None)
121+
if message is None:
115122
return ""
116123

117-
if getattr(response.output_message, "content", None) is None:
124+
content = getattr(message, "content", None)
125+
if content is None:
118126
return ""
119127

120-
return content_to_str(response.output_message.content)
128+
return str(content)
121129

122130

123131
@router.post("/infer", responses=infer_responses)

0 commit comments

Comments
 (0)