Skip to content

Commit d2933e0

Browse files
committed
Bump to llama-stack and llama-stack-client 0.3.0
1 parent c57d2ba commit d2933e0

10 files changed

Lines changed: 76 additions & 65 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ dependencies = [
2828
# Used by authentication/k8s integration
2929
"kubernetes>=30.1.0",
3030
# Used to call Llama Stack APIs
31-
"llama-stack==0.2.22",
32-
"llama-stack-client==0.2.22",
31+
"llama-stack==0.3.0",
32+
"llama-stack-client==0.3.0",
3333
# Used by Logger
3434
"rich>=14.0.0",
3535
# Used by JWK token auth handler

src/app/endpoints/query.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,16 @@
1313
APIConnectionError,
1414
AsyncLlamaStackClient, # type: ignore
1515
)
16-
from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
1716
from llama_stack_client.types import Shield, UserMessage # type: ignore
18-
from llama_stack_client.types.agents.turn import Turn
19-
from llama_stack_client.types.agents.turn_create_params import (
17+
from llama_stack_client.types.alpha.agents.turn import Turn
18+
from llama_stack_client.types.alpha.agents.turn_create_params import (
2019
Document,
2120
Toolgroup,
2221
ToolgroupAgentToolGroupWithArgs,
2322
)
2423
from llama_stack_client.types.model_list_response import ModelListResponse
2524
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
26-
from llama_stack_client.types.tool_execution_step import ToolExecutionStep
25+
from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
2726
from sqlalchemy.exc import SQLAlchemyError
2827

2928
import constants
@@ -68,7 +67,7 @@
6867
)
6968
from utils.token_counter import TokenCounter, extract_and_update_token_metrics
7069
from utils.transcripts import store_transcript
71-
from utils.types import TurnSummary
70+
from utils.types import TurnSummary, content_to_str
7271

7372
logger = logging.getLogger("app.endpoints.handlers")
7473
router = APIRouter(tags=["query"])
@@ -202,7 +201,7 @@ async def get_topic_summary(
202201
)
203202
response = cast(Turn, response)
204203
return (
205-
interleaved_content_as_str(response.output_message.content)
204+
content_to_str(response.output_message.content)
206205
if (
207206
getattr(response, "output_message", None) is not None
208207
and getattr(response.output_message, "content", None) is not None
@@ -764,7 +763,7 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
764763

765764
summary = TurnSummary(
766765
llm_response=(
767-
interleaved_content_as_str(response.output_message.content)
766+
content_to_str(response.output_message.content)
768767
if (
769768
getattr(response, "output_message", None) is not None
770769
and getattr(response.output_message, "content", None) is not None

src/app/endpoints/streaming_query.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
APIConnectionError,
1717
AsyncLlamaStackClient, # type: ignore
1818
)
19-
from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
2019
from llama_stack_client.types import UserMessage # type: ignore
21-
from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
20+
from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
2221
AgentTurnResponseStreamChunk,
2322
)
24-
from llama_stack_client.types.agents.turn_create_params import Document
23+
from llama_stack_client.types.alpha.agents.turn_create_params import Document
2524
from llama_stack_client.types.shared import ToolCall
2625
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
2726

@@ -69,7 +68,7 @@
6968
from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
7069
from utils.token_counter import TokenCounter, extract_token_usage_from_turn
7170
from utils.transcripts import store_transcript
72-
from utils.types import TurnSummary
71+
from utils.types import TurnSummary, content_to_str
7372

7473
logger = logging.getLogger("app.endpoints.handlers")
7574
router = APIRouter(tags=["streaming_query"])
@@ -431,9 +430,7 @@ def _handle_turn_complete_event(
431430
str: SSE-formatted string containing the turn completion
432431
event and output message content.
433432
"""
434-
full_response = interleaved_content_as_str(
435-
chunk.event.payload.turn.output_message.content
436-
)
433+
full_response = content_to_str(chunk.event.payload.turn.output_message.content)
437434

438435
if media_type == MEDIA_TYPE_TEXT:
439436
yield (
@@ -602,7 +599,7 @@ def _handle_tool_execution_event(
602599

603600
for r in chunk.event.payload.step_details.tool_responses:
604601
if r.tool_name == "query_from_memory":
605-
inserted_context = interleaved_content_as_str(r.content)
602+
inserted_context = content_to_str(r.content)
606603
yield stream_event(
607604
data={
608605
"id": chunk_id,
@@ -653,7 +650,7 @@ def _handle_tool_execution_event(
653650
"id": chunk_id,
654651
"token": {
655652
"tool_name": r.tool_name,
656-
"response": interleaved_content_as_str(r.content),
653+
"response": content_to_str(r.content),
657654
},
658655
},
659656
event_type=LLM_TOOL_RESULT_EVENT,
@@ -736,9 +733,7 @@ async def response_generator(
736733
continue
737734
p = chunk.event.payload
738735
if p.event_type == "turn_complete":
739-
summary.llm_response = interleaved_content_as_str(
740-
p.turn.output_message.content
741-
)
736+
summary.llm_response = content_to_str(p.turn.output_message.content)
742737
latest_turn = p.turn
743738
system_prompt = get_system_prompt(context.query_request, configuration)
744739
try:

src/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Minimal and maximal supported Llama Stack version
44
MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17"
5-
MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.22"
5+
MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.0"
66

77
UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request"
88

src/metrics/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from llama_stack.models.llama.llama3.chat_format import ChatFormat
88
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
99
from llama_stack_client import APIConnectionError, APIStatusError
10-
from llama_stack_client.types.agents.turn import Turn
10+
from llama_stack_client.types.alpha.agents.turn import Turn
1111

1212
import metrics
1313
from client import AsyncLlamaStackClientHolder

src/models/requests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from enum import Enum
55

66
from pydantic import BaseModel, model_validator, field_validator, Field
7-
from llama_stack_client.types.agents.turn_create_params import Document
7+
from llama_stack_client.types.alpha.agents.turn_create_params import Document
88

99
from log import get_logger
1010
from utils import suid

src/utils/token_counter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from llama_stack.models.llama.datatypes import RawMessage
88
from llama_stack.models.llama.llama3.chat_format import ChatFormat
99
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
10-
from llama_stack_client.types.agents.turn import Turn
10+
from llama_stack_client.types.alpha.agents.turn import Turn
1111

1212
import metrics
1313

src/utils/types.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,41 @@
22

33
from typing import Any, Optional
44
import json
5-
from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
65
from llama_stack_client.lib.agents.tool_parser import ToolParser
76
from llama_stack_client.types.shared.completion_message import CompletionMessage
87
from llama_stack_client.types.shared.tool_call import ToolCall
9-
from llama_stack_client.types.tool_execution_step import ToolExecutionStep
8+
from llama_stack_client.types.shared.interleaved_content_item import (
9+
TextContentItem,
10+
ImageContentItem,
11+
)
12+
from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
1013
from pydantic import BaseModel
1114
from models.responses import RAGChunk
1215
from constants import DEFAULT_RAG_TOOL
1316

1417

18+
def content_to_str(content: Any) -> str:
    """Flatten Llama Stack interleaved content into a plain string.

    Content items are recognised by their ``type`` tag as well as by class,
    so dict-shaped items and equivalent models declared in other modules are
    rendered as text instead of leaking a Python ``repr`` into the response.

    Args:
        content: ``None``, a string, a text/image content item (model object
            or dict carrying a ``type`` tag), or a list/tuple of any of
            these (nested sequences are flattened recursively).

    Returns:
        str: ``""`` for ``None``; the string itself; the ``text`` of a text
        item; the placeholder ``"<image>"`` for an image item; the items of
        a sequence joined with a single space; ``str(content)`` otherwise.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        return " ".join(content_to_str(item) for item in content)

    # Read the discriminator from either representation of a content item:
    # dict keys for dict-shaped items, attributes for model objects.
    if isinstance(content, dict):
        kind, text = content.get("type"), content.get("text")
    else:
        kind, text = getattr(content, "type", None), getattr(content, "text", None)

    if kind == "text" and isinstance(text, str):
        return text
    if kind == "image":
        return "<image>"

    # Class-based checks are kept so instances whose ``type`` tag is missing
    # or unexpected are still handled exactly as before.
    if isinstance(content, TextContentItem):
        return content.text
    if isinstance(content, ImageContentItem):
        return "<image>"

    # Unknown content: fall back to the generic string conversion.
    return str(content)
38+
39+
1540
class Singleton(type):
1641
"""Metaclass for Singleton support."""
1742

@@ -99,9 +124,7 @@ def append_tool_calls_from_llama(self, tec: ToolExecutionStep) -> None:
99124
responses_by_id = {tc.call_id: tc for tc in tec.tool_responses}
100125
for call_id, tc in calls_by_id.items():
101126
resp = responses_by_id.get(call_id)
102-
response_content = (
103-
interleaved_content_as_str(resp.content) if resp else None
104-
)
127+
response_content = content_to_str(resp.content) if resp else None
105128

106129
self.tool_calls.append(
107130
ToolCallSummary(

tests/unit/app/endpoints/test_query.py

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
from fastapi import HTTPException, Request, status
1212
from litellm.exceptions import RateLimitError
1313
from llama_stack_client import APIConnectionError
14-
from llama_stack_client.types import UserMessage
15-
from llama_stack_client.types.agents.turn import Turn
14+
from llama_stack_client.types import UserMessage # type: ignore
15+
from llama_stack_client.types.alpha.agents.turn import Turn
1616
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
17-
from llama_stack_client.types.tool_execution_step import ToolExecutionStep
18-
from llama_stack_client.types.tool_response import ToolResponse
17+
from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
18+
from llama_stack_client.types.alpha.tool_response import ToolResponse
1919
from pydantic import AnyUrl
2020
from pytest_mock import MockerFixture
2121

@@ -1935,9 +1935,9 @@ async def test_get_topic_summary_successful_response(mocker: MockerFixture) -> N
19351935
# Mock the agent's create_turn method
19361936
mock_agent.create_turn.return_value = mock_response
19371937

1938-
# Mock the interleaved_content_as_str function
1938+
# Mock the content_to_str function
19391939
mocker.patch(
1940-
"app.endpoints.query.interleaved_content_as_str",
1940+
"app.endpoints.query.content_to_str",
19411941
return_value="This is a topic summary about OpenStack",
19421942
)
19431943

@@ -2068,9 +2068,9 @@ async def test_get_topic_summary_with_interleaved_content(
20682068
# Mock the agent's create_turn method
20692069
mock_agent.create_turn.return_value = mock_response
20702070

2071-
# Mock the interleaved_content_as_str function
2072-
mock_interleaved_content_as_str = mocker.patch(
2073-
"app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
2071+
# Mock the content_to_str function
2072+
mock_content_to_str = mocker.patch(
2073+
"app.endpoints.query.content_to_str", return_value="Topic summary"
20742074
)
20752075

20762076
# Mock the get_topic_summary_system_prompt function
@@ -2091,8 +2091,8 @@ async def test_get_topic_summary_with_interleaved_content(
20912091
# Assertions
20922092
assert result == "Topic summary"
20932093

2094-
# Verify interleaved_content_as_str was called with the content
2095-
mock_interleaved_content_as_str.assert_called_once_with(mock_content)
2094+
# Verify content_to_str was called with the content
2095+
mock_content_to_str.assert_called_once_with(mock_content)
20962096

20972097

20982098
@pytest.mark.asyncio
@@ -2113,10 +2113,8 @@ async def test_get_topic_summary_system_prompt_retrieval(mocker: MockerFixture)
21132113
# Mock the agent's create_turn method
21142114
mock_agent.create_turn.return_value = mock_response
21152115

2116-
# Mock the interleaved_content_as_str function
2117-
mocker.patch(
2118-
"app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
2119-
)
2116+
# Mock the content_to_str function
2117+
mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
21202118

21212119
# Mock the get_topic_summary_system_prompt function
21222120
mock_get_topic_summary_system_prompt = mocker.patch(
@@ -2189,10 +2187,8 @@ async def test_get_topic_summary_agent_creation_parameters(
21892187
# Mock the agent's create_turn method
21902188
mock_agent.create_turn.return_value = mock_response
21912189

2192-
# Mock the interleaved_content_as_str function
2193-
mocker.patch(
2194-
"app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
2195-
)
2190+
# Mock the content_to_str function
2191+
mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
21962192

21972193
# Mock the get_topic_summary_system_prompt function
21982194
mocker.patch(
@@ -2236,10 +2232,8 @@ async def test_get_topic_summary_create_turn_parameters(mocker: MockerFixture) -
22362232
# Mock the agent's create_turn method
22372233
mock_agent.create_turn.return_value = mock_response
22382234

2239-
# Mock the interleaved_content_as_str function
2240-
mocker.patch(
2241-
"app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
2242-
)
2235+
# Mock the content_to_str function
2236+
mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
22432237

22442238
# Mock the get_topic_summary_system_prompt function
22452239
mocker.patch(

tests/unit/app/endpoints/test_streaming_query.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,27 @@
1010
from litellm.exceptions import RateLimitError
1111
from llama_stack_client import APIConnectionError
1212
from llama_stack_client.types import UserMessage # type: ignore
13-
from llama_stack_client.types.agents import Turn
14-
from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
13+
from llama_stack_client.types.alpha.agents.turn import Turn
14+
from llama_stack_client.types.shared.completion_message import CompletionMessage
15+
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
16+
from llama_stack_client.types.shared.safety_violation import SafetyViolation
17+
from llama_stack_client.types.alpha.shield_call_step import ShieldCallStep
18+
from llama_stack_client.types.shared.tool_call import ToolCall
19+
from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta
20+
from llama_stack_client.types.alpha.agents.turn_response_event import TurnResponseEvent
21+
from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
1522
AgentTurnResponseStreamChunk,
1623
)
17-
from llama_stack_client.types.agents.turn_response_event import TurnResponseEvent
18-
from llama_stack_client.types.agents.turn_response_event_payload import (
24+
from llama_stack_client.types.alpha.agents.turn_response_event_payload import (
1925
AgentTurnResponseStepCompletePayload,
2026
AgentTurnResponseStepProgressPayload,
2127
AgentTurnResponseTurnAwaitingInputPayload,
2228
AgentTurnResponseTurnCompletePayload,
2329
AgentTurnResponseTurnStartPayload,
2430
)
25-
from llama_stack_client.types.shared.completion_message import CompletionMessage
26-
from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta
27-
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
28-
from llama_stack_client.types.shared.safety_violation import SafetyViolation
29-
from llama_stack_client.types.shared.tool_call import ToolCall
30-
from llama_stack_client.types.shield_call_step import ShieldCallStep
31-
from llama_stack_client.types.tool_execution_step import ToolExecutionStep
32-
from llama_stack_client.types.tool_response import ToolResponse
3331
from pytest_mock import MockerFixture
32+
from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
33+
from llama_stack_client.types.alpha.tool_response import ToolResponse
3434

3535
from app.endpoints.query import get_rag_toolgroups
3636
from app.endpoints.streaming_query import (

0 commit comments

Comments
 (0)