Bump llama-stack and llama-stack-client to 0.3.0

luis5tb · luis5tb · commit 970f50e59aab · 2025-11-21T14:26:34.000+01:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
     # Used by authentication/k8s integration
     "kubernetes>=30.1.0",
     # Used to call Llama Stack APIs
-    "llama-stack==0.2.22",
-    "llama-stack-client==0.2.22",
+    "llama-stack==0.3.0",
+    "llama-stack-client==0.3.0",
     # Used by Logger
     "rich>=14.0.0",
     # Used by JWK token auth handler
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
@@ -13,17 +13,16 @@
     APIConnectionError,
     AsyncLlamaStackClient,  # type: ignore
 )
-from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
 from llama_stack_client.types import Shield, UserMessage  # type: ignore
-from llama_stack_client.types.agents.turn import Turn
-from llama_stack_client.types.agents.turn_create_params import (
+from llama_stack_client.types.alpha.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn_create_params import (
     Toolgroup,
     ToolgroupAgentToolGroupWithArgs,
     Document,
 )
 from llama_stack_client.types.model_list_response import ModelListResponse
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
 
 import constants
 import metrics
@@ -62,7 +61,7 @@
 )
 from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
 from utils.transcripts import store_transcript
-from utils.types import TurnSummary
+from utils.types import TurnSummary, content_to_str
 from utils.token_counter import extract_and_update_token_metrics, TokenCounter
 
 logger = logging.getLogger("app.endpoints.handlers")
@@ -211,7 +210,7 @@ async def get_topic_summary(
     )
     response = cast(Turn, response)
     return (
-        interleaved_content_as_str(response.output_message.content)
+        content_to_str(response.output_message.content)
         if (
             getattr(response, "output_message", None) is not None
             and getattr(response.output_message, "content", None) is not None
@@ -778,7 +777,7 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
 
     summary = TurnSummary(
         llm_response=(
-            interleaved_content_as_str(response.output_message.content)
+            content_to_str(response.output_message.content)
             if (
                 getattr(response, "output_message", None) is not None
                 and getattr(response.output_message, "content", None) is not None
diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
@@ -16,14 +16,13 @@
     APIConnectionError,
     AsyncLlamaStackClient,  # type: ignore
 )
-from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
 from llama_stack_client.types import UserMessage  # type: ignore
-from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
+from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
     AgentTurnResponseStreamChunk,
 )
 from llama_stack_client.types.shared import ToolCall
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.agents.turn_create_params import Document
+from llama_stack_client.types.alpha.agents.turn_create_params import Document
 
 from app.endpoints.query import (
     get_rag_toolgroups,
@@ -65,7 +64,7 @@
 from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
 from utils.token_counter import TokenCounter, extract_token_usage_from_turn
 from utils.transcripts import store_transcript
-from utils.types import TurnSummary
+from utils.types import TurnSummary, content_to_str
 
 
 logger = logging.getLogger("app.endpoints.handlers")
@@ -444,9 +443,7 @@ def _handle_turn_complete_event(
         str: SSE-formatted string containing the turn completion
         event and output message content.
     """
-    full_response = interleaved_content_as_str(
-        chunk.event.payload.turn.output_message.content
-    )
+    full_response = content_to_str(chunk.event.payload.turn.output_message.content)
 
     if media_type == MEDIA_TYPE_TEXT:
         yield (
@@ -615,7 +612,7 @@ def _handle_tool_execution_event(
 
         for r in chunk.event.payload.step_details.tool_responses:
             if r.tool_name == "query_from_memory":
-                inserted_context = interleaved_content_as_str(r.content)
+                inserted_context = content_to_str(r.content)
                 yield stream_event(
                     data={
                         "id": chunk_id,
@@ -666,7 +663,7 @@ def _handle_tool_execution_event(
                         "id": chunk_id,
                         "token": {
                             "tool_name": r.tool_name,
-                            "response": interleaved_content_as_str(r.content),
+                            "response": content_to_str(r.content),
                         },
                     },
                     event_type=LLM_TOOL_RESULT_EVENT,
@@ -749,9 +746,7 @@ async def response_generator(
                 continue
             p = chunk.event.payload
             if p.event_type == "turn_complete":
-                summary.llm_response = interleaved_content_as_str(
-                    p.turn.output_message.content
-                )
+                summary.llm_response = content_to_str(p.turn.output_message.content)
                 latest_turn = p.turn
                 system_prompt = get_system_prompt(context.query_request, configuration)
                 try:
diff --git a/src/constants.py b/src/constants.py
@@ -2,7 +2,7 @@
 
 # Minimal and maximal supported Llama Stack version
 MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17"
-MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.22"
+MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.0"
 
 UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request"
 
diff --git a/src/metrics/utils.py b/src/metrics/utils.py
@@ -5,7 +5,7 @@
 from llama_stack.models.llama.datatypes import RawMessage
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack_client.types.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn import Turn
 
 import metrics
 from client import AsyncLlamaStackClientHolder
diff --git a/src/models/requests.py b/src/models/requests.py
@@ -4,7 +4,7 @@
 from enum import Enum
 
 from pydantic import BaseModel, model_validator, field_validator, Field
-from llama_stack_client.types.agents.turn_create_params import Document
+from llama_stack_client.types.alpha.agents.turn_create_params import Document
 
 from log import get_logger
 from utils import suid
diff --git a/src/utils/token_counter.py b/src/utils/token_counter.py
@@ -7,7 +7,7 @@
 from llama_stack.models.llama.datatypes import RawMessage
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack_client.types.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn import Turn
 
 import metrics
 
diff --git a/src/utils/types.py b/src/utils/types.py
@@ -2,16 +2,41 @@
 
 from typing import Any, Optional
 import json
-from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
 from llama_stack_client.lib.agents.tool_parser import ToolParser
 from llama_stack_client.types.shared.completion_message import CompletionMessage
 from llama_stack_client.types.shared.tool_call import ToolCall
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.shared.interleaved_content_item import (
+    TextContentItem,
+    ImageContentItem,
+)
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
 from pydantic import BaseModel
 from models.responses import RAGChunk
 from constants import DEFAULT_RAG_TOOL
 
 
+def content_to_str(content: Any) -> str:
+    """Convert content (str, TextContentItem, ImageContentItem, or list) to string.
+
+    Args:
+        content: Content to convert; None gives an empty string.
+
+    Returns:
+        str: Text of content; images become "<image>", lists are space-joined.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, TextContentItem):
+        return content.text
+    if isinstance(content, ImageContentItem):
+        return "<image>"
+    if isinstance(content, list):
+        return " ".join(content_to_str(item) for item in content)
+    return str(content)
+
+
 class Singleton(type):
     """Metaclass for Singleton support."""
 
@@ -99,9 +124,7 @@ def append_tool_calls_from_llama(self, tec: ToolExecutionStep) -> None:
         responses_by_id = {tc.call_id: tc for tc in tec.tool_responses}
         for call_id, tc in calls_by_id.items():
             resp = responses_by_id.get(call_id)
-            response_content = (
-                interleaved_content_as_str(resp.content) if resp else None
-            )
+            response_content = content_to_str(resp.content) if resp else None
 
             self.tool_calls.append(
                 ToolCallSummary(
diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py
@@ -14,10 +14,10 @@
 
 from llama_stack_client import APIConnectionError
 from llama_stack_client.types import UserMessage  # type: ignore
-from llama_stack_client.types.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn import Turn
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
-from llama_stack_client.types.tool_response import ToolResponse
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.alpha.tool_response import ToolResponse
 from pydantic import AnyUrl
 
 from tests.unit.conftest import AgentFixtures
@@ -1929,9 +1929,9 @@ async def test_get_topic_summary_successful_response(mocker: MockerFixture) -> N
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
+    # Mock the content_to_str function
     mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str",
+        "app.endpoints.query.content_to_str",
         return_value="This is a topic summary about OpenStack",
     )
 
@@ -2062,9 +2062,9 @@ async def test_get_topic_summary_with_interleaved_content(
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mock_interleaved_content_as_str = mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
+    # Mock the content_to_str function
+    mock_content_to_str = mocker.patch(
+        "app.endpoints.query.content_to_str", return_value="Topic summary"
     )
 
     # Mock the get_topic_summary_system_prompt function
@@ -2085,8 +2085,8 @@ async def test_get_topic_summary_with_interleaved_content(
     # Assertions
     assert result == "Topic summary"
 
-    # Verify interleaved_content_as_str was called with the content
-    mock_interleaved_content_as_str.assert_called_once_with(mock_content)
+    # Verify content_to_str was called with the content
+    mock_content_to_str.assert_called_once_with(mock_content)
 
 
 @pytest.mark.asyncio
@@ -2107,10 +2107,8 @@ async def test_get_topic_summary_system_prompt_retrieval(mocker: MockerFixture)
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
-    )
+    # Mock the content_to_str function
+    mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
 
     # Mock the get_topic_summary_system_prompt function
     mock_get_topic_summary_system_prompt = mocker.patch(
@@ -2183,10 +2181,8 @@ async def test_get_topic_summary_agent_creation_parameters(
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
-    )
+    # Mock the content_to_str function
+    mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
 
     # Mock the get_topic_summary_system_prompt function
     mocker.patch(
@@ -2230,10 +2226,8 @@ async def test_get_topic_summary_create_turn_parameters(mocker: MockerFixture) -
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
-    )
+    # Mock the content_to_str function
+    mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
 
     # Mock the get_topic_summary_system_prompt function
     mocker.patch(
diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py
@@ -15,26 +15,26 @@
 
 from llama_stack_client import APIConnectionError
 from llama_stack_client.types import UserMessage  # type: ignore
-from llama_stack_client.types.agents import Turn
+from llama_stack_client.types.alpha.agents.turn import Turn
 from llama_stack_client.types.shared.completion_message import CompletionMessage
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
 from llama_stack_client.types.shared.safety_violation import SafetyViolation
-from llama_stack_client.types.shield_call_step import ShieldCallStep
+from llama_stack_client.types.alpha.shield_call_step import ShieldCallStep
 from llama_stack_client.types.shared.tool_call import ToolCall
 from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta
-from llama_stack_client.types.agents.turn_response_event import TurnResponseEvent
-from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
+from llama_stack_client.types.alpha.agents.turn_response_event import TurnResponseEvent
+from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
     AgentTurnResponseStreamChunk,
 )
-from llama_stack_client.types.agents.turn_response_event_payload import (
+from llama_stack_client.types.alpha.agents.turn_response_event_payload import (
     AgentTurnResponseStepProgressPayload,
     AgentTurnResponseStepCompletePayload,
     AgentTurnResponseTurnStartPayload,
     AgentTurnResponseTurnAwaitingInputPayload,
     AgentTurnResponseTurnCompletePayload,
 )
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
-from llama_stack_client.types.tool_response import ToolResponse
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.alpha.tool_response import ToolResponse
 
 from configuration import AppConfig
 from app.endpoints.query import get_rag_toolgroups