Bump llama-stack and llama-stack-client to 0.3.0

luis5tb · luis5tb · commit d2933e0d1dd5 · 2025-11-24T10:56:58.000+01:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
     # Used by authentication/k8s integration
     "kubernetes>=30.1.0",
     # Used to call Llama Stack APIs
-    "llama-stack==0.2.22",
-    "llama-stack-client==0.2.22",
+    "llama-stack==0.3.0",
+    "llama-stack-client==0.3.0",
     # Used by Logger
     "rich>=14.0.0",
     # Used by JWK token auth handler
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
@@ -13,17 +13,16 @@
     APIConnectionError,
     AsyncLlamaStackClient,  # type: ignore
 )
-from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
 from llama_stack_client.types import Shield, UserMessage  # type: ignore
-from llama_stack_client.types.agents.turn import Turn
-from llama_stack_client.types.agents.turn_create_params import (
+from llama_stack_client.types.alpha.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn_create_params import (
     Document,
     Toolgroup,
     ToolgroupAgentToolGroupWithArgs,
 )
 from llama_stack_client.types.model_list_response import ModelListResponse
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
 from sqlalchemy.exc import SQLAlchemyError
 
 import constants
@@ -68,7 +67,7 @@
 )
 from utils.token_counter import TokenCounter, extract_and_update_token_metrics
 from utils.transcripts import store_transcript
-from utils.types import TurnSummary
+from utils.types import TurnSummary, content_to_str
 
 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["query"])
@@ -202,7 +201,7 @@ async def get_topic_summary(
     )
     response = cast(Turn, response)
     return (
-        interleaved_content_as_str(response.output_message.content)
+        content_to_str(response.output_message.content)
         if (
             getattr(response, "output_message", None) is not None
             and getattr(response.output_message, "content", None) is not None
@@ -764,7 +763,7 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
 
     summary = TurnSummary(
         llm_response=(
-            interleaved_content_as_str(response.output_message.content)
+            content_to_str(response.output_message.content)
             if (
                 getattr(response, "output_message", None) is not None
                 and getattr(response.output_message, "content", None) is not None
diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
@@ -16,12 +16,11 @@
     APIConnectionError,
     AsyncLlamaStackClient,  # type: ignore
 )
-from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
 from llama_stack_client.types import UserMessage  # type: ignore
-from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
+from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
     AgentTurnResponseStreamChunk,
 )
-from llama_stack_client.types.agents.turn_create_params import Document
+from llama_stack_client.types.alpha.agents.turn_create_params import Document
 from llama_stack_client.types.shared import ToolCall
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
 
@@ -69,7 +68,7 @@
 from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
 from utils.token_counter import TokenCounter, extract_token_usage_from_turn
 from utils.transcripts import store_transcript
-from utils.types import TurnSummary
+from utils.types import TurnSummary, content_to_str
 
 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["streaming_query"])
@@ -431,9 +430,7 @@ def _handle_turn_complete_event(
         str: SSE-formatted string containing the turn completion
         event and output message content.
     """
-    full_response = interleaved_content_as_str(
-        chunk.event.payload.turn.output_message.content
-    )
+    full_response = content_to_str(chunk.event.payload.turn.output_message.content)
 
     if media_type == MEDIA_TYPE_TEXT:
         yield (
@@ -602,7 +599,7 @@ def _handle_tool_execution_event(
 
         for r in chunk.event.payload.step_details.tool_responses:
             if r.tool_name == "query_from_memory":
-                inserted_context = interleaved_content_as_str(r.content)
+                inserted_context = content_to_str(r.content)
                 yield stream_event(
                     data={
                         "id": chunk_id,
@@ -653,7 +650,7 @@ def _handle_tool_execution_event(
                         "id": chunk_id,
                         "token": {
                             "tool_name": r.tool_name,
-                            "response": interleaved_content_as_str(r.content),
+                            "response": content_to_str(r.content),
                         },
                     },
                     event_type=LLM_TOOL_RESULT_EVENT,
@@ -736,9 +733,7 @@ async def response_generator(
                 continue
             p = chunk.event.payload
             if p.event_type == "turn_complete":
-                summary.llm_response = interleaved_content_as_str(
-                    p.turn.output_message.content
-                )
+                summary.llm_response = content_to_str(p.turn.output_message.content)
                 latest_turn = p.turn
                 system_prompt = get_system_prompt(context.query_request, configuration)
                 try:
diff --git a/src/constants.py b/src/constants.py
@@ -2,7 +2,7 @@
 
 # Minimal and maximal supported Llama Stack version
 MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17"
-MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.22"
+MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.0"
 
 UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request"
 
diff --git a/src/metrics/utils.py b/src/metrics/utils.py
@@ -7,7 +7,7 @@
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack_client import APIConnectionError, APIStatusError
-from llama_stack_client.types.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn import Turn
 
 import metrics
 from client import AsyncLlamaStackClientHolder
diff --git a/src/models/requests.py b/src/models/requests.py
@@ -4,7 +4,7 @@
 from enum import Enum
 
 from pydantic import BaseModel, model_validator, field_validator, Field
-from llama_stack_client.types.agents.turn_create_params import Document
+from llama_stack_client.types.alpha.agents.turn_create_params import Document
 
 from log import get_logger
 from utils import suid
diff --git a/src/utils/token_counter.py b/src/utils/token_counter.py
@@ -7,7 +7,7 @@
 from llama_stack.models.llama.datatypes import RawMessage
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack_client.types.agents.turn import Turn
+from llama_stack_client.types.alpha.agents.turn import Turn
 
 import metrics
 
diff --git a/src/utils/types.py b/src/utils/types.py
@@ -2,16 +2,41 @@
 
 from typing import Any, Optional
 import json
-from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
 from llama_stack_client.lib.agents.tool_parser import ToolParser
 from llama_stack_client.types.shared.completion_message import CompletionMessage
 from llama_stack_client.types.shared.tool_call import ToolCall
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.shared.interleaved_content_item import (
+    TextContentItem,
+    ImageContentItem,
+)
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
 from pydantic import BaseModel
 from models.responses import RAGChunk
 from constants import DEFAULT_RAG_TOOL
 
 
+def content_to_str(content: Any) -> str:
+    """Convert content (str, TextContentItem, ImageContentItem, or list) to string.
+
+    Args:
+        content: Value to convert; None gives "", an image item gives "<image>".
+
+    Returns:
+        str: The text; list items are converted recursively and joined by " ".
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, TextContentItem):
+        return content.text
+    if isinstance(content, ImageContentItem):
+        return "<image>"  # placeholder text; the image data itself is not rendered
+    if isinstance(content, list):
+        return " ".join(content_to_str(item) for item in content)
+    return str(content)  # fallback: any other type is passed through str()
+
+
 class Singleton(type):
     """Metaclass for Singleton support."""
 
@@ -99,9 +124,7 @@ def append_tool_calls_from_llama(self, tec: ToolExecutionStep) -> None:
         responses_by_id = {tc.call_id: tc for tc in tec.tool_responses}
         for call_id, tc in calls_by_id.items():
             resp = responses_by_id.get(call_id)
-            response_content = (
-                interleaved_content_as_str(resp.content) if resp else None
-            )
+            response_content = content_to_str(resp.content) if resp else None
 
             self.tool_calls.append(
                 ToolCallSummary(
diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py
@@ -11,11 +11,11 @@
 from fastapi import HTTPException, Request, status
 from litellm.exceptions import RateLimitError
 from llama_stack_client import APIConnectionError
-from llama_stack_client.types import UserMessage
-from llama_stack_client.types.agents.turn import Turn
+from llama_stack_client.types import UserMessage  # type: ignore
+from llama_stack_client.types.alpha.agents.turn import Turn
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
-from llama_stack_client.types.tool_response import ToolResponse
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.alpha.tool_response import ToolResponse
 from pydantic import AnyUrl
 from pytest_mock import MockerFixture
 
@@ -1935,9 +1935,9 @@ async def test_get_topic_summary_successful_response(mocker: MockerFixture) -> N
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
+    # Mock the content_to_str function
     mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str",
+        "app.endpoints.query.content_to_str",
         return_value="This is a topic summary about OpenStack",
     )
 
@@ -2068,9 +2068,9 @@ async def test_get_topic_summary_with_interleaved_content(
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mock_interleaved_content_as_str = mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
+    # Mock the content_to_str function
+    mock_content_to_str = mocker.patch(
+        "app.endpoints.query.content_to_str", return_value="Topic summary"
     )
 
     # Mock the get_topic_summary_system_prompt function
@@ -2091,8 +2091,8 @@ async def test_get_topic_summary_with_interleaved_content(
     # Assertions
     assert result == "Topic summary"
 
-    # Verify interleaved_content_as_str was called with the content
-    mock_interleaved_content_as_str.assert_called_once_with(mock_content)
+    # Verify content_to_str was called with the content
+    mock_content_to_str.assert_called_once_with(mock_content)
 
 
 @pytest.mark.asyncio
@@ -2113,10 +2113,8 @@ async def test_get_topic_summary_system_prompt_retrieval(mocker: MockerFixture)
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
-    )
+    # Mock the content_to_str function
+    mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
 
     # Mock the get_topic_summary_system_prompt function
     mock_get_topic_summary_system_prompt = mocker.patch(
@@ -2189,10 +2187,8 @@ async def test_get_topic_summary_agent_creation_parameters(
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
-    )
+    # Mock the content_to_str function
+    mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
 
     # Mock the get_topic_summary_system_prompt function
     mocker.patch(
@@ -2236,10 +2232,8 @@ async def test_get_topic_summary_create_turn_parameters(mocker: MockerFixture) -
     # Mock the agent's create_turn method
     mock_agent.create_turn.return_value = mock_response
 
-    # Mock the interleaved_content_as_str function
-    mocker.patch(
-        "app.endpoints.query.interleaved_content_as_str", return_value="Topic summary"
-    )
+    # Mock the content_to_str function
+    mocker.patch("app.endpoints.query.content_to_str", return_value="Topic summary")
 
     # Mock the get_topic_summary_system_prompt function
     mocker.patch(
diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py
@@ -10,27 +10,27 @@
 from litellm.exceptions import RateLimitError
 from llama_stack_client import APIConnectionError
 from llama_stack_client.types import UserMessage  # type: ignore
-from llama_stack_client.types.agents import Turn
-from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
+from llama_stack_client.types.alpha.agents.turn import Turn
+from llama_stack_client.types.shared.completion_message import CompletionMessage
+from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
+from llama_stack_client.types.shared.safety_violation import SafetyViolation
+from llama_stack_client.types.alpha.shield_call_step import ShieldCallStep
+from llama_stack_client.types.shared.tool_call import ToolCall
+from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta
+from llama_stack_client.types.alpha.agents.turn_response_event import TurnResponseEvent
+from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
     AgentTurnResponseStreamChunk,
 )
-from llama_stack_client.types.agents.turn_response_event import TurnResponseEvent
-from llama_stack_client.types.agents.turn_response_event_payload import (
+from llama_stack_client.types.alpha.agents.turn_response_event_payload import (
     AgentTurnResponseStepCompletePayload,
     AgentTurnResponseStepProgressPayload,
     AgentTurnResponseTurnAwaitingInputPayload,
     AgentTurnResponseTurnCompletePayload,
     AgentTurnResponseTurnStartPayload,
 )
-from llama_stack_client.types.shared.completion_message import CompletionMessage
-from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta
-from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.shared.safety_violation import SafetyViolation
-from llama_stack_client.types.shared.tool_call import ToolCall
-from llama_stack_client.types.shield_call_step import ShieldCallStep
-from llama_stack_client.types.tool_execution_step import ToolExecutionStep
-from llama_stack_client.types.tool_response import ToolResponse
 from pytest_mock import MockerFixture
+from llama_stack_client.types.alpha.tool_execution_step import ToolExecutionStep
+from llama_stack_client.types.alpha.tool_response import ToolResponse
 
 from app.endpoints.query import get_rag_toolgroups
 from app.endpoints.streaming_query import (