Skip to content

Commit 5149638

Browse files
committed
Bump up to llama-stack 0.4.2
- Updated llama-stack (lls) to 0.4.2.
- Removed unused methods from deprecated endpoints.
- Updated the /models endpoint's parsing of llama-stack models.
- Added Hugging Face cache folder creation in the container.
- Did some refactoring to satisfy the stricter linting/mypy checks.
- Updated run.yaml.
1 parent 6de3909 commit 5149638

39 files changed

Lines changed: 354 additions & 5155 deletions

.github/workflows/e2e_tests.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ jobs:
110110
grep -A 3 "llama_stack:" lightspeed-stack.yaml
111111
112112
- name: Docker Login for quay access
113+
if: matrix.mode == 'server'
113114
env:
114115
QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_DOWNSTREAM_USERNAME }}
115116
QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_DOWNSTREAM_TOKEN }}

Containerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ RUN microdnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs jq
8585
RUN mkdir -p /opt/app-root/src/.llama/storage /opt/app-root/src/.llama/providers.d && \
8686
chown -R 1001:1001 /opt/app-root/src/.llama
8787

88+
# Create Hugging Face cache directory for embedding models
89+
RUN mkdir -p /opt/app-root/src/.cache/huggingface && \
90+
chown -R 1001:1001 /opt/app-root/src/.cache
91+
8892
# Add executables from .venv to system PATH
8993
ENV PATH="/app-root/.venv/bin:$PATH"
9094

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ dependencies = [
2828
# Used by authentication/k8s integration
2929
"kubernetes>=30.1.0",
3030
# Used to call Llama Stack APIs
31-
"llama-stack==0.3.5",
32-
"llama-stack-client==0.3.5",
31+
"llama-stack==0.4.2",
32+
"llama-stack-client==0.4.2",
3333
# Used by Logger
3434
"rich>=14.0.0",
3535
# Used by JWK token auth handler

run.yaml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ apis:
99
- inference
1010
- safety
1111
- scoring
12-
- telemetry
1312
- tool_runtime
1413
- vector_io
1514

@@ -137,11 +136,7 @@ storage:
137136
namespace: prompts
138137
backend: kv_default
139138
registered_resources:
140-
models:
141-
- model_id: gpt-4o-mini
142-
provider_id: openai
143-
model_type: llm
144-
provider_model_id: gpt-4o-mini
139+
models: []
145140
shields:
146141
- shield_id: llama-guard
147142
provider_id: llama-guard
@@ -160,5 +155,3 @@ vector_stores:
160155
model_id: nomic-ai/nomic-embed-text-v1.5
161156
safety:
162157
default_shield_id: llama-guard
163-
telemetry:
164-
enabled: true

src/app/endpoints/a2a.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,17 @@
77
from datetime import datetime, timezone
88
from typing import Annotated, Any, AsyncIterator, MutableMapping, Optional
99

10-
from fastapi import APIRouter, Depends, HTTPException, Request, status
11-
from llama_stack.apis.agents.openai_responses import (
12-
OpenAIResponseObjectStream,
13-
)
14-
from llama_stack_client import APIConnectionError
15-
from starlette.responses import Response, StreamingResponse
16-
10+
from a2a.server.agent_execution import AgentExecutor, RequestContext
11+
from a2a.server.apps import A2AStarletteApplication
12+
from a2a.server.events import EventQueue
13+
from a2a.server.request_handlers import DefaultRequestHandler
14+
from a2a.server.tasks import TaskStore
15+
from a2a.server.tasks.task_updater import TaskUpdater
1716
from a2a.types import (
17+
AgentCapabilities,
1818
AgentCard,
19-
AgentSkill,
2019
AgentProvider,
21-
AgentCapabilities,
20+
AgentSkill,
2221
Artifact,
2322
Message,
2423
Part,
@@ -28,27 +27,27 @@
2827
TaskStatusUpdateEvent,
2928
TextPart,
3029
)
31-
from a2a.server.agent_execution import AgentExecutor, RequestContext
32-
from a2a.server.events import EventQueue
33-
from a2a.server.request_handlers import DefaultRequestHandler
34-
from a2a.server.tasks import TaskStore
35-
from a2a.server.tasks.task_updater import TaskUpdater
36-
from a2a.server.apps import A2AStarletteApplication
3730
from a2a.utils import new_agent_text_message, new_task
31+
from fastapi import APIRouter, Depends, HTTPException, Request, status
32+
from llama_stack_api.openai_responses import (
33+
OpenAIResponseObjectStream,
34+
)
35+
from llama_stack_client import APIConnectionError
36+
from starlette.responses import Response, StreamingResponse
3837

39-
from authentication.interface import AuthTuple
40-
from authentication import get_auth_dependency
41-
from authorization.middleware import authorize
42-
from configuration import configuration
43-
from a2a_storage import A2AStorageFactory, A2AContextStore
44-
from models.config import Action
45-
from models.requests import QueryRequest
38+
from a2a_storage import A2AContextStore, A2AStorageFactory
4639
from app.endpoints.query import (
47-
select_model_and_provider_id,
4840
evaluate_model_hints,
41+
select_model_and_provider_id,
4942
)
5043
from app.endpoints.streaming_query_v2 import retrieve_response
44+
from authentication import get_auth_dependency
45+
from authentication.interface import AuthTuple
46+
from authorization.middleware import authorize
5147
from client import AsyncLlamaStackClientHolder
48+
from configuration import configuration
49+
from models.config import Action
50+
from models.requests import QueryRequest
5251
from utils.mcp_headers import mcp_headers_dependency
5352
from utils.responses import extract_text_from_response_output_item
5453
from version import __version__

src/app/endpoints/conversations_v3.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from llama_stack_client import (
88
APIConnectionError,
99
APIStatusError,
10-
NOT_GIVEN,
1110
)
1211
from sqlalchemy.exc import SQLAlchemyError
1312

@@ -332,10 +331,10 @@ async def get_conversation_endpoint_handler(
332331
# Use Conversations API to retrieve conversation items
333332
conversation_items_response = await client.conversations.items.list(
334333
conversation_id=llama_stack_conv_id,
335-
after=NOT_GIVEN,
336-
include=NOT_GIVEN,
337-
limit=NOT_GIVEN,
338-
order=NOT_GIVEN,
334+
after=None,
335+
include=None,
336+
limit=None,
337+
order=None,
339338
)
340339
items = (
341340
conversation_items_response.data

src/app/endpoints/health.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
"""
77

88
import logging
9+
from enum import Enum
910
from typing import Annotated, Any
1011

1112
from fastapi import APIRouter, Depends, Response, status
12-
from llama_stack.providers.datatypes import HealthStatus
1313
from llama_stack_client import APIConnectionError
1414

1515
from authentication import get_auth_dependency
@@ -30,6 +30,18 @@
3030
router = APIRouter(tags=["health"])
3131

3232

33+
# HealthStatus enum was removed from llama_stack in newer versions
# Defining locally for compatibility
class HealthStatus(str, Enum):
    """Health status enum for provider health checks.

    Local replacement for the ``HealthStatus`` enum that this module used to
    import from ``llama_stack.providers.datatypes``. Members also subclass
    ``str``, so each one compares equal to its plain string value.
    """

    OK = "ok"
    # NOTE(review): capitalized unlike the other values -- presumably kept to
    # match the status string reported upstream; confirm before normalizing.
    ERROR = "Error"
    NOT_IMPLEMENTED = "not_implemented"
    HEALTHY = "healthy"
    UNKNOWN = "unknown"
43+
44+
3345
get_readiness_responses: dict[int | str, dict[str, Any]] = {
3446
200: ReadinessResponse.openapi_response(),
3547
401: UnauthorizedResponse.openapi_response(

src/app/endpoints/models.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,41 @@
2626
router = APIRouter(tags=["models"])
2727

2828

29+
def parse_llama_stack_model(model: Any) -> dict[str, Any]:
    """
    Convert a llama-stack 0.4.x model object into the legacy model dict.

    In the 0.4.x format the provider details (``provider_id``,
    ``provider_resource_id``, ``model_type``) are carried inside
    ``custom_metadata``. They are promoted to top-level keys here, and every
    other ``custom_metadata`` entry is passed through under ``metadata``.

    A promoted key that is missing *or explicitly set to None* falls back to
    its default ("unknown" for ``model_type``, "" for the others) instead of
    being rendered as the literal string "None".

    Args:
        model: Model object from llama-stack (has id, custom_metadata, object fields)

    Returns:
        dict: Model in legacy format with identifier, provider_id, model_type, etc.
    """
    # custom_metadata may be absent or None on the incoming object.
    custom_metadata = getattr(model, "custom_metadata", None) or {}

    def _text(key: str, default: str) -> str:
        # Treat an explicit None like a missing key; str(None) would leak "None".
        value = custom_metadata.get(key)
        return default if value is None else str(value)

    # Keys promoted to the top level are excluded from the pass-through metadata.
    promoted_keys = ("provider_id", "provider_resource_id", "model_type")
    model_type = _text("model_type", "unknown")

    return {
        "identifier": getattr(model, "id", ""),
        "metadata": {
            key: value
            for key, value in custom_metadata.items()
            if key not in promoted_keys
        },
        "api_model_type": model_type,
        "provider_id": _text("provider_id", ""),
        "type": getattr(model, "object", "model"),
        "provider_resource_id": _text("provider_resource_id", ""),
        "model_type": model_type,
    }
62+
63+
2964
models_responses: dict[int | str, dict[str, Any]] = {
3065
200: ModelsResponse.openapi_response(),
3166
401: UnauthorizedResponse.openapi_response(
@@ -72,8 +107,9 @@ async def models_endpoint_handler(
72107
client = AsyncLlamaStackClientHolder().get_client()
73108
# retrieve models
74109
models = await client.models.list()
75-
m = [dict(m) for m in models]
76-
return ModelsResponse(models=m)
110+
# Parse models to legacy format
111+
parsed_models = [parse_llama_stack_model(model) for model in models]
112+
return ModelsResponse(models=parsed_models)
77113

78114
# Connection to Llama Stack server failed
79115
except APIConnectionError as e:

0 commit comments

Comments
 (0)