Skip to content

Commit 3cfb68c

Browse files
committed
LCORE-1872: Launch Llama Stack container via Makefile orchestration
Enables LCORE to automatically launch Llama Stack as a containerized service through Makefile orchestration. Running `make run` now handles all infrastructure setup: i) building the container image, ii) stopping any existing instance, iii) launching a fresh llama-stack container, iv) waiting for the started container to pass its health check, and v) finally starting the lightspeed-stack service. Signed-off-by: Anik Bhattacharjee <anbhatta@redhat.com>
1 parent 73637c6 commit 3cfb68c

3 files changed

Lines changed: 109 additions & 25 deletions

File tree

Makefile

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,96 @@ PYTHON_REGISTRY = pypi
1111
CONFIG ?= lightspeed-stack.yaml
1212
LLAMA_STACK_CONFIG ?= run.yaml
1313

14-
run: ## Run the service locally
14+
# Container configuration. Every value below can be overridden from the
# environment or the command line, e.g. `make run LLAMA_STACK_PORT=9000`.
LLAMA_STACK_CONTAINER_NAME ?= lightspeed-llama-stack
LLAMA_STACK_IMAGE ?= lightspeed-llama-stack:local
LLAMA_STACK_PORT ?= 8321
# Prefer podman and fall back to docker; the value is empty when neither is found.
CONTAINER_RUNTIME ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)
# `?=` defines a recursively-expanded variable, so the $(shell ...) probe above
# would otherwise be re-executed on every reference. Pin the result once here;
# a command-line override still takes precedence over this assignment.
CONTAINER_RUNTIME := $(CONTAINER_RUNTIME)

.PHONY: run build-llama-stack-image remove-llama-stack-container start-llama-stack-container wait-for-llama-stack-health clean-llama-stack
21+
22+
# Local entry point: the prerequisite brings up the llama-stack container
# before the Lightspeed Core Stack service is started with $(CONFIG).
run: start-llama-stack-container ## Run the service locally with llama-stack container
	@printf '%s\n' "Starting Lightspeed Core Stack..."
	uv run src/lightspeed_stack.py -c $(CONFIG)
1625

17-
run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
18-
uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
19-
AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
20-
uv run llama stack run $(LLAMA_STACK_CONFIG)
26+
# Builds $(LLAMA_STACK_IMAGE) from deploy/llama-stack/test.containerfile.
# Any existing container is removed first through the prerequisite.
build-llama-stack-image: remove-llama-stack-container ## Build llama-stack container image
	@echo "Building llama-stack container image..."
	@[ -n "$(CONTAINER_RUNTIME)" ] || { \
		echo "ERROR: No container runtime found. Install podman or docker."; \
		exit 1; \
	}
	$(CONTAINER_RUNTIME) build -f deploy/llama-stack/test.containerfile -t $(LLAMA_STACK_IMAGE) .
33+
34+
# Force-removes the container named $(LLAMA_STACK_CONTAINER_NAME) when it exists.
# Succeeds silently when no runtime is available or the container is absent.
remove-llama-stack-container: ## Remove existing llama-stack container
	@[ -n "$(CONTAINER_RUNTIME)" ] || exit 0; \
	$(CONTAINER_RUNTIME) inspect $(LLAMA_STACK_CONTAINER_NAME) >/dev/null 2>&1 || exit 0; \
	echo "Removing existing llama-stack container..."; \
	$(CONTAINER_RUNTIME) rm -f $(LLAMA_STACK_CONTAINER_NAME)
39+
40+
# Starts a detached llama-stack container from $(LLAMA_STACK_IMAGE) and then
# blocks until its health check passes (via wait-for-llama-stack-health).
#
# - Host port $(LLAMA_STACK_PORT) is published to container port 8321.
# - The llama-stack config, the service config, the entrypoint script and the
#   config-enrichment script are bind-mounted read-only.
# - `-e NAME` forwards the variable from the caller's environment as-is;
#   `-e NAME=$${NAME:-default}` forwards it with a fallback value.
#
# Host paths use $(CURDIR) rather than $(PWD): PWD is inherited from the
# invoking shell and is wrong under `make -C <dir>`, whereas CURDIR is always
# the directory make is running in. The -v arguments are quoted so that a
# checkout path containing spaces still works.
start-llama-stack-container: build-llama-stack-image ## Start llama-stack container
	@echo "Starting llama-stack container..."
	$(CONTAINER_RUNTIME) run -d \
		--name $(LLAMA_STACK_CONTAINER_NAME) \
		-p $(LLAMA_STACK_PORT):8321 \
		--health-cmd "curl -f http://localhost:8321/v1/health || exit 1" \
		--health-interval 10s \
		--health-timeout 5s \
		--health-retries 3 \
		--health-start-period 15s \
		-v "$(CURDIR)/$(LLAMA_STACK_CONFIG):/opt/app-root/run.yaml:ro,z" \
		-v "$(CURDIR)/$(CONFIG):/opt/app-root/lightspeed-stack.yaml:ro,z" \
		-v "$(CURDIR)/scripts/llama-stack-entrypoint.sh:/opt/app-root/enrich-entrypoint.sh:ro,z" \
		-v "$(CURDIR)/src/llama_stack_configuration.py:/opt/app-root/llama_stack_configuration.py:ro,z" \
		-e OPENAI_API_KEY \
		-e EXTERNAL_PROVIDERS_DIR=$${EXTERNAL_PROVIDERS_DIR:-/opt/app-root/external_providers} \
		-e BRAVE_SEARCH_API_KEY \
		-e TAVILY_SEARCH_API_KEY \
		-e E2E_OPENAI_MODEL=$${E2E_OPENAI_MODEL:-gpt-4o-mini} \
		-e TENANT_ID=$${TENANT_ID:-} \
		-e CLIENT_ID=$${CLIENT_ID:-} \
		-e CLIENT_SECRET \
		-e RHAIIS_URL=$${RHAIIS_URL:-} \
		-e RHAIIS_PORT=$${RHAIIS_PORT:-} \
		-e RHAIIS_API_KEY \
		-e RHAIIS_MODEL=$${RHAIIS_MODEL:-} \
		-e RHEL_AI_URL=$${RHEL_AI_URL:-} \
		-e RHEL_AI_PORT=$${RHEL_AI_PORT:-} \
		-e RHEL_AI_API_KEY \
		-e RHEL_AI_MODEL=$${RHEL_AI_MODEL:-} \
		-e GOOGLE_APPLICATION_CREDENTIALS \
		-e VERTEX_AI_PROJECT=$${VERTEX_AI_PROJECT:-} \
		-e VERTEX_AI_LOCATION=$${VERTEX_AI_LOCATION:-} \
		-e WATSONX_BASE_URL=$${WATSONX_BASE_URL:-} \
		-e WATSONX_PROJECT_ID=$${WATSONX_PROJECT_ID:-} \
		-e WATSONX_API_KEY \
		-e LITELLM_DROP_PARAMS=true \
		-e AWS_BEARER_TOKEN_BEDROCK \
		-e LLAMA_STACK_LOGGING=$${LLAMA_STACK_LOGGING:-} \
		-e FAISS_VECTOR_STORE_ID=$${FAISS_VECTOR_STORE_ID:-} \
		$(LLAMA_STACK_IMAGE)
	@$(MAKE) wait-for-llama-stack-health
82+
83+
# Polls the container's health status every 2 seconds, up to 30 attempts
# (about 60 seconds). Exits 0 as soon as the status is "healthy"; otherwise
# prints the container logs and exits 1.
#
# The loop uses a POSIX counter instead of `{1..30}`: brace expansion is a
# bash feature, and make runs recipes with /bin/sh by default, where the
# brace form is a single literal word and the loop would run only once.
wait-for-llama-stack-health: ## Wait for llama-stack container to be healthy
	@echo "Waiting for llama-stack container to be healthy..."
	@i=1; \
	while [ "$$i" -le 30 ]; do \
		STATUS=$$($(CONTAINER_RUNTIME) inspect --format='{{.State.Health.Status}}' $(LLAMA_STACK_CONTAINER_NAME) 2>/dev/null || echo "no-healthcheck"); \
		if [ "$$STATUS" = "healthy" ]; then \
			echo "✓ Llama-stack is healthy and ready!"; \
			exit 0; \
		fi; \
		echo " Health status: $$STATUS (attempt $$i/30)"; \
		sleep 2; \
		i=$$((i + 1)); \
	done; \
	echo "✗ ERROR: Llama-stack did not become healthy within 60 seconds"; \
	echo "Container logs:"; \
	$(CONTAINER_RUNTIME) logs $(LLAMA_STACK_CONTAINER_NAME); \
	exit 1
98+
99+
# Removes the container (through the prerequisite) and then the image
# $(LLAMA_STACK_IMAGE) when it exists. Succeeds silently when no runtime is
# available or the image is absent.
clean-llama-stack: remove-llama-stack-container ## Remove container and image
	@[ -n "$(CONTAINER_RUNTIME)" ] || exit 0; \
	[ -n "$$($(CONTAINER_RUNTIME) images -q $(LLAMA_STACK_IMAGE))" ] || exit 0; \
	echo "Removing llama-stack image..."; \
	$(CONTAINER_RUNTIME) rmi $(LLAMA_STACK_IMAGE)
21104

22105
test-unit: ## Run the unit tests
23106
@echo "Running unit tests..."

README.md

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -178,20 +178,22 @@ To quickly get hands on LCS, we can run it using the default configurations prov
178178
```bash
179179
export OPENAI_API_KEY=sk-xxxxx
180180
```
181-
3. start Llama stack server
181+
3. start LCS server
182182
```bash
183-
uv run llama stack run local-run.yaml
184-
```
185-
4. [Optional] If you're new to Llama stack, run through a quick tutorial to learn the basics of what the server is used for, by running the interactive tutorial script
186-
```bash
187-
./scripts/llama_stack_tutorial.sh
188-
```
189-
5. check the LCS settings in [lightspeed-stack.yaml](lightspeed-stack.yaml). `llama_stack.url` should be `url: http://localhost:8321`
190-
6. start LCS server
191-
```
192183
make run
193-
```
194-
7. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
184+
```
185+
4. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
186+
187+
**Note**: `make run` uses containerized llama-stack (service mode). To run llama-stack manually instead, see the [Llama Stack as separate server](#llama-stack-as-separate-server) section below.
188+
189+
## Container Runtime Requirements
190+
191+
The Makefile requires either Podman or Docker to launch the Llama Stack container:
192+
193+
- **Podman** (recommended for RHEL/Fedora): `sudo dnf install podman`
194+
- **Docker**: Install from [docker.com](https://docs.docker.com/get-docker/)
195+
196+
The Makefile will auto-detect which runtime is available.
195197

196198

197199
# Configuration
@@ -831,7 +833,6 @@ Usage: make <OPTIONS> ... <TARGETS>
831833
Available targets are:
832834
833835
run Run the service locally with llama-stack container
834-
run-llama-stack Start Llama Stack with enriched config (for local service mode)
835836
test-unit Run the unit tests
836837
test-integration Run integration tests
837838
test-e2e Run end to end tests for the service

lightspeed-stack.yaml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@ service:
77
workers: 1
88
color_log: true
99
access_log: true
10+
# llama_stack configuration
11+
# When using 'make run', a llama-stack container is ALWAYS launched, by default at http://localhost:8321 (port set by LLAMA_STACK_PORT in the Makefile).
12+
# This llama_stack section controls where lightspeed-core connects to llama-stack.
13+
# To use a different port: override with 'make run LLAMA_STACK_PORT=<port>' and update the url below,
14+
# or run llama-stack manually and don't use 'make run'.
1015
llama_stack:
11-
# Uses a remote llama-stack service
12-
# The instance would have already been started with a llama-stack-run.yaml file
1316
use_as_library_client: false
14-
# Alternative for "as library use"
15-
# use_as_library_client: true
16-
# library_client_config_path: <path-to-llama-stack-run.yaml-file>
17-
url: http://llama-stack:8321
18-
api_key: xyzzy
17+
url: http://localhost:8321
18+
# api_key: custom-key # Uncomment if your llama-stack requires authentication
1919
user_data_collection:
2020
feedback_enabled: true
2121
feedback_storage: "/tmp/data/feedback"

0 commit comments

Comments
 (0)