Skip to content

Commit 546efbf

Browse files
committed
LCORE-1872: Launch Llama Stack container via Makefile orchestration
Enables LCORE to automatically launch Llama Stack as a containerized service through Makefile orchestration. Running `make run` now handles all infrastructure setup: (i) building the container image, (ii) stopping any existing instance, (iii) launching a fresh llama-stack container, (iv) waiting for the new container to pass its health check, and (v) starting the lightspeed-stack service. Signed-off-by: Anik Bhattacharjee <anbhatta@redhat.com>
1 parent 73637c6 commit 546efbf

3 files changed

Lines changed: 109 additions & 24 deletions

File tree

Makefile

Lines changed: 90 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,98 @@ PYTHON_REGISTRY = pypi
1111
CONFIG ?= lightspeed-stack.yaml
1212
LLAMA_STACK_CONFIG ?= run.yaml
1313

14-
run: ## Run the service locally
14+
# Container configuration
# Every knob below can be overridden from the environment or the command line,
# e.g. `make run LLAMA_STACK_PORT=9000 CONTAINER_RUNTIME=docker`.
LLAMA_STACK_CONTAINER_NAME ?= lightspeed-llama-stack
LLAMA_STACK_IMAGE ?= lightspeed-llama-stack:local
# Host port published for the container's port 8321.
LLAMA_STACK_PORT ?= 8321

# Prefer podman and fall back to docker; the value is empty when neither is on PATH.
# `?=` always creates a recursively expanded variable, which would re-run the
# shell probe on every $(CONTAINER_RUNTIME) reference. Probe once with `:=`,
# and only when the caller has not supplied a value (same override rule as `?=`).
ifeq ($(origin CONTAINER_RUNTIME),undefined)
CONTAINER_RUNTIME := $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)
endif

.PHONY: run ensure-llama-stack-container build-llama-stack-image stop-llama-stack-container start-llama-stack-container wait-for-llama-stack-health clean-llama-stack
21+
22+
# Bring up the llama-stack container (prerequisite), then launch the
# Lightspeed Core Stack service with the configuration file named by $(CONFIG).
run: ensure-llama-stack-container ## Run the service locally with llama-stack container
	@printf '%s\n' "Starting Lightspeed Core Stack..."
	uv run src/lightspeed_stack.py -c $(CONFIG)
1625

17-
run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
18-
uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
19-
AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
20-
uv run llama stack run $(LLAMA_STACK_CONFIG)
26+
# Recreate the llama-stack container from scratch: remove any existing
# instance, rebuild the image, then start a fresh container.
# The steps run as sequential sub-makes rather than as prerequisites because
# prerequisites carry no ordering guarantee under `make -j`; listed side by
# side, the start step could race the stop or build step.
ensure-llama-stack-container:
	@$(MAKE) stop-llama-stack-container
	@$(MAKE) build-llama-stack-image
	@$(MAKE) start-llama-stack-container
27+
28+
# Build $(LLAMA_STACK_IMAGE) from deploy/llama-stack/test.containerfile with the
# current directory as build context. Fails with a clear message when no
# container runtime was detected.
build-llama-stack-image: ## Build llama-stack container image
	@echo "Building llama-stack container image..."
	@[ -n "$(CONTAINER_RUNTIME)" ] || { \
		echo "ERROR: No container runtime found. Install podman or docker."; \
		exit 1; \
	}
	$(CONTAINER_RUNTIME) build -f deploy/llama-stack/test.containerfile -t $(LLAMA_STACK_IMAGE) .
35+
36+
# Force-remove the container named $(LLAMA_STACK_CONTAINER_NAME) if the runtime
# can inspect it. Does nothing (and succeeds) when no runtime was detected or
# when the inspect call fails.
stop-llama-stack-container: ## Stop and remove existing llama-stack container
	@if [ -n "$(CONTAINER_RUNTIME)" ]; then \
		if $(CONTAINER_RUNTIME) inspect $(LLAMA_STACK_CONTAINER_NAME) >/dev/null 2>&1; then \
			echo "Stopping existing llama-stack container..."; \
			$(CONTAINER_RUNTIME) rm -f $(LLAMA_STACK_CONTAINER_NAME); \
		fi; \
	fi
41+
42+
# Start a detached llama-stack container named $(LLAMA_STACK_CONTAINER_NAME)
# from $(LLAMA_STACK_IMAGE), then block until its health check passes.
#
# - Host port $(LLAMA_STACK_PORT) is published to container port 8321.
# - The health check curls http://localhost:8321/v1/health inside the container.
# - $(LLAMA_STACK_CONFIG), $(CONFIG), the entrypoint script and
#   llama_stack_configuration.py are bind-mounted read-only under /opt/app-root.
#   Paths are resolved with $(abspath)/$(CURDIR) rather than $(PWD): PWD is
#   inherited from the environment and is not updated by `make -C`, and
#   $(abspath) also accepts absolute CONFIG / LLAMA_STACK_CONFIG values.
# - `-e NAME` forwards NAME from the host environment; `-e NAME=$${NAME:-default}`
#   always defines NAME, using the given (possibly empty) default when the host
#   value is unset or empty.
start-llama-stack-container: ## Start llama-stack container
	@echo "Starting llama-stack container..."
	@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
		echo "ERROR: No container runtime found. Install podman or docker."; \
		exit 1; \
	fi
	$(CONTAINER_RUNTIME) run -d \
		--name $(LLAMA_STACK_CONTAINER_NAME) \
		-p $(LLAMA_STACK_PORT):8321 \
		--health-cmd "curl -f http://localhost:8321/v1/health || exit 1" \
		--health-interval 10s \
		--health-timeout 5s \
		--health-retries 3 \
		--health-start-period 15s \
		-v $(abspath $(LLAMA_STACK_CONFIG)):/opt/app-root/run.yaml:ro,z \
		-v $(abspath $(CONFIG)):/opt/app-root/lightspeed-stack.yaml:ro,z \
		-v $(CURDIR)/scripts/llama-stack-entrypoint.sh:/opt/app-root/enrich-entrypoint.sh:ro,z \
		-v $(CURDIR)/src/llama_stack_configuration.py:/opt/app-root/llama_stack_configuration.py:ro,z \
		-e OPENAI_API_KEY \
		-e EXTERNAL_PROVIDERS_DIR=$${EXTERNAL_PROVIDERS_DIR:-/opt/app-root/external_providers} \
		-e BRAVE_SEARCH_API_KEY \
		-e TAVILY_SEARCH_API_KEY \
		-e E2E_OPENAI_MODEL=$${E2E_OPENAI_MODEL:-gpt-4o-mini} \
		-e TENANT_ID=$${TENANT_ID:-} \
		-e CLIENT_ID=$${CLIENT_ID:-} \
		-e CLIENT_SECRET \
		-e RHAIIS_URL=$${RHAIIS_URL:-} \
		-e RHAIIS_PORT=$${RHAIIS_PORT:-} \
		-e RHAIIS_API_KEY \
		-e RHAIIS_MODEL=$${RHAIIS_MODEL:-} \
		-e RHEL_AI_URL=$${RHEL_AI_URL:-} \
		-e RHEL_AI_PORT=$${RHEL_AI_PORT:-} \
		-e RHEL_AI_API_KEY \
		-e RHEL_AI_MODEL=$${RHEL_AI_MODEL:-} \
		-e GOOGLE_APPLICATION_CREDENTIALS \
		-e VERTEX_AI_PROJECT=$${VERTEX_AI_PROJECT:-} \
		-e VERTEX_AI_LOCATION=$${VERTEX_AI_LOCATION:-} \
		-e WATSONX_BASE_URL=$${WATSONX_BASE_URL:-} \
		-e WATSONX_PROJECT_ID=$${WATSONX_PROJECT_ID:-} \
		-e WATSONX_API_KEY \
		-e LITELLM_DROP_PARAMS=true \
		-e AWS_BEARER_TOKEN_BEDROCK \
		-e LLAMA_STACK_LOGGING=$${LLAMA_STACK_LOGGING:-} \
		-e FAISS_VECTOR_STORE_ID=$${FAISS_VECTOR_STORE_ID:-} \
		$(LLAMA_STACK_IMAGE)
	@$(MAKE) wait-for-llama-stack-health
84+
85+
# Poll the container's health status every 2 seconds, up to 30 times (60 s).
# Exits 0 as soon as the runtime reports "healthy"; otherwise prints the last
# 50 log lines of the container and exits 1.
# A failed inspect call (e.g. the container does not exist) is reported as
# "no-healthcheck" and polling continues.
# The counter is a POSIX `while` loop on purpose: `for i in {1..30}` relies on
# bash brace expansion, and under a POSIX /bin/sh such as dash the loop body
# would run exactly once with the literal word `{1..30}`.
wait-for-llama-stack-health: ## Wait for llama-stack container to be healthy
	@echo "Waiting for llama-stack container to be healthy..."
	@attempt=1; \
	while [ "$$attempt" -le 30 ]; do \
		STATUS=$$($(CONTAINER_RUNTIME) inspect --format='{{.State.Health.Status}}' $(LLAMA_STACK_CONTAINER_NAME) 2>/dev/null || echo "no-healthcheck"); \
		if [ "$$STATUS" = "healthy" ]; then \
			echo "✓ Llama-stack is healthy and ready!"; \
			exit 0; \
		fi; \
		echo " Health status: $$STATUS (attempt $$attempt/30)"; \
		sleep 2; \
		attempt=$$((attempt + 1)); \
	done; \
	echo "✗ ERROR: Llama-stack did not become healthy within 60 seconds"; \
	echo "Container logs:"; \
	$(CONTAINER_RUNTIME) logs --tail 50 $(LLAMA_STACK_CONTAINER_NAME); \
	exit 1
100+
101+
# Remove the container (via the stop-llama-stack-container prerequisite) and
# then delete $(LLAMA_STACK_IMAGE) when the runtime lists an image ID for it.
# Succeeds without doing anything when no runtime was detected or the image
# is absent.
clean-llama-stack: stop-llama-stack-container ## Stop container and remove image
	@if [ -n "$(CONTAINER_RUNTIME)" ] && [ -n "$$($(CONTAINER_RUNTIME) images -q $(LLAMA_STACK_IMAGE))" ]; then \
		echo "Removing llama-stack image..."; \
		$(CONTAINER_RUNTIME) rmi $(LLAMA_STACK_IMAGE); \
	fi
21106

22107
test-unit: ## Run the unit tests
23108
@echo "Running unit tests..."

README.md

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -178,20 +178,22 @@ To quickly get hands on LCS, we can run it using the default configurations prov
178178
```bash
179179
export OPENAI_API_KEY=sk-xxxxx
180180
```
181-
3. start Llama stack server
181+
3. start LCS server
182182
```bash
183-
uv run llama stack run local-run.yaml
184-
```
185-
4. [Optional] If you're new to Llama stack, run through a quick tutorial to learn the basics of what the server is used for, by running the interactive tutorial script
186-
```bash
187-
./scripts/llama_stack_tutorial.sh
188-
```
189-
5. check the LCS settings in [lightspeed-stack.yaml](lightspeed-stack.yaml). `llama_stack.url` should be `url: http://localhost:8321`
190-
6. start LCS server
191-
```
192183
make run
193-
```
194-
7. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
184+
```
185+
4. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
186+
187+
**Note**: `make run` builds and starts Llama Stack in a container and runs LCS against it in service mode. To run Llama Stack manually instead, see the [Llama Stack as separate server](#llama-stack-as-separate-server) section below.
188+
189+
## Container Runtime Requirements
190+
191+
The Makefile requires either Podman or Docker to launch the Llama Stack container:
192+
193+
- **Podman** (recommended for RHEL/Fedora): `sudo dnf install podman`
194+
- **Docker**: Install from [docker.com](https://docs.docker.com/get-docker/)
195+
196+
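The Makefile auto-detects the runtime: it uses Podman when it is installed and falls back to Docker otherwise. To force a specific runtime, set `CONTAINER_RUNTIME`, for example `make run CONTAINER_RUNTIME=docker`.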
195197

196198

197199
# Configuration

lightspeed-stack.yaml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,13 @@ service:
77
workers: 1
88
color_log: true
99
access_log: true
10+
# llama_stack configuration
11+
# When using 'make run', a container is launched automatically at http://localhost:8321
12+
# and this configuration is ignored.
1013
llama_stack:
11-
# Uses a remote llama-stack service
12-
# The instance would have already been started with a llama-stack-run.yaml file
1314
use_as_library_client: false
14-
# Alternative for "as library use"
15-
# use_as_library_client: true
16-
# library_client_config_path: <path-to-llama-stack-run.yaml-file>
17-
url: http://llama-stack:8321
18-
api_key: xyzzy
15+
url: http://localhost:8321
16+
# api_key: custom-key # Uncomment if your llama-stack requires authentication
1917
user_data_collection:
2018
feedback_enabled: true
2119
feedback_storage: "/tmp/data/feedback"

0 commit comments

Comments
 (0)