Skip to content

Commit ad72ddd

Browse files
committed
LCORE-1872: Launch Llama Stack container via Makefile orchestration
Enables LCORE to automatically launch Llama Stack as a containerized service through Makefile orchestration. Running `make run` now handles all infrastructure setup: (i) building the container image, (ii) stopping any existing instance, (iii) launching a fresh llama-stack container, (iv) waiting for the new container's health check to pass, and (v) starting the lightspeed-stack service. Signed-off-by: Anik Bhattacharjee <anbhatta@redhat.com>
1 parent 73637c6 commit ad72ddd

6 files changed

Lines changed: 153 additions & 414 deletions

File tree

Makefile

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,96 @@ PYTHON_REGISTRY = pypi
1111
CONFIG ?= lightspeed-stack.yaml
1212
LLAMA_STACK_CONFIG ?= run.yaml
1313

14-
run: ## Run the service locally
14+
# Container configuration
# Every knob is assigned with ?= so it can be overridden from the environment
# or the command line, e.g. `make run CONTAINER_RUNTIME=docker`.
LLAMA_STACK_CONTAINER_NAME ?= lightspeed-llama-stack
LLAMA_STACK_IMAGE ?= lightspeed-llama-stack:local
LLAMA_STACK_PORT ?= 8321
# Prefer podman and fall back to docker; the value is empty when neither is on PATH.
CONTAINER_RUNTIME ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)
# ?= always creates a recursively-expanded variable, so without the line below
# the $(shell ...) probe would be forked again on every $(CONTAINER_RUNTIME)
# reference. Re-assigning with := expands it exactly once at parse time while
# still honouring a value supplied by the user.
CONTAINER_RUNTIME := $(CONTAINER_RUNTIME)
19+
20+
# Local entry point: the prerequisite brings up a fresh llama-stack container
# first, then the Lightspeed Core Stack service is started in the foreground
# with the configuration file named by $(CONFIG).
run: ensure-llama-stack-container ## Run the service locally with llama-stack container
	@printf '%s\n' "Starting Lightspeed Core Stack..."
	uv run src/lightspeed_stack.py -c $(CONFIG)
1623

17-
run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
18-
uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
19-
AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
20-
uv run llama stack run $(LLAMA_STACK_CONFIG)
24+
.PHONY: ensure-llama-stack-container
# Bring up a fresh llama-stack container: remove any previous instance, rebuild
# the image, then start the container (which in turn waits for its health check).
# The three steps are run as sequential sub-makes rather than listed as
# prerequisites: prerequisites carry no ordering guarantee under `make -j`, and
# starting the container before the old one is removed or the image is built
# would fail.
ensure-llama-stack-container:
	@$(MAKE) --no-print-directory stop-llama-stack-container
	@$(MAKE) --no-print-directory build-llama-stack-image
	@$(MAKE) --no-print-directory start-llama-stack-container
25+
26+
.PHONY: build-llama-stack-image
# Build the image tagged $(LLAMA_STACK_IMAGE) from
# deploy/llama-stack/test.containerfile, using the repository root as the build
# context. Fails with a clear message when neither podman nor docker was found.
build-llama-stack-image: ## Build llama-stack container image
	@# Check for a runtime before announcing the build; report the error on stderr.
	@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
		echo "ERROR: No container runtime found. Install podman or docker." >&2; \
		exit 1; \
	fi
	@echo "Building llama-stack container image..."
	"$(CONTAINER_RUNTIME)" build -f deploy/llama-stack/test.containerfile -t "$(LLAMA_STACK_IMAGE)" .
33+
34+
.PHONY: stop-llama-stack-container
# Force-remove the container named $(LLAMA_STACK_CONTAINER_NAME) if it exists.
# A missing runtime or a missing container is not an error: the target is a
# best-effort cleanup and exits successfully in both cases.
stop-llama-stack-container: ## Stop and remove existing llama-stack container
	@# The name= filter is only a pre-filter (it also matches containers whose
	@# name merely contains ours), so grep -x -F requires an exact whole-line,
	@# literal match before anything is removed.
	@if [ -n "$(CONTAINER_RUNTIME)" ] && \
	    "$(CONTAINER_RUNTIME)" ps -a --filter "name=$(LLAMA_STACK_CONTAINER_NAME)" --format "{{.Names}}" | \
	    grep -qxF -- "$(LLAMA_STACK_CONTAINER_NAME)"; then \
		echo "Stopping existing llama-stack container..."; \
		"$(CONTAINER_RUNTIME)" rm -f "$(LLAMA_STACK_CONTAINER_NAME)"; \
	fi
39+
40+
.PHONY: start-llama-stack-container
# Start a detached llama-stack container from $(LLAMA_STACK_IMAGE), publish its
# port 8321 on host port $(LLAMA_STACK_PORT), and block until the container's
# health check reports healthy (via the wait-for-llama-stack-health target).
#
# Mounted read-only into /opt/app-root:
#   $(LLAMA_STACK_CONFIG)  -> run.yaml
#   $(CONFIG)              -> lightspeed-stack.yaml
#   the entrypoint script and llama_stack_configuration.py from this checkout
# Host paths are resolved with $(abspath)/$(CURDIR) rather than $(PWD), which
# comes from the environment and can be stale (e.g. under `make -C dir`).
# Paths containing whitespace are not supported.
#
# Provider credentials and settings are forwarded from the caller's environment;
# each value is quoted so that whitespace or glob characters in a secret cannot
# split or alter the command line. Unset variables are passed as empty strings.
start-llama-stack-container: ## Start llama-stack container
	@echo "Starting llama-stack container..."
	@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
		echo "ERROR: No container runtime found. Install podman or docker." >&2; \
		exit 1; \
	fi
	$(CONTAINER_RUNTIME) run -d \
		--name $(LLAMA_STACK_CONTAINER_NAME) \
		-p $(LLAMA_STACK_PORT):8321 \
		--health-cmd "curl -f http://localhost:8321/v1/health || exit 1" \
		--health-interval 10s \
		--health-timeout 5s \
		--health-retries 3 \
		--health-start-period 15s \
		-v $(abspath $(LLAMA_STACK_CONFIG)):/opt/app-root/run.yaml:ro,z \
		-v $(abspath $(CONFIG)):/opt/app-root/lightspeed-stack.yaml:ro,z \
		-v $(CURDIR)/scripts/llama-stack-entrypoint.sh:/opt/app-root/enrich-entrypoint.sh:ro,z \
		-v $(CURDIR)/src/llama_stack_configuration.py:/opt/app-root/llama_stack_configuration.py:ro,z \
		-e OPENAI_API_KEY="$${OPENAI_API_KEY:-}" \
		-e EXTERNAL_PROVIDERS_DIR="$${EXTERNAL_PROVIDERS_DIR:-/opt/app-root/external_providers}" \
		-e BRAVE_SEARCH_API_KEY="$${BRAVE_SEARCH_API_KEY:-}" \
		-e TAVILY_SEARCH_API_KEY="$${TAVILY_SEARCH_API_KEY:-}" \
		-e E2E_OPENAI_MODEL="$${E2E_OPENAI_MODEL:-gpt-4o-mini}" \
		-e TENANT_ID="$${TENANT_ID:-}" \
		-e CLIENT_ID="$${CLIENT_ID:-}" \
		-e CLIENT_SECRET="$${CLIENT_SECRET:-}" \
		-e RHAIIS_URL="$${RHAIIS_URL:-}" \
		-e RHAIIS_PORT="$${RHAIIS_PORT:-}" \
		-e RHAIIS_API_KEY="$${RHAIIS_API_KEY:-}" \
		-e RHAIIS_MODEL="$${RHAIIS_MODEL:-}" \
		-e RHEL_AI_URL="$${RHEL_AI_URL:-}" \
		-e RHEL_AI_PORT="$${RHEL_AI_PORT:-}" \
		-e RHEL_AI_API_KEY="$${RHEL_AI_API_KEY:-}" \
		-e RHEL_AI_MODEL="$${RHEL_AI_MODEL:-}" \
		-e GOOGLE_APPLICATION_CREDENTIALS="$${GOOGLE_APPLICATION_CREDENTIALS:-}" \
		-e VERTEX_AI_PROJECT="$${VERTEX_AI_PROJECT:-}" \
		-e VERTEX_AI_LOCATION="$${VERTEX_AI_LOCATION:-}" \
		-e WATSONX_BASE_URL="$${WATSONX_BASE_URL:-}" \
		-e WATSONX_PROJECT_ID="$${WATSONX_PROJECT_ID:-}" \
		-e WATSONX_API_KEY="$${WATSONX_API_KEY:-}" \
		-e LITELLM_DROP_PARAMS=true \
		-e AWS_BEARER_TOKEN_BEDROCK="$${AWS_BEARER_TOKEN_BEDROCK:-}" \
		-e LLAMA_STACK_LOGGING="$${LLAMA_STACK_LOGGING:-}" \
		-e FAISS_VECTOR_STORE_ID="$${FAISS_VECTOR_STORE_ID:-}" \
		$(LLAMA_STACK_IMAGE)
	@$(MAKE) wait-for-llama-stack-health
82+
83+
.PHONY: wait-for-llama-stack-health
# Poll the health status of $(LLAMA_STACK_CONTAINER_NAME) every 2 seconds, up to
# 30 times (60 seconds in total). Exits 0 as soon as the status is "healthy";
# otherwise prints the last 50 log lines of the container and exits 1.
#
# The loop uses a POSIX counter instead of `for i in {1..30}`: brace expansion
# is a bash feature, and under make's default /bin/sh (e.g. dash) the loop
# would run a single time with i set to the literal string "{1..30}", turning
# the 60-second wait into a 2-second one.
wait-for-llama-stack-health: ## Wait for llama-stack container to be healthy
	@echo "Waiting for llama-stack container to be healthy..."
	@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
		echo "ERROR: No container runtime found. Install podman or docker." >&2; \
		exit 1; \
	fi; \
	i=1; \
	while [ "$$i" -le 30 ]; do \
		STATUS=$$($(CONTAINER_RUNTIME) inspect --format='{{.State.Health.Status}}' $(LLAMA_STACK_CONTAINER_NAME) 2>/dev/null || echo "no-healthcheck"); \
		if [ "$$STATUS" = "healthy" ]; then \
			echo "✓ Llama-stack is healthy and ready!"; \
			exit 0; \
		fi; \
		echo " Health status: $$STATUS (attempt $$i/30)"; \
		sleep 2; \
		i=$$((i + 1)); \
	done; \
	echo "✗ ERROR: Llama-stack did not become healthy within 60 seconds"; \
	echo "Container logs:"; \
	$(CONTAINER_RUNTIME) logs --tail 50 $(LLAMA_STACK_CONTAINER_NAME); \
	exit 1
98+
99+
.PHONY: clean-llama-stack
# Remove the container (via the stop-llama-stack-container prerequisite) and
# then the locally built image $(LLAMA_STACK_IMAGE). A missing runtime or a
# missing image is not an error; a failing `rmi` is.
clean-llama-stack: stop-llama-stack-container ## Stop container and remove image
	@# `images -q` prints the image ID only when the image exists; `grep -q .`
	@# turns "any output at all" into the condition.
	@if [ -n "$(CONTAINER_RUNTIME)" ] && \
	    "$(CONTAINER_RUNTIME)" images -q "$(LLAMA_STACK_IMAGE)" | grep -q .; then \
		echo "Removing llama-stack image..."; \
		"$(CONTAINER_RUNTIME)" rmi "$(LLAMA_STACK_IMAGE)"; \
	fi
21104

22105
test-unit: ## Run the unit tests
23106
@echo "Running unit tests..."

README.md

Lines changed: 12 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -178,21 +178,21 @@ To quickly get hands on LCS, we can run it using the default configurations prov
178178
```bash
179179
export OPENAI_API_KEY=sk-xxxxx
180180
```
181-
3. start Llama stack server
181+
3. start LCS server
182182
```bash
183-
uv run llama stack run local-run.yaml
184-
```
185-
4. [Optional] If you're new to Llama stack, run through a quick tutorial to learn the basics of what the server is used for, by running the interactive tutorial script
186-
```bash
187-
./scripts/llama_stack_tutorial.sh
188-
```
189-
5. check the LCS settings in [lightspeed-stack.yaml](lightspeed-stack.yaml). `llama_stack.url` should be `url: http://localhost:8321`
190-
6. start LCS server
191-
```
192183
make run
193-
```
194-
7. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
184+
```
185+
186+
4. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
187+
188+
## Container Runtime Requirements
189+
190+
The service requires either Podman or Docker to launch the Llama Stack container:
195191

192+
- **Podman** (recommended for RHEL/Fedora): `sudo dnf install podman`
193+
- **Docker**: Install from [docker.com](https://docs.docker.com/get-docker/)
194+
195+
The Makefile will auto-detect which runtime is available.
196196

197197
# Configuration
198198

@@ -289,43 +289,6 @@ These settings will be used when no provider or model are specified in REST API
289289
290290
For a comprehensive list of supported providers, take a look [here](docs/providers.md).
291291
292-
## Integration with Llama Stack
293-
294-
The Llama Stack can be run as a standalone server and accessed via its REST
295-
API. However, instead of direct communication via the REST API (and JSON
296-
format), there is an even better alternative. It is based on the so-called
297-
Llama Stack Client. It is a library available for Python, Swift, Node.js or
298-
Kotlin, which "wraps" the REST API stack in a suitable way, which is easier for
299-
many applications.
300-
301-
302-
![Integration with Llama Stack](docs/core2llama-stack_interface.png)
303-
304-
305-
306-
## Llama Stack as separate server
307-
308-
If Llama Stack runs as a separate server, the Lightspeed service needs to be configured to be able to access it. For example, if server runs on localhost:8321, the service configuration stored in file `lightspeed-stack.yaml` should look like:
309-
310-
```yaml
311-
name: foo bar baz
312-
service:
313-
host: localhost
314-
port: 8080
315-
auth_enabled: false
316-
workers: 1
317-
color_log: true
318-
access_log: true
319-
llama_stack:
320-
use_as_library_client: false
321-
url: http://localhost:8321
322-
user_data_collection:
323-
feedback_enabled: true
324-
feedback_storage: "/tmp/data/feedback"
325-
transcripts_enabled: true
326-
transcripts_storage: "/tmp/data/transcripts"
327-
```
328-
329292
### MCP Server and Tool Configuration
330293
331294
**Note**: The `run.yaml` configuration is currently an implementation detail. In the future, all configuration will be available directly from the lightspeed-core config.

lightspeed-stack.yaml

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,12 @@ service:
77
workers: 1
88
color_log: true
99
access_log: true
10-
llama_stack:
11-
# Uses a remote llama-stack service
12-
# The instance would have already been started with a llama-stack-run.yaml file
13-
use_as_library_client: false
14-
# Alternative for "as library use"
15-
# use_as_library_client: true
16-
# library_client_config_path: <path-to-llama-stack-run.yaml-file>
17-
url: http://llama-stack:8321
18-
api_key: xyzzy
10+
# llama_stack section is optional - defaults to http://localhost:8321
11+
# The Makefile launches a container at this URL when you run 'make run'
12+
# Override if needed:
13+
# llama_stack:
14+
# url: http://custom-host:9321
15+
# api_key: custom-key
1916
user_data_collection:
2017
feedback_enabled: true
2118
feedback_storage: "/tmp/data/feedback"

0 commit comments

Comments
 (0)