From 3cfb68cb72a7227d59bf88f00b81c11cf05ca2b1 Mon Sep 17 00:00:00 2001
From: Anik Bhattacharjee
Date: Mon, 18 May 2026 15:25:16 -0400
Subject: [PATCH] LCORE-1872: Launch Llama Stack container via Makefile orchestration

Enables LCORE to automatically launch Llama Stack as a containerized
service through Makefile orchestration. Running make run now handles
all infrastructure setup:
i) building the container image
ii) stopping any existing instance
iii) launching a fresh llama-stack container
iv) waiting for started container's health check
v) and finally starting the lightspeed-stack service.

Signed-off-by: Anik Bhattacharjee
---
 Makefile              | 95 ++++++++++++++++++++++++++++++++++++++++---
 README.md             | 27 +++++++------
 lightspeed-stack.yaml | 14 +++----
 3 files changed, 111 insertions(+), 25 deletions(-)

diff --git a/Makefile b/Makefile
index 8e936c0cc..ad50242be 100644
--- a/Makefile
+++ b/Makefile
@@ -11,13 +11,98 @@ PYTHON_REGISTRY = pypi
 CONFIG ?= lightspeed-stack.yaml
 LLAMA_STACK_CONFIG ?= run.yaml
 
-run: ## Run the service locally
+# Container configuration
+LLAMA_STACK_CONTAINER_NAME ?= lightspeed-llama-stack
+LLAMA_STACK_IMAGE ?= lightspeed-llama-stack:local
+LLAMA_STACK_PORT ?= 8321
+CONTAINER_RUNTIME ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)
+
+.PHONY: run build-llama-stack-image remove-llama-stack-container start-llama-stack-container wait-for-llama-stack-health clean-llama-stack
+
+run: start-llama-stack-container ## Run the service locally with llama-stack container
+	@echo "Starting Lightspeed Core Stack..."
 	uv run src/lightspeed_stack.py -c $(CONFIG)
 
-run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
-	uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
-	AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
-	uv run llama stack run $(LLAMA_STACK_CONFIG)
+build-llama-stack-image: remove-llama-stack-container ## Build llama-stack container image
+	@echo "Building llama-stack container image..."
+	@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
+		echo "ERROR: No container runtime found. Install podman or docker."; \
+		exit 1; \
+	fi
+	$(CONTAINER_RUNTIME) build -f deploy/llama-stack/test.containerfile -t $(LLAMA_STACK_IMAGE) .
+
+remove-llama-stack-container: ## Remove existing llama-stack container
+	@if [ -n "$(CONTAINER_RUNTIME)" ] && $(CONTAINER_RUNTIME) inspect $(LLAMA_STACK_CONTAINER_NAME) >/dev/null 2>&1; then \
+		echo "Removing existing llama-stack container..."; \
+		$(CONTAINER_RUNTIME) rm -f $(LLAMA_STACK_CONTAINER_NAME); \
+	fi
+
+# Host paths use abspath/CURDIR (not the inherited PWD) so 'make -C <dir>' and absolute CONFIG values mount correctly.
+start-llama-stack-container: build-llama-stack-image ## Start llama-stack container
+	@echo "Starting llama-stack container..."
+	$(CONTAINER_RUNTIME) run -d \
+		--name $(LLAMA_STACK_CONTAINER_NAME) \
+		-p $(LLAMA_STACK_PORT):8321 \
+		--health-cmd "curl -f http://localhost:8321/v1/health || exit 1" \
+		--health-interval 10s \
+		--health-timeout 5s \
+		--health-retries 3 \
+		--health-start-period 15s \
+		-v $(abspath $(LLAMA_STACK_CONFIG)):/opt/app-root/run.yaml:ro,z \
+		-v $(abspath $(CONFIG)):/opt/app-root/lightspeed-stack.yaml:ro,z \
+		-v $(CURDIR)/scripts/llama-stack-entrypoint.sh:/opt/app-root/enrich-entrypoint.sh:ro,z \
+		-v $(CURDIR)/src/llama_stack_configuration.py:/opt/app-root/llama_stack_configuration.py:ro,z \
+		-e OPENAI_API_KEY \
+		-e EXTERNAL_PROVIDERS_DIR=$${EXTERNAL_PROVIDERS_DIR:-/opt/app-root/external_providers} \
+		-e BRAVE_SEARCH_API_KEY \
+		-e TAVILY_SEARCH_API_KEY \
+		-e E2E_OPENAI_MODEL=$${E2E_OPENAI_MODEL:-gpt-4o-mini} \
+		-e TENANT_ID=$${TENANT_ID:-} \
+		-e CLIENT_ID=$${CLIENT_ID:-} \
+		-e CLIENT_SECRET \
+		-e RHAIIS_URL=$${RHAIIS_URL:-} \
+		-e RHAIIS_PORT=$${RHAIIS_PORT:-} \
+		-e RHAIIS_API_KEY \
+		-e RHAIIS_MODEL=$${RHAIIS_MODEL:-} \
+		-e RHEL_AI_URL=$${RHEL_AI_URL:-} \
+		-e RHEL_AI_PORT=$${RHEL_AI_PORT:-} \
+		-e RHEL_AI_API_KEY \
+		-e RHEL_AI_MODEL=$${RHEL_AI_MODEL:-} \
+		-e GOOGLE_APPLICATION_CREDENTIALS \
+		-e VERTEX_AI_PROJECT=$${VERTEX_AI_PROJECT:-} \
+		-e VERTEX_AI_LOCATION=$${VERTEX_AI_LOCATION:-} \
+		-e WATSONX_BASE_URL=$${WATSONX_BASE_URL:-} \
+		-e WATSONX_PROJECT_ID=$${WATSONX_PROJECT_ID:-} \
+		-e WATSONX_API_KEY \
+		-e LITELLM_DROP_PARAMS=true \
+		-e AWS_BEARER_TOKEN_BEDROCK \
+		-e LLAMA_STACK_LOGGING=$${LLAMA_STACK_LOGGING:-} \
+		-e FAISS_VECTOR_STORE_ID=$${FAISS_VECTOR_STORE_ID:-} \
+		$(LLAMA_STACK_IMAGE)
+	@$(MAKE) wait-for-llama-stack-health
+
+# Iterates with seq(1), not bash-only {1..30}: under a POSIX /bin/sh (make's default SHELL) the braces are not expanded.
+wait-for-llama-stack-health: ## Wait for llama-stack container to be healthy
+	@echo "Waiting for llama-stack container to be healthy..."
+	@for i in $$(seq 1 30); do \
+		STATUS=$$($(CONTAINER_RUNTIME) inspect --format='{{.State.Health.Status}}' $(LLAMA_STACK_CONTAINER_NAME) 2>/dev/null || echo "no-healthcheck"); \
+		if [ "$$STATUS" = "healthy" ]; then \
+			echo "✓ Llama-stack is healthy and ready!"; \
+			exit 0; \
+		fi; \
+		echo " Health status: $$STATUS (attempt $$i/30)"; \
+		sleep 2; \
+	done; \
+	echo "✗ ERROR: Llama-stack did not become healthy within 60 seconds"; \
+	echo "Container logs:"; \
+	$(CONTAINER_RUNTIME) logs $(LLAMA_STACK_CONTAINER_NAME); \
+	exit 1
+
+clean-llama-stack: remove-llama-stack-container ## Remove container and image
+	@if [ -n "$(CONTAINER_RUNTIME)" ] && $(CONTAINER_RUNTIME) images -q $(LLAMA_STACK_IMAGE) | grep -q .; then \
+		echo "Removing llama-stack image..."; \
+		$(CONTAINER_RUNTIME) rmi $(LLAMA_STACK_IMAGE); \
+	fi
 
 test-unit: ## Run the unit tests
 	@echo "Running unit tests..."
diff --git a/README.md b/README.md
index ecf6aa7da..b1a909699 100644
--- a/README.md
+++ b/README.md
@@ -178,20 +178,22 @@ To quickly get hands on LCS, we can run it using the default configurations prov
    ```bash
    export OPENAI_API_KEY=sk-xxxxx
    ```
-3. start Llama stack server
+3. start LCS server
    ```bash
-   uv run llama stack run local-run.yaml
-   ```
-4. [Optional] If you're new to Llama stack, run through a quick tutorial to learn the basics of what the server is used for, by running the interactive tutorial script
-   ```bash
-   ./scripts/llama_stack_tutorial.sh
-   ```
-5. check the LCS settings in [lightspeed-stack.yaml](lightspeed-stack.yaml). `llama_stack.url` should be `url: http://localhost:8321`
-6. start LCS server
-   ```
    make run
-   ```
-7. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
+   ```
+4. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
+
+**Note**: `make run` uses containerized llama-stack (service mode). To run llama-stack manually instead, see the [Llama Stack as separate server](#llama-stack-as-separate-server) section below.
+
+## Container Runtime Requirements
+
+The Makefile requires either Podman or Docker to launch the Llama Stack container:
+
+- **Podman** (recommended for RHEL/Fedora): `sudo dnf install podman`
+- **Docker**: Install from [docker.com](https://docs.docker.com/get-docker/)
+
+The Makefile will auto-detect which runtime is available.
 
 # Configuration
 
@@ -831,7 +833,6 @@ Usage: make ...
 Available targets are:
 
 run                               Run the service locally
-run-llama-stack                   Start Llama Stack with enriched config (for local service mode)
 test-unit                         Run the unit tests
 test-integration                  Run integration tests tests
 test-e2e                          Run end to end tests for the service
diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml
index 804939586..b87cd8bf4 100644
--- a/lightspeed-stack.yaml
+++ b/lightspeed-stack.yaml
@@ -7,15 +7,15 @@ service:
   workers: 1
   color_log: true
   access_log: true
+# llama_stack configuration
+# When using 'make run', a container is ALWAYS launched at http://localhost:8321 (the Makefile's default LLAMA_STACK_PORT).
+# This llama_stack section controls where lightspeed-core connects to llama-stack.
+# To use a different port: override with 'make run LLAMA_STACK_PORT=<port>' and update the url below,
+# or run llama-stack manually and don't use 'make run'.
 llama_stack:
-  # Uses a remote llama-stack service
-  # The instance would have already been started with a llama-stack-run.yaml file
   use_as_library_client: false
-  # Alternative for "as library use"
-  # use_as_library_client: true
-  # library_client_config_path:
-  url: http://llama-stack:8321
-  api_key: xyzzy
+  url: http://localhost:8321
+  # api_key: custom-key  # Uncomment if your llama-stack requires authentication
 user_data_collection:
   feedback_enabled: true
   feedback_storage: "/tmp/data/feedback"