LCORE-1872: Launch Llama Stack container via Makefile orchestration

anik120 · anik120 · commit 3cfb68cb72a7 · 2026-05-19T13:58:22.000-04:00
Enables LCORE to automatically launch Llama Stack as a containerized service
through Makefile orchestration. Running make run now handles all infrastructure setup:
i) building the container image
ii) stopping any existing instance
iii) launching a fresh llama-stack container
iv) waiting for started container's health check
v) and finally starting the lightspeed-stack service.

Signed-off-by: Anik Bhattacharjee &lt;anbhatta@redhat.com&gt;
diff --git a/Makefile b/Makefile
@@ -11,13 +11,96 @@ PYTHON_REGISTRY = pypi
 CONFIG ?= lightspeed-stack.yaml
 LLAMA_STACK_CONFIG ?= run.yaml
 
-run: ## Run the service locally
+# Container configuration
+LLAMA_STACK_CONTAINER_NAME ?= lightspeed-llama-stack
+LLAMA_STACK_IMAGE ?= lightspeed-llama-stack:local
+LLAMA_STACK_PORT ?= 8321
+CONTAINER_RUNTIME ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)
+
+.PHONY: run build-llama-stack-image remove-llama-stack-container start-llama-stack-container wait-for-llama-stack-health clean-llama-stack
+
+run: start-llama-stack-container ## Run the service locally with llama-stack container
+	@echo "Starting Lightspeed Core Stack..."
 	uv run src/lightspeed_stack.py -c $(CONFIG)
 
-run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
-	uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
-	AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
-	uv run llama stack run $(LLAMA_STACK_CONFIG)
+build-llama-stack-image: remove-llama-stack-container ## Build llama-stack container image
+	@echo "Building llama-stack container image..."
+	@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
+		echo "ERROR: No container runtime found. Install podman or docker."; \
+		exit 1; \
+	fi
+	$(CONTAINER_RUNTIME) build -f deploy/llama-stack/test.containerfile -t $(LLAMA_STACK_IMAGE) .
+
+remove-llama-stack-container: ## Remove existing llama-stack container
+	@if [ -n "$(CONTAINER_RUNTIME)" ] && $(CONTAINER_RUNTIME) inspect $(LLAMA_STACK_CONTAINER_NAME) >/dev/null 2>&1; then \
+		echo "Removing existing llama-stack container..."; \
+		$(CONTAINER_RUNTIME) rm -f $(LLAMA_STACK_CONTAINER_NAME); \
+	fi
+
+start-llama-stack-container: build-llama-stack-image ## Start llama-stack container
+	@echo "Starting llama-stack container..."
+	$(CONTAINER_RUNTIME) run -d \
+		--name $(LLAMA_STACK_CONTAINER_NAME) \
+		-p $(LLAMA_STACK_PORT):8321 \
+		--health-cmd "curl -f http://localhost:8321/v1/health || exit 1" \
+		--health-interval 10s \
+		--health-timeout 5s \
+		--health-retries 3 \
+		--health-start-period 15s \
+		-v $(PWD)/$(LLAMA_STACK_CONFIG):/opt/app-root/run.yaml:ro,z \
+		-v $(PWD)/$(CONFIG):/opt/app-root/lightspeed-stack.yaml:ro,z \
+		-v $(PWD)/scripts/llama-stack-entrypoint.sh:/opt/app-root/enrich-entrypoint.sh:ro,z \
+		-v $(PWD)/src/llama_stack_configuration.py:/opt/app-root/llama_stack_configuration.py:ro,z \
+		-e OPENAI_API_KEY \
+		-e EXTERNAL_PROVIDERS_DIR=$${EXTERNAL_PROVIDERS_DIR:-/opt/app-root/external_providers} \
+		-e BRAVE_SEARCH_API_KEY \
+		-e TAVILY_SEARCH_API_KEY \
+		-e E2E_OPENAI_MODEL=$${E2E_OPENAI_MODEL:-gpt-4o-mini} \
+		-e TENANT_ID=$${TENANT_ID:-} \
+		-e CLIENT_ID=$${CLIENT_ID:-} \
+		-e CLIENT_SECRET \
+		-e RHAIIS_URL=$${RHAIIS_URL:-} \
+		-e RHAIIS_PORT=$${RHAIIS_PORT:-} \
+		-e RHAIIS_API_KEY \
+		-e RHAIIS_MODEL=$${RHAIIS_MODEL:-} \
+		-e RHEL_AI_URL=$${RHEL_AI_URL:-} \
+		-e RHEL_AI_PORT=$${RHEL_AI_PORT:-} \
+		-e RHEL_AI_API_KEY \
+		-e RHEL_AI_MODEL=$${RHEL_AI_MODEL:-} \
+		-e GOOGLE_APPLICATION_CREDENTIALS \
+		-e VERTEX_AI_PROJECT=$${VERTEX_AI_PROJECT:-} \
+		-e VERTEX_AI_LOCATION=$${VERTEX_AI_LOCATION:-} \
+		-e WATSONX_BASE_URL=$${WATSONX_BASE_URL:-} \
+		-e WATSONX_PROJECT_ID=$${WATSONX_PROJECT_ID:-} \
+		-e WATSONX_API_KEY \
+		-e LITELLM_DROP_PARAMS=true \
+		-e AWS_BEARER_TOKEN_BEDROCK \
+		-e LLAMA_STACK_LOGGING=$${LLAMA_STACK_LOGGING:-} \
+		-e FAISS_VECTOR_STORE_ID=$${FAISS_VECTOR_STORE_ID:-} \
+		$(LLAMA_STACK_IMAGE)
+	@$(MAKE) wait-for-llama-stack-health
+
+wait-for-llama-stack-health: ## Wait for llama-stack container to be healthy
+	@echo "Waiting for llama-stack container to be healthy..."
+	@for i in {1..30}; do \
+		STATUS=$$($(CONTAINER_RUNTIME) inspect --format='{{.State.Health.Status}}' $(LLAMA_STACK_CONTAINER_NAME) 2>/dev/null || echo "no-healthcheck"); \
+		if [ "$$STATUS" = "healthy" ]; then \
+			echo "✓ Llama-stack is healthy and ready!"; \
+			exit 0; \
+		fi; \
+		echo "  Health status: $$STATUS (attempt $$i/30)"; \
+		sleep 2; \
+	done; \
+	echo "✗ ERROR: Llama-stack did not become healthy within 60 seconds"; \
+	echo "Container logs:"; \
+	$(CONTAINER_RUNTIME) logs $(LLAMA_STACK_CONTAINER_NAME); \
+	exit 1
+
+clean-llama-stack: remove-llama-stack-container ## Remove container and image
+	@if [ -n "$(CONTAINER_RUNTIME)" ] && $(CONTAINER_RUNTIME) images -q $(LLAMA_STACK_IMAGE) | grep -q .; then \
+		echo "Removing llama-stack image..."; \
+		$(CONTAINER_RUNTIME) rmi $(LLAMA_STACK_IMAGE); \
+	fi
 
 test-unit: ## Run the unit tests
 	@echo "Running unit tests..."
diff --git a/README.md b/README.md
@@ -178,20 +178,22 @@ To quickly get hands on LCS, we can run it using the default configurations prov
    ```bash
    export OPENAI_API_KEY=sk-xxxxx
    ```
-3. start Llama stack server
+3. start LCS server 
    ```bash
-   uv run llama stack run local-run.yaml
-   ```
-4. [Optional] If you're new to Llama stack, run through a quick tutorial to learn the basics of what the server is used for, by running the interactive tutorial script
-   ```bash
-   ./scripts/llama_stack_tutorial.sh
-   ```
-5. check the LCS settings in [lightspeed-stack.yaml](lightspeed-stack.yaml). `llama_stack.url` should be `url: http://localhost:8321`
-6. start LCS server
-    ```
    make run
-    ```
-7. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
+   ```
+4. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
+
+**Note**: `make run` uses containerized llama-stack (service mode). To run llama-stack manually instead, see the [Llama Stack as separate server](#llama-stack-as-separate-server) section below.
+
+## Container Runtime Requirements
+
+The Makefile requires either Podman or Docker to launch the Llama Stack container:
+
+- **Podman** (recommended for RHEL/Fedora): `sudo dnf install podman`
+- **Docker**: Install from [docker.com](https://docs.docker.com/get-docker/)
+
+The Makefile will auto-detect which runtime is available.
 
 
 # Configuration
@@ -831,7 +833,6 @@ Usage: make <OPTIONS> ... <TARGETS>
 Available targets are:
 
 run                               Run the service locally
-run-llama-stack                   Start Llama Stack with enriched config (for local service mode)
 test-unit                         Run the unit tests
 test-integration                  Run integration tests tests
 test-e2e                          Run end to end tests for the service
diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml
@@ -7,15 +7,15 @@ service:
   workers: 1
   color_log: true
   access_log: true
+# llama_stack configuration
+# When using 'make run', a container is ALWAYS launched at http://localhost:8321 (hardcoded in Makefile).
+# This llama_stack section controls where lightspeed-core connects to llama-stack.
+# To use a different port: override with 'make run LLAMA_STACK_PORT=<port>' and update the url below,
+# or run llama-stack manually and don't use 'make run'.
 llama_stack:
-  # Uses a remote llama-stack service
-  # The instance would have already been started with a llama-stack-run.yaml file
   use_as_library_client: false
-  # Alternative for "as library use"
-  # use_as_library_client: true
-  # library_client_config_path: <path-to-llama-stack-run.yaml-file>
-  url: http://llama-stack:8321
-  api_key: xyzzy
+  url: http://localhost:8321
+  # api_key: custom-key  # Uncomment if your llama-stack requires authentication
 user_data_collection:
   feedback_enabled: true
   feedback_storage: "/tmp/data/feedback"