Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 88 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,96 @@ PYTHON_REGISTRY = pypi
CONFIG ?= lightspeed-stack.yaml
LLAMA_STACK_CONFIG ?= run.yaml

run: ## Run the service locally
# Container configuration
LLAMA_STACK_CONTAINER_NAME ?= lightspeed-llama-stack
LLAMA_STACK_IMAGE ?= lightspeed-llama-stack:local
LLAMA_STACK_PORT ?= 8321
CONTAINER_RUNTIME ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)

.PHONY: run build-llama-stack-image remove-llama-stack-container start-llama-stack-container wait-for-llama-stack-health clean-llama-stack

run: start-llama-stack-container ## Run the service locally with llama-stack container
@echo "Starting Lightspeed Core Stack..."
uv run src/lightspeed_stack.py -c $(CONFIG)

run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
uv run llama stack run $(LLAMA_STACK_CONFIG)
build-llama-stack-image: remove-llama-stack-container ## Build llama-stack container image
@echo "Building llama-stack container image..."
@if [ -z "$(CONTAINER_RUNTIME)" ]; then \
echo "ERROR: No container runtime found. Install podman or docker."; \
exit 1; \
fi
$(CONTAINER_RUNTIME) build -f deploy/llama-stack/test.containerfile -t $(LLAMA_STACK_IMAGE) .

remove-llama-stack-container: ## Remove existing llama-stack container
@if [ -n "$(CONTAINER_RUNTIME)" ] && $(CONTAINER_RUNTIME) inspect $(LLAMA_STACK_CONTAINER_NAME) >/dev/null 2>&1; then \
echo "Removing existing llama-stack container..."; \
$(CONTAINER_RUNTIME) rm -f $(LLAMA_STACK_CONTAINER_NAME); \
fi

start-llama-stack-container: build-llama-stack-image ## Start llama-stack container
@echo "Starting llama-stack container..."
$(CONTAINER_RUNTIME) run -d \
--name $(LLAMA_STACK_CONTAINER_NAME) \
-p $(LLAMA_STACK_PORT):8321 \
--health-cmd "curl -f http://localhost:8321/v1/health || exit 1" \
--health-interval 10s \
--health-timeout 5s \
--health-retries 3 \
--health-start-period 15s \
-v $(PWD)/$(LLAMA_STACK_CONFIG):/opt/app-root/run.yaml:ro,z \
-v $(PWD)/$(CONFIG):/opt/app-root/lightspeed-stack.yaml:ro,z \
-v $(PWD)/scripts/llama-stack-entrypoint.sh:/opt/app-root/enrich-entrypoint.sh:ro,z \
-v $(PWD)/src/llama_stack_configuration.py:/opt/app-root/llama_stack_configuration.py:ro,z \
-e OPENAI_API_KEY \
-e EXTERNAL_PROVIDERS_DIR=$${EXTERNAL_PROVIDERS_DIR:-/opt/app-root/external_providers} \
-e BRAVE_SEARCH_API_KEY \
-e TAVILY_SEARCH_API_KEY \
-e E2E_OPENAI_MODEL=$${E2E_OPENAI_MODEL:-gpt-4o-mini} \
-e TENANT_ID=$${TENANT_ID:-} \
-e CLIENT_ID=$${CLIENT_ID:-} \
-e CLIENT_SECRET \
-e RHAIIS_URL=$${RHAIIS_URL:-} \
-e RHAIIS_PORT=$${RHAIIS_PORT:-} \
-e RHAIIS_API_KEY \
-e RHAIIS_MODEL=$${RHAIIS_MODEL:-} \
-e RHEL_AI_URL=$${RHEL_AI_URL:-} \
-e RHEL_AI_PORT=$${RHEL_AI_PORT:-} \
-e RHEL_AI_API_KEY \
-e RHEL_AI_MODEL=$${RHEL_AI_MODEL:-} \
-e GOOGLE_APPLICATION_CREDENTIALS \
-e VERTEX_AI_PROJECT=$${VERTEX_AI_PROJECT:-} \
-e VERTEX_AI_LOCATION=$${VERTEX_AI_LOCATION:-} \
-e WATSONX_BASE_URL=$${WATSONX_BASE_URL:-} \
-e WATSONX_PROJECT_ID=$${WATSONX_PROJECT_ID:-} \
-e WATSONX_API_KEY \
-e LITELLM_DROP_PARAMS=true \
-e AWS_BEARER_TOKEN_BEDROCK \
-e LLAMA_STACK_LOGGING=$${LLAMA_STACK_LOGGING:-} \
-e FAISS_VECTOR_STORE_ID=$${FAISS_VECTOR_STORE_ID:-} \
$(LLAMA_STACK_IMAGE)
@$(MAKE) wait-for-llama-stack-health

wait-for-llama-stack-health: ## Wait for llama-stack container to be healthy
@echo "Waiting for llama-stack container to be healthy..."
@for i in {1..30}; do \
STATUS=$$($(CONTAINER_RUNTIME) inspect --format='{{.State.Health.Status}}' $(LLAMA_STACK_CONTAINER_NAME) 2>/dev/null || echo "no-healthcheck"); \
if [ "$$STATUS" = "healthy" ]; then \
echo "✓ Llama-stack is healthy and ready!"; \
exit 0; \
fi; \
echo " Health status: $$STATUS (attempt $$i/30)"; \
sleep 2; \
done; \
echo "✗ ERROR: Llama-stack did not become healthy within 60 seconds"; \
echo "Container logs:"; \
$(CONTAINER_RUNTIME) logs $(LLAMA_STACK_CONTAINER_NAME); \
exit 1

clean-llama-stack: remove-llama-stack-container ## Remove container and image
@if [ -n "$(CONTAINER_RUNTIME)" ] && $(CONTAINER_RUNTIME) images -q $(LLAMA_STACK_IMAGE) | grep -q .; then \
echo "Removing llama-stack image..."; \
$(CONTAINER_RUNTIME) rmi $(LLAMA_STACK_IMAGE); \
fi

test-unit: ## Run the unit tests
@echo "Running unit tests..."
Expand Down
27 changes: 14 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,20 +178,22 @@ To quickly get hands on LCS, we can run it using the default configurations prov
```bash
export OPENAI_API_KEY=sk-xxxxx
```
3. start Llama stack server
3. start LCS server
```bash
uv run llama stack run local-run.yaml
```
4. [Optional] If you're new to Llama stack, run through a quick tutorial to learn the basics of what the server is used for, by running the interactive tutorial script
```bash
./scripts/llama_stack_tutorial.sh
```
5. check the LCS settings in [lightspeed-stack.yaml](lightspeed-stack.yaml). `llama_stack.url` should be `url: http://localhost:8321`
6. start LCS server
```
make run
```
7. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)
```
Comment thread
coderabbitai[bot] marked this conversation as resolved.
4. access LCS web UI at [http://localhost:8080/](http://localhost:8080/)

**Note**: `make run` uses containerized llama-stack (service mode). To run llama-stack manually instead, see the [Llama Stack as separate server](#llama-stack-as-separate-server) section below.

## Container Runtime Requirements

The Makefile requires either Podman or Docker to launch the Llama Stack container:

- **Podman** (recommended for RHEL/Fedora): `sudo dnf install podman`
- **Docker**: Install from [docker.com](https://docs.docker.com/get-docker/)

The Makefile will auto-detect which runtime is available.

Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Configuration
Expand Down Expand Up @@ -831,7 +833,6 @@ Usage: make <OPTIONS> ... <TARGETS>
Available targets are:

run Run the service locally
run-llama-stack Start Llama Stack with enriched config (for local service mode)
test-unit Run the unit tests
test-integration Run integration tests tests
test-e2e Run end to end tests for the service
Expand Down
14 changes: 7 additions & 7 deletions lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ service:
workers: 1
color_log: true
access_log: true
# llama_stack configuration
# When using 'make run', a container is ALWAYS launched at http://localhost:8321 (hardcoded in Makefile).
# This llama_stack section controls where lightspeed-core connects to llama-stack.
# To use a different port: override with 'make run LLAMA_STACK_PORT=<port>' and update the url below,
# or run llama-stack manually and don't use 'make run'.
llama_stack:
# Uses a remote llama-stack service
# The instance would have already been started with a llama-stack-run.yaml file
use_as_library_client: false
# Alternative for "as library use"
# use_as_library_client: true
# library_client_config_path: <path-to-llama-stack-run.yaml-file>
url: http://llama-stack:8321
api_key: xyzzy
url: http://localhost:8321
# api_key: custom-key # Uncomment if your llama-stack requires authentication
user_data_collection:
feedback_enabled: true
feedback_storage: "/tmp/data/feedback"
Expand Down
Loading