# Test / llama_stack #472
# Workflow file for this run

# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
# Workflow display name; the integration it covers is the part after the slash.
name: Test / llama_stack
# Triggers: a nightly schedule, plus pull requests and pushes to main that touch
# the integration sources or this workflow file.
on:
  schedule:
    # Once a day at midnight (GitHub evaluates cron expressions in UTC).
    - cron: "0 0 * * *"
  pull_request:
    paths:
      - "integrations/llama_stack/**"
      # Markdown files directly inside the integration directory are excluded.
      - "!integrations/llama_stack/*.md"
      - ".github/workflows/llama_stack.yml"
  push:
    branches:
      - main
    # Same path filter as for pull requests.
    paths:
      - "integrations/llama_stack/**"
      - "!integrations/llama_stack/*.md"
      - ".github/workflows/llama_stack.yml"
# `run:` steps execute from the integration directory unless a job overrides it.
defaults:
  run:
    working-directory: integrations/llama_stack
# Runs are grouped by PR head branch, falling back to the commit SHA when there
# is no head ref; a newer run in the same group cancels the one in progress.
concurrency:
  group: llama_stack-${{ github.head_ref || github.sha }}
  cancel-in-progress: true
env:
  PYTHONUNBUFFERED: "1"
  FORCE_COLOR: "1"
  # JSON arrays kept as strings; the compute-test-matrix job forwards them as
  # job outputs for every event except `push`.
  TEST_MATRIX_OS: '["ubuntu-latest"]'
  TEST_MATRIX_PYTHON: '["3.12", "3.14"]'
jobs:
  # Publishes the OS / Python matrix as JSON job outputs: one fixed combination
  # for pushes, the TEST_MATRIX_* workflow env values for every other event.
  compute-test-matrix:
    runs-on: ubuntu-slim
    # This job has no checkout step, so it runs from the workspace root instead
    # of the workflow-level default directory.
    defaults:
      run:
        working-directory: .
    outputs:
      os: ${{ steps.set.outputs.os }}
      python-version: ${{ steps.set.outputs.python-version }}
    steps:
      - id: set
        # The push-only Python version must be the same one the lint and coverage
        # steps of the `run` job are gated on ('3.12'), and must also be part of
        # TEST_MATRIX_PYTHON; otherwise those steps are skipped for some events.
        run: |
          echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT"
          echo 'python-version=${{ github.event_name == 'push' && '["3.12"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT"

  # Starts Ollama and a Llama Stack server, then lints, runs unit and integration
  # tests and reports coverage for every OS / Python combination in the matrix.
  run:
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    needs: compute-test-matrix
    # NOTE(review): presumably required by the coverage action (PR comment and
    # baseline stored on the data branch) - confirm before narrowing.
    permissions:
      contents: write
      pull-requests: write
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}
        python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install and run Ollama Server as inference provider (needed for Llama Stack Server)
        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60  # v4.0.0
        with:
          timeout_minutes: 4
          max_attempts: 3
          command: |
            curl -fsSL https://ollama.com/install.sh | sh
            nohup ollama serve > ollama.log 2>&1 &
            # Check if the service is up and running with a timeout of 60 seconds
            timeout=60
            while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:11434/ > /dev/null; do
              echo "Waiting for Ollama service to start..."
              sleep 5
              ((timeout-=5))
            done
            if [ $timeout -eq 0 ]; then
              echo "Timed out waiting for Ollama service to start."
              exit 1
            fi
            echo "Ollama service started successfully."

      - name: Pull models
        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60  # v4.0.0
        with:
          timeout_minutes: 2
          max_attempts: 5
          command: |
            ollama pull llama3.2:3b
            ollama list | grep -q "llama3.2:3b" || { echo "Model llama3.2:3b not pulled."; exit 1; }
            echo "Models pulled successfully."

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Test Llama Stack Server
        env:
          OLLAMA_INFERENCE_MODEL: llama3.2:3b
          # Llama Stack's Ollama provider expects an OpenAI-compatible base URL.
          # Ollama serves OpenAI-compatible endpoints under `/v1`, so include it here.
          OLLAMA_URL: http://localhost:11434/v1
          # The PyTorch wheel index does not include upload dates for sympy; uv falls back to the
          # cutoff date itself as a placeholder, placing those packages exactly at the boundary
          # and excluding them regardless of what date is set. UV_NO_CONFIG bypasses uv.toml
          # (which sets exclude-newer = "24 hours") for this step only.
          UV_NO_CONFIG: "1"
        shell: bash
        run: |
          set -euo pipefail
          python -m pip install --upgrade pip
          pip install -q uv --uploaded-prior-to=P1D
          # Install the starter distro's deps into the uv environment
          uv run --with llama-stack bash -lc 'llama stack list-deps starter | xargs -L1 uv pip install'
          # Start Llama Stack (no more --image-type flag)
          uv run --with llama-stack llama stack run starter > server.log 2>&1 &
          SERVER_PID=$!
          # Wait up to ~120s for health; fail fast if process dies
          for _ in {1..60}; do
            if curl -fsS http://localhost:8321/v1/models >/dev/null; then
              echo "Llama Stack Server started successfully."
              break
            fi
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server exited early. Logs:"; cat server.log; exit 1
            fi
            sleep 2
          done
          # Final health check
          curl -fsS http://localhost:8321/v1/models || { echo "Health check failed. Logs:"; cat server.log; exit 1; }

      - name: Install Hatch
        run: |
          python -m pip install --upgrade pip
          pip install hatch --uploaded-prior-to=P1D

      # Lint and type-check once per run, on the reference Python version only.
      - name: Lint
        if: matrix.python-version == '3.12' && runner.os == 'Linux'
        run: hatch run fmt-check && hatch run test:types

      - name: Run unit tests
        run: hatch run test:unit-cov-retry

      # On PR: posts coverage comment (directly on same-repo PRs; via artifact for fork PRs). On push to main: stores coverage baseline on data branch.
      # Gated on '3.12' because that is the only version present in both the
      # push matrix and the pull_request matrix.
      - name: Store unit tests coverage
        id: coverage_comment
        if: matrix.python-version == '3.12' && runner.os == 'Linux' && github.event_name != 'schedule'
        uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79  # v3.41
        with:
          GITHUB_TOKEN: ${{ github.token }}
          COVERAGE_PATH: integrations/llama_stack
          SUBPROJECT_ID: llama_stack
          MINIMUM_GREEN: 90
          MINIMUM_ORANGE: 60

      # Only runs when the previous step reported that it wrote a comment file.
      - name: Upload coverage comment to be posted
        if: matrix.python-version == '3.12' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
        with:
          name: coverage-comment-llama_stack
          path: python-coverage-comment-action-llama_stack.txt

      - name: Run integration tests
        run: hatch run test:integration-cov-append-retry

      # Push runs use a single matrix entry, so this executes once per push.
      - name: Store combined coverage
        if: github.event_name == 'push'
        uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79  # v3.41
        with:
          GITHUB_TOKEN: ${{ github.token }}
          COVERAGE_PATH: integrations/llama_stack
          SUBPROJECT_ID: llama_stack-combined
          MINIMUM_GREEN: 90
          MINIMUM_ORANGE: 60

      # Re-runs the unit tests against the lowest versions allowed for the
      # direct dependencies declared in pyproject.toml.
      - name: Run unit tests with lowest direct dependencies
        if: github.event_name != 'push'
        run: |
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit

      # Since this integration inherits from OpenAIChatGenerator, we run ALL tests with Haystack main branch to catch regressions
      - name: Nightly - run tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:unit-cov-retry
          hatch run test:integration-cov-append-retry

  # Sends a Slack notification when the scheduled (nightly) run fails.
  notify-slack-on-failure:
    needs: run
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    steps:
      - uses: deepset-ai/notify-slack-action@a65def0c8bf91d6520286ab34280151c76a5a008  # v1.1.0
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}