Test / llama_stack #472

Workflow file for this run

.github/workflows/llama_stack.yml at 92810ad

	# This workflow comes from https://github.com/ofek/hatch-mypyc
	# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
	# Display name of the workflow (also used as the prefix of each run title).
	name: Test / llama_stack

	# Triggers:
	#   - schedule:     nightly run at 00:00 (cron "0 0 * * *").
	#   - pull_request: only when the integration sources or this workflow file change.
	#   - push:         same path filter, restricted to the main branch.
	# The "!...*.md" pattern excludes Markdown files located directly in the
	# integration directory, so documentation-only changes there do not start a run.
	on:
	  schedule:
	    - cron: "0 0 * * *"
	  pull_request:
	    paths:
	      - "integrations/llama_stack/**"
	      - "!integrations/llama_stack/*.md"
	      - ".github/workflows/llama_stack.yml"
	  push:
	    branches:
	      - main
	    paths:
	      - "integrations/llama_stack/**"
	      - "!integrations/llama_stack/*.md"
	      - ".github/workflows/llama_stack.yml"

	# Default working directory for every `run:` step in the workflow.
	# Jobs may override it with their own `defaults.run.working-directory`.
	defaults:
	  run:
	    working-directory: integrations/llama_stack

	# One concurrency group per pull-request head branch (github.head_ref); when
	# head_ref is empty the expression falls back to the commit SHA.
	# A newer run in the same group cancels the one still in progress.
	concurrency:
	  group: llama_stack-${{ github.head_ref || github.sha }}
	  cancel-in-progress: true

	# Workflow-wide environment variables.
	# TEST_MATRIX_OS / TEST_MATRIX_PYTHON are JSON arrays stored as strings: the
	# compute-test-matrix job forwards them as outputs and the run job decodes
	# them with fromJSON() to build its strategy matrix.
	env:
	  PYTHONUNBUFFERED: "1"
	  FORCE_COLOR: "1"
	  TEST_MATRIX_OS: '["ubuntu-latest"]'
	  TEST_MATRIX_PYTHON: '["3.12", "3.14"]'

	jobs:
	  # Resolves the OS / Python matrix once so the `run` job can consume it via fromJSON().
	  # Push events get a reduced single-entry matrix; every other event uses the
	  # TEST_MATRIX_OS / TEST_MATRIX_PYTHON values from the workflow-level env.
	  compute-test-matrix:
	    runs-on: ubuntu-slim
	    defaults:
	      run:
	        # Overrides the workflow-level working directory: this job has no
	        # checkout step, so it runs from the runner's default directory.
	        working-directory: .
	    outputs:
	      os: ${{ steps.set.outputs.os }}
	      python-version: ${{ steps.set.outputs.python-version }}
	    steps:
	      - id: set
	        # The push matrix uses 3.12, the lowest version listed in TEST_MATRIX_PYTHON,
	        # so that the coverage steps gated on '3.12' in the `run` job fire on both
	        # pull_request and push events.
	        run: |
	          echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT"
	          echo 'python-version=${{ github.event_name == 'push' && '["3.12"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT"

	  # Main test job: one run per (os, python-version) pair of the computed matrix.
	  run:
	    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
	    needs: compute-test-matrix
	    # Write access is requested for the coverage action steps below, which
	    # receive github.token (PR comment / stored coverage data).
	    permissions:
	      contents: write
	      pull-requests: write
	    runs-on: ${{ matrix.os }}
	    strategy:
	      # Let the remaining matrix entries finish even if one of them fails.
	      fail-fast: false
	      matrix:
	        os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}
	        python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}

	    steps:
	      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

	      # Installs Ollama, starts it in the background and polls its HTTP endpoint.
	      # Wrapped in the retry action: up to 3 attempts of at most 4 minutes each.
	      - name: Install and run Ollama Server as inference provider (needed for Llama Stack Server)
	        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
	        with:
	          timeout_minutes: 4
	          max_attempts: 3
	          command: |
	            curl -fsSL https://ollama.com/install.sh | sh
	            nohup ollama serve > ollama.log 2>&1 &

	            # Check if the service is up and running with a timeout of 60 seconds
	            timeout=60
	            while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:11434/ > /dev/null; do
	              echo "Waiting for Ollama service to start..."
	              sleep 5
	              ((timeout-=5))
	            done

	            if [ $timeout -eq 0 ]; then
	              echo "Timed out waiting for Ollama service to start."
	              exit 1
	            fi

	            echo "Ollama service started successfully."

	      # Pulls the model used by the tests and verifies that it is listed afterwards.
	      # Up to 5 attempts of at most 2 minutes each.
	      - name: Pull models
	        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
	        with:
	          timeout_minutes: 2
	          max_attempts: 5
	          command: |
	            ollama pull llama3.2:3b
	            ollama list | grep -q "llama3.2:3b" || { echo "Model llama3.2:3b not pulled."; exit 1; }

	            echo "Models pulled successfully."

	      - name: Set up Python ${{ matrix.python-version }}
	        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
	        with:
	          python-version: ${{ matrix.python-version }}

	      # Installs the "starter" distribution dependencies, launches the Llama Stack
	      # server in the background and waits until its /v1/models endpoint answers.
	      - name: Test Llama Stack Server
	        env:
	          OLLAMA_INFERENCE_MODEL: llama3.2:3b
	          # Llama Stack's Ollama provider expects an OpenAI-compatible base URL.
	          # Ollama serves OpenAI-compatible endpoints under `/v1`, so include it here.
	          OLLAMA_URL: http://localhost:11434/v1
	          # The PyTorch wheel index does not include upload dates for sympy; uv falls back to the
	          # cutoff date itself as a placeholder, placing those packages exactly at the boundary
	          # and excluding them regardless of what date is set. UV_NO_CONFIG bypasses uv.toml
	          # (which sets exclude-newer = "24 hours") for this step only.
	          UV_NO_CONFIG: "1"
	        shell: bash
	        run: |
	          set -euo pipefail
	          python -m pip install --upgrade pip
	          pip install -q uv --uploaded-prior-to=P1D

	          # Install the starter distro's deps into the uv environment
	          uv run --with llama-stack bash -lc 'llama stack list-deps starter | xargs -L1 uv pip install'

	          # Start Llama Stack (no more --image-type flag)
	          uv run --with llama-stack llama stack run starter > server.log 2>&1 &
	          SERVER_PID=$!

	          # Wait up to ~120s for health; fail fast if process dies
	          for _ in {1..60}; do
	            if curl -fsS http://localhost:8321/v1/models >/dev/null; then
	              echo "Llama Stack Server started successfully."
	              break
	            fi
	            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
	              echo "Server exited early. Logs:"; cat server.log; exit 1
	            fi
	            sleep 2
	          done

	          # Final health check
	          curl -fsS http://localhost:8321/v1/models || { echo "Health check failed. Logs:"; cat server.log; exit 1; }

	      - name: Install Hatch
	        run: |
	          python -m pip install --upgrade pip
	          pip install hatch --uploaded-prior-to=P1D

	      # Formatting and type checks only need to run once per matrix.
	      - name: Lint
	        if: matrix.python-version == '3.12' && runner.os == 'Linux'
	        run: hatch run fmt-check && hatch run test:types

	      - name: Run unit tests
	        run: hatch run test:unit-cov-retry

	      # On PR: posts coverage comment (directly on same-repo PRs; via artifact for fork PRs). On push to main: stores coverage baseline on data branch.
	      # Gated on 3.12 (present in both the push matrix and TEST_MATRIX_PYTHON) so that
	      # exactly one matrix entry reports coverage; scheduled runs are skipped.
	      - name: Store unit tests coverage
	        id: coverage_comment
	        if: matrix.python-version == '3.12' && runner.os == 'Linux' && github.event_name != 'schedule'
	        uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
	        with:
	          GITHUB_TOKEN: ${{ github.token }}
	          COVERAGE_PATH: integrations/llama_stack
	          SUBPROJECT_ID: llama_stack
	          MINIMUM_GREEN: 90
	          MINIMUM_ORANGE: 60

	      # Only runs when the previous step reports that it wrote a comment file.
	      - name: Upload coverage comment to be posted
	        if: matrix.python-version == '3.12' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
	        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	        with:
	          name: coverage-comment-llama_stack
	          path: python-coverage-comment-action-llama_stack.txt

	      - name: Run integration tests
	        run: hatch run test:integration-cov-append-retry

	      # Push only: stores unit + integration coverage under a separate subproject id.
	      - name: Store combined coverage
	        if: github.event_name == 'push'
	        uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
	        with:
	          GITHUB_TOKEN: ${{ github.token }}
	          COVERAGE_PATH: integrations/llama_stack
	          SUBPROJECT_ID: llama_stack-combined
	          MINIMUM_GREEN: 90
	          MINIMUM_ORANGE: 60

	      # Re-runs the unit tests against the lowest versions allowed for direct dependencies.
	      - name: Run unit tests with lowest direct dependencies
	        if: github.event_name != 'push'
	        run: |
	          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
	          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
	          hatch run test:unit

	      # Since this integration inherits from OpenAIChatGenerator, we run ALL tests with Haystack main branch to catch regressions
	      - name: Nightly - run tests with Haystack main branch
	        if: github.event_name == 'schedule'
	        run: |
	          hatch env prune
	          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
	          hatch run test:unit-cov-retry
	          hatch run test:integration-cov-append-retry

	  # Sends a Slack notification when a scheduled (nightly) run of the `run` job fails.
	  notify-slack-on-failure:
	    needs: run
	    if: failure() && github.event_name == 'schedule'
	    runs-on: ubuntu-slim
	    steps:
	      - uses: deepset-ai/notify-slack-action@a65def0c8bf91d6520286ab34280151c76a5a008 # v1.1.0
	        with:
	          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Test / llama_stack #472

Workflow file

Test / llama_stack #472

Uh oh!

Workflow file for this run