# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / llama_stack

on:
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    paths:
      - "integrations/llama_stack/**"
      - "!integrations/llama_stack/*.md"
      - ".github/workflows/llama_stack.yml"

defaults:
  run:
    working-directory: integrations/llama_stack

concurrency:
  group: llama_stack-${{ github.head_ref }}
  cancel-in-progress: true

env:
  PYTHONUNBUFFERED: "1"
  FORCE_COLOR: "1"

jobs:
  run:
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest] # to test on other operating systems, we would need to install Ollama differently
        python-version: ["3.12", "3.14"]
    steps:
      - uses: actions/checkout@v6

      - name: Install and run Ollama server as an inference provider (needed for the Llama Stack server)
        uses: nick-fields/retry@v4
        with:
          timeout_minutes: 4
          max_attempts: 3
          command: |
            curl -fsSL https://ollama.com/install.sh | sh
            nohup ollama serve > ollama.log 2>&1 &
            # Poll until the service responds, with a 60-second timeout
            timeout=60
            while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:11434/ > /dev/null; do
              echo "Waiting for Ollama service to start..."
              sleep 5
              ((timeout-=5))
            done
            if [ $timeout -eq 0 ]; then
              echo "Timed out waiting for Ollama service to start."
              exit 1
            fi
            echo "Ollama service started successfully."

      - name: Pull models
        uses: nick-fields/retry@v4
        with:
          timeout_minutes: 2
          max_attempts: 5
          command: |
            ollama pull llama3.2:3b
            ollama list | grep -q "llama3.2:3b" || { echo "Model llama3.2:3b not pulled."; exit 1; }
            echo "Models pulled successfully."

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Test Llama Stack Server
        env:
          OLLAMA_INFERENCE_MODEL: llama3.2:3b
          # Llama Stack's Ollama provider expects an OpenAI-compatible base URL.
          # Ollama serves OpenAI-compatible endpoints under `/v1`, so include it here.
          OLLAMA_URL: http://localhost:11434/v1
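          # For illustration, the OpenAI-compatible surface behind this base URL
          # can be probed by hand (a sketch, not run in CI):
          #   curl -s http://localhost:11434/v1/models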
        shell: bash
        run: |
          set -euo pipefail
          pip install -q uv
          # Install the starter distro's deps into the uv environment
          uv run --with llama-stack bash -lc 'llama stack list-deps starter | xargs -L1 uv pip install'
          # Start Llama Stack (no more --image-type flag)
          uv run --with llama-stack llama stack run starter > server.log 2>&1 &
          SERVER_PID=$!
          # Wait up to ~120s for health; fail fast if the process dies
          for i in {1..60}; do
            if curl -fsS http://localhost:8321/v1/models >/dev/null; then
              echo "Llama Stack Server started successfully."
              break
            fi
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server exited early. Logs:"; cat server.log; exit 1
            fi
            sleep 2
          done
          # Final health check
          curl -fsS http://localhost:8321/v1/models || { echo "Health check failed. Logs:"; cat server.log; exit 1; }
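          # A possible end-to-end smoke test at this point (a sketch, left
          # commented out; the chat-completions route and the "ollama/llama3.2:3b"
          # model id are assumptions about the starter distro, not verified here):
          #   curl -s http://localhost:8321/v1/chat/completions \
          #     -H 'Content-Type: application/json' \
          #     -d '{"model": "ollama/llama3.2:3b", "messages": [{"role": "user", "content": "Say hi"}]}'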

      - name: Install Hatch
        run: pip install hatch "virtualenv<21.0.0"

      - name: Lint
        if: matrix.python-version == '3.12' && runner.os == 'Linux'
        run: hatch run fmt-check && hatch run test:types

      - name: Run tests
        run: hatch run test:cov-retry

      - name: Run unit tests with lowest direct dependencies
        run: |
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit
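          # Note: uv's --resolution lowest-direct pins each *direct* dependency to
          # the oldest version allowed by pyproject.toml while transitive
          # dependencies resolve normally, so this step catches lower-bound breakage.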

      # Since this integration inherits from OpenAIChatGenerator, we run ALL tests against the Haystack main branch to catch regressions
      - name: Nightly - run tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:cov-retry

  notify-slack-on-failure:
    needs: run
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    steps:
      - uses: deepset-ai/notify-slack-action@v1
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}