---
# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / llama_stack

# Triggers: a daily scheduled run, plus pull requests and pushes to main that
# touch this integration (Markdown-only changes excluded) or this workflow file.
on:
  schedule:
    - cron: '0 0 * * *'
  pull_request:
    paths:
      - 'integrations/llama_stack/**'
      - '!integrations/llama_stack/*.md'
      - '.github/workflows/llama_stack.yml'
  push:
    branches:
      - main
    paths:
      - 'integrations/llama_stack/**'
      - '!integrations/llama_stack/*.md'
      - '.github/workflows/llama_stack.yml'

# Every `run:` step executes inside the integration folder unless a job overrides it.
defaults:
  run:
    working-directory: integrations/llama_stack

# One concurrency group per PR head branch (or per commit SHA when there is no
# head ref); a newer run in the same group cancels the one still in progress.
concurrency:
  group: llama_stack-${{ github.head_ref || github.sha }}
  cancel-in-progress: true

env:
  PYTHONUNBUFFERED: '1'
  FORCE_COLOR: '1'
  # JSON-encoded lists; the compute-test-matrix job forwards them as outputs
  # and the run job expands them with fromJSON().
  TEST_MATRIX_OS: '["ubuntu-latest"]'
  TEST_MATRIX_PYTHON: '["3.12", "3.14"]'
jobs:
  # Publishes the OS and Python lists as job outputs so the `run` job can build
  # its matrix from them. Pushes get a reduced matrix; every other event uses
  # the full lists from the workflow-level env.
  compute-test-matrix:
    runs-on: ubuntu-slim
    # This job has no checkout step, so it overrides the workflow-wide default
    # working directory (integrations/llama_stack) with the workspace root.
    defaults:
      run:
        working-directory: .
    outputs:
      os: ${{ steps.set.outputs.os }}
      python-version: ${{ steps.set.outputs.python-version }}
    steps:
      - id: set
        # The push-only Python version must be one that also appears in
        # TEST_MATRIX_PYTHON, because the Lint and coverage steps of `run`
        # are gated on that exact version ('3.12').
        run: |
          echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT"
          echo 'python-version=${{ github.event_name == 'push' && '["3.12"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT"

  # Starts Ollama and a Llama Stack server, then lints, runs unit and
  # integration tests, and records coverage for each matrix entry.
  run:
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    needs: compute-test-matrix
    # Write access is requested for the coverage action, which posts PR
    # comments and stores the coverage baseline on a data branch.
    permissions:
      contents: write
      pull-requests: write
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}
        python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install and run Ollama Server as inference provider (needed for Llama Stack Server)
        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60  # v4.0.0
        with:
          timeout_minutes: 4
          max_attempts: 3
          command: |
            curl -fsSL https://ollama.com/install.sh | sh
            nohup ollama serve > ollama.log 2>&1 &
            # Check if the service is up and running with a timeout of 60 seconds
            timeout=60
            while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:11434/ > /dev/null; do
              echo "Waiting for Ollama service to start..."
              sleep 5
              # Plain assignment: unlike ((timeout-=5)) it never returns a
              # non-zero status when the counter reaches 0.
              timeout=$((timeout - 5))
            done
            if [ $timeout -le 0 ]; then
              echo "Timed out waiting for Ollama service to start."
              exit 1
            fi
            echo "Ollama service started successfully."

      - name: Pull models
        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60  # v4.0.0
        with:
          timeout_minutes: 2
          max_attempts: 5
          command: |
            ollama pull llama3.2:3b
            ollama list | grep -q "llama3.2:3b" || { echo "Model llama3.2:3b not pulled."; exit 1; }
            echo "Models pulled successfully."

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Test Llama Stack Server
        env:
          OLLAMA_INFERENCE_MODEL: llama3.2:3b
          # Llama Stack's Ollama provider expects an OpenAI-compatible base URL.
          # Ollama serves OpenAI-compatible endpoints under `/v1`, so include it here.
          OLLAMA_URL: http://localhost:11434/v1
        shell: bash
        run: |
          set -euo pipefail
          pip install -q uv
          # Install the starter distro's deps into the uv environment
          uv run --with llama-stack bash -lc 'llama stack list-deps starter | xargs -L1 uv pip install'
          # Start Llama Stack (no more --image-type flag)
          uv run --with llama-stack llama stack run starter > server.log 2>&1 &
          SERVER_PID=$!
          # Wait up to ~120s for health; fail fast if process dies
          for _ in {1..60}; do
            if curl -fsS http://localhost:8321/v1/models >/dev/null; then
              echo "Llama Stack Server started successfully."
              break
            fi
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server exited early. Logs:"; cat server.log; exit 1
            fi
            sleep 2
          done
          # Final health check
          curl -fsS http://localhost:8321/v1/models || { echo "Health check failed. Logs:"; cat server.log; exit 1; }

      - name: Install Hatch
        run: pip install hatch

      # Lint and type-check once per run, on the lowest Python of the matrix.
      - name: Lint
        if: matrix.python-version == '3.12' && runner.os == 'Linux'
        run: hatch run fmt-check && hatch run test:types

      - name: Run unit tests
        run: hatch run test:unit-cov-retry

      # On PR: posts coverage comment (directly on same-repo PRs; via artifact for fork PRs).
      # On push to main: stores coverage baseline on data branch.
      # Gated on '3.12' (same version as Lint) so it runs exactly once for both
      # the PR matrix and the push matrix.
      - name: Store unit tests coverage
        id: coverage_comment
        if: matrix.python-version == '3.12' && runner.os == 'Linux' && github.event_name != 'schedule'
        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683  # v3.40
        with:
          GITHUB_TOKEN: ${{ github.token }}
          COVERAGE_PATH: integrations/llama_stack
          SUBPROJECT_ID: llama_stack
          MINIMUM_GREEN: 90
          MINIMUM_ORANGE: 60

      # Only runs when the previous step wrote the comment to a file instead of
      # posting it (COMMENT_FILE_WRITTEN == 'true').
      - name: Upload coverage comment to be posted
        if: matrix.python-version == '3.12' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
        with:
          name: coverage-comment-llama_stack
          path: python-coverage-comment-action-llama_stack.txt

      - name: Run integration tests
        run: hatch run test:integration-cov-append-retry

      # Push only: record coverage again after the integration tests, under a
      # separate sub-project id so it does not overwrite the unit-test figures.
      - name: Store combined coverage
        if: github.event_name == 'push'
        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683  # v3.40
        with:
          GITHUB_TOKEN: ${{ github.token }}
          COVERAGE_PATH: integrations/llama_stack
          SUBPROJECT_ID: llama_stack-combined
          MINIMUM_GREEN: 90
          MINIMUM_ORANGE: 60

      # Re-run the unit tests against the lowest versions allowed for the
      # direct dependencies declared in pyproject.toml (skipped on push).
      - name: Run unit tests with lowest direct dependencies
        if: github.event_name != 'push'
        run: |
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit

      # Since this integration inherits from OpenAIChatGenerator, we run ALL tests with Haystack main branch to catch regressions
      - name: Nightly - run tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:unit-cov-retry
          hatch run test:integration-cov-append-retry

  # Sends a Slack notification when a scheduled run of `run` fails.
  notify-slack-on-failure:
    needs: run
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    steps:
      - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b  # v1
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}