haystack-core-integrations/.github/workflows/llama_stack.yml at 55cdadeaafec88aa9bb3cd31376b1b7ba6eb6715 · deepset-ai/haystack-core-integrations · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
# Display name of this workflow
name: Test / llama_stack

on:
  # Pull requests touching the integration (top-level Markdown files excluded) or this workflow file
  pull_request:
    paths:
      - 'integrations/llama_stack/**'
      - '!integrations/llama_stack/*.md'
      - '.github/workflows/llama_stack.yml'
  # Commits on main, filtered by the same paths
  push:
    branches: [main]
    paths:
      - 'integrations/llama_stack/**'
      - '!integrations/llama_stack/*.md'
      - '.github/workflows/llama_stack.yml'
  # Nightly run at 00:00 (GitHub evaluates cron schedules in UTC)
  schedule:
    - cron: '0 0 * * *'

# Every `run:` step starts in the integration's directory unless a job overrides it
# (`uses:` steps are not affected by this setting)
defaults:
  run:
    working-directory: integrations/llama_stack

# One concurrency group per PR head branch, falling back to the commit SHA when
# head_ref is empty; a newer run in the same group cancels the one in progress
concurrency:
  group: llama_stack-${{ github.head_ref || github.sha }}
  cancel-in-progress: true

env:
  PYTHONUNBUFFERED: "1"
  FORCE_COLOR: "1"
  # JSON arrays (kept as strings) that compute-test-matrix republishes as job outputs;
  # the `run` job parses them with fromJSON() to build its matrix
  TEST_MATRIX_OS: '["ubuntu-latest"]'
  TEST_MATRIX_PYTHON: '["3.12", "3.14"]'

jobs:
  compute-test-matrix:
    # Publishes the OS and Python version lists (JSON strings) that the `run` job
    # turns into its matrix with fromJSON().
    runs-on: ubuntu-slim
    defaults:
      run:
        # This job has no checkout step, so it runs from the workspace root instead of
        # the workflow-level default directory
        working-directory: .
    outputs:
      os: ${{ steps.set.outputs.os }}
      python-version: ${{ steps.set.outputs.python-version }}
    steps:
      # Push events get a fixed single-entry matrix; every other event uses the TEST_MATRIX_* env values.
      # NOTE(review): the push matrix uses Python 3.10, which is not listed in TEST_MATRIX_PYTHON - confirm this is intended.
      - id: set
        run: |
          {
            echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}'
            echo 'python-version=${{ github.event_name == 'push' && '["3.10"]' || env.TEST_MATRIX_PYTHON }}'
          } >> "$GITHUB_OUTPUT"

  run:
    # Matrix job: one instance per (os, python-version) pair published by compute-test-matrix
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    needs: compute-test-matrix
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix entries finish even when one of them fails
      fail-fast: false
      matrix:
        os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}
        python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}
    # NOTE(review): write scopes are presumably required by the coverage steps, which receive github.token - confirm
    permissions:
      pull-requests: write
      contents: write

    steps:
      # Check out the repository; the action is pinned to a full commit SHA (tag noted in the trailing comment)
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # Installs Ollama, starts it in the background and waits for its HTTP endpoint.
      # The whole command is retried (up to 3 attempts, 4 minutes each) by the retry action.
      - name: Install and run Ollama Server as inference provider (needed for Llama Stack Server)
        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
        with:
          timeout_minutes: 4
          max_attempts: 3
          command: |
            curl -fsSL https://ollama.com/install.sh | sh
            nohup ollama serve > ollama.log 2>&1 &

            # Poll the Ollama endpoint every 5 seconds for up to 60 seconds.
            # The probe itself is the loop condition, so a server that comes up during
            # the last sleep is detected instead of being reported as a timeout.
            remaining=60
            until curl -sSf http://localhost:11434/ > /dev/null; do
              if [ "$remaining" -le 0 ]; then
                echo "Timed out waiting for Ollama service to start."
                # Show the server output to make the failure diagnosable
                cat ollama.log
                exit 1
              fi
              echo "Waiting for Ollama service to start..."
              sleep 5
              # Plain assignment: unlike ((remaining-=5)) it never returns a non-zero status at 0
              remaining=$((remaining - 5))
            done

            echo "Ollama service started successfully."

      # Downloads the model used by the tests; retried up to 5 times, 2 minutes per attempt
      - name: Pull models
        uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
        with:
          max_attempts: 5
          timeout_minutes: 2
          command: |
            ollama pull llama3.2:3b

            # Confirm the model shows up in the local list before reporting success
            if ! ollama list | grep -q "llama3.2:3b"; then
              echo "Model llama3.2:3b not pulled."
              exit 1
            fi

            echo "Models pulled successfully."

      # Installs the interpreter selected by the current matrix entry
      - name: "Set up Python ${{ matrix.python-version }}"
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ matrix.python-version }}"

      # Starts a Llama Stack server (starter distribution) in the background and
      # fails the job unless its /v1/models endpoint answers.
      - name: Test Llama Stack Server
        shell: bash
        env:
          # The Ollama provider of Llama Stack expects an OpenAI-compatible base URL;
          # Ollama serves those endpoints under the `/v1` prefix, hence the suffix.
          OLLAMA_URL: http://localhost:11434/v1
          OLLAMA_INFERENCE_MODEL: llama3.2:3b
        run: |
          set -euo pipefail
          pip install -q uv

          # List the starter distribution's dependencies and install them line by line with uv
          uv run --with llama-stack bash -lc 'llama stack list-deps starter | xargs -L1 uv pip install'

          # Launch the server in the background and keep its output for diagnostics
          uv run --with llama-stack llama stack run starter > server.log 2>&1 &
          server_pid=$!

          # Probe up to 60 times, 2 seconds apart (~120s); stop on the first success
          # and abort right away if the server process has already exited
          probes_left=60
          while [ "$probes_left" -gt 0 ]; do
            if curl -fsS http://localhost:8321/v1/models >/dev/null; then
              echo "Llama Stack Server started successfully."
              break
            fi
            if ! kill -0 "$server_pid" 2>/dev/null; then
              echo "Server exited early. Logs:"
              cat server.log
              exit 1
            fi
            sleep 2
            probes_left=$((probes_left - 1))
          done

          # Final probe decides the step result (and prints the model list on success)
          if ! curl -fsS http://localhost:8321/v1/models; then
            echo "Health check failed. Logs:"
            cat server.log
            exit 1
          fi

      - name: Install Hatch
        run: pip install hatch

      # Formatting and type checks run only on the Linux / Python 3.12 matrix entry
      - name: Lint
        if: runner.os == 'Linux' && matrix.python-version == '3.12'
        run: |
          hatch run fmt-check
          hatch run test:types

      - name: Run unit tests
        run: hatch run test:unit-cov-retry

      # On PR: posts coverage comment (directly on same-repo PRs; via artifact for fork PRs). On push to main: stores coverage baseline on data branch.
      # Both steps are limited to one matrix entry: the first Python version of the computed matrix.
      # A hard-coded '3.10' matches only the push matrix; pull requests use TEST_MATRIX_PYTHON,
      # which does not contain 3.10, so the comment and the artifact upload would never run there.
      - name: Store unit tests coverage
        id: coverage_comment
        if: matrix.python-version == fromJSON(needs.compute-test-matrix.outputs.python-version)[0] && runner.os == 'Linux' && github.event_name != 'schedule'
        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
        with:
          GITHUB_TOKEN: ${{ github.token }}
          COVERAGE_PATH: integrations/llama_stack
          SUBPROJECT_ID: llama_stack
          MINIMUM_GREEN: 90
          MINIMUM_ORANGE: 60

      # Only runs when the previous step reports that it wrote the comment to a file
      - name: Upload coverage comment to be posted
        if: matrix.python-version == fromJSON(needs.compute-test-matrix.outputs.python-version)[0] && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: coverage-comment-llama_stack
          path: python-coverage-comment-action-llama_stack.txt

      - name: Run integration tests
        run: hatch run test:integration-cov-append-retry

      # Push builds only: reports coverage again after the integration tests,
      # under the separate `llama_stack-combined` subproject id
      - name: Store combined coverage
        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
        if: github.event_name == 'push'
        with:
          SUBPROJECT_ID: llama_stack-combined
          COVERAGE_PATH: integrations/llama_stack
          GITHUB_TOKEN: ${{ github.token }}
          MINIMUM_ORANGE: 60
          MINIMUM_GREEN: 90

      # Skipped on push. Compiles pyproject.toml with `--resolution lowest-direct` into a
      # requirements file, installs it into the hatch `test` environment, then reruns the unit tests.
      - name: Run unit tests with lowest direct dependencies
        if: github.event_name != 'push'
        run: |
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit

      # Since this integration inherits from OpenAIChatGenerator, we run ALL tests with Haystack main branch to catch regressions
      # Scheduled runs only: the hatch environments are pruned, Haystack is installed from the
      # main branch into the `test` environment, and both the unit and integration suites are rerun.
      - name: Nightly - run tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:unit-cov-retry
          hatch run test:integration-cov-append-retry

  # Calls the Slack notification action when the scheduled (nightly) run fails
  notify-slack-on-failure:
    needs: run
    # failure() is true when a job this one depends on has failed; push and PR runs never notify
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    steps:
      - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1
        with:
          # Webhook URL is read from a repository/organization secret
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}