# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / llama_stack

on:
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    paths:
      - "integrations/llama_stack/**"
      - "!integrations/llama_stack/*.md"
      - ".github/workflows/llama_stack.yml"

defaults:
  run:
    working-directory: integrations/llama_stack

concurrency:
  group: llama_stack-${{ github.head_ref }}
  cancel-in-progress: true

env:
  PYTHONUNBUFFERED: "1"
  FORCE_COLOR: "1"

jobs:
  run:
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest] # to test on other operating systems, we would need to install Ollama differently
        python-version: ["3.12", "3.14"]
    steps:
      - uses: actions/checkout@v6

      - name: Install and run Ollama server as an inference provider (needed for the Llama Stack server)
        uses: nick-fields/retry@v4
        with:
          timeout_minutes: 4
          max_attempts: 3
          command: |
            curl -fsSL https://ollama.com/install.sh | sh
            nohup ollama serve > ollama.log 2>&1 &
            # Poll until the service responds, with a 60-second timeout
            timeout=60
            while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:11434/ > /dev/null; do
              echo "Waiting for Ollama service to start..."
              sleep 5
              ((timeout-=5))
            done
            if [ $timeout -eq 0 ]; then
              echo "Timed out waiting for Ollama service to start."
              exit 1
            fi
            echo "Ollama service started successfully."

      - name: Pull models
        uses: nick-fields/retry@v4
        with:
          timeout_minutes: 2
          max_attempts: 5
          command: |
            ollama pull llama3.2:3b
            ollama list | grep -q "llama3.2:3b" || { echo "Model llama3.2:3b not pulled."; exit 1; }
            echo "Models pulled successfully."

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Test Llama Stack Server
        env:
          OLLAMA_INFERENCE_MODEL: llama3.2:3b
          # Llama Stack's Ollama provider expects an OpenAI-compatible base URL.
          # Ollama serves OpenAI-compatible endpoints under `/v1`, so include it here.
          OLLAMA_URL: http://localhost:11434/v1
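          # For illustration, the OpenAI-compatible surface behind this base URL
          # can be probed by hand (a sketch, not run in CI):
          #   curl -s http://localhost:11434/v1/models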
        shell: bash
        run: |
          set -euo pipefail
          pip install -q uv
          # Install the starter distro's deps into the uv environment
          uv run --with llama-stack bash -lc 'llama stack list-deps starter | xargs -L1 uv pip install'
          # Start Llama Stack (no more --image-type flag)
          uv run --with llama-stack llama stack run starter > server.log 2>&1 &
          SERVER_PID=$!
          # Wait up to ~120s for health; fail fast if the process dies
          for i in {1..60}; do
            if curl -fsS http://localhost:8321/v1/models >/dev/null; then
              echo "Llama Stack Server started successfully."
              break
            fi
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server exited early. Logs:"; cat server.log; exit 1
            fi
            sleep 2
          done
          # Final health check
          curl -fsS http://localhost:8321/v1/models || { echo "Health check failed. Logs:"; cat server.log; exit 1; }
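          # A possible end-to-end smoke test at this point (a sketch, left
          # commented out; the chat-completions route and the "ollama/llama3.2:3b"
          # model id are assumptions about the starter distro, not verified here):
          #   curl -s http://localhost:8321/v1/chat/completions \
          #     -H 'Content-Type: application/json' \
          #     -d '{"model": "ollama/llama3.2:3b", "messages": [{"role": "user", "content": "Say hi"}]}'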

      - name: Install Hatch
        run: pip install hatch "virtualenv<21.0.0"

      - name: Lint
        if: matrix.python-version == '3.12' && runner.os == 'Linux'
        run: hatch run fmt-check && hatch run test:types

      - name: Run tests
        run: hatch run test:cov-retry

      - name: Run unit tests with lowest direct dependencies
        run: |
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit
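          # Note: uv's --resolution lowest-direct pins each *direct* dependency to
          # the oldest version allowed by pyproject.toml while transitive
          # dependencies resolve normally, so this step catches lower-bound breakage.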

      # Since this integration inherits from OpenAIChatGenerator, we run ALL tests against the Haystack main branch to catch regressions
      - name: Nightly - run tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:cov-retry

  notify-slack-on-failure:
    needs: run
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    steps:
      - uses: deepset-ai/notify-slack-action@v1
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}