Skip to content

Commit 1e20104

Browse files
julian-rischclaudeanakin87
authored
feat: add whisper integration (moved from Haystack core) (#3468)
Co-authored-by: Claude Fable 5 <noreply@anthropic.com> Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
1 parent a605402 commit 1e20104

19 files changed

Lines changed: 1555 additions & 0 deletions

.github/labeler.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,7 @@ topic:DX:
428428
topic:security:
429429
- changed-files:
430430
- any-glob-to-any-file: ["SECURITY.md"]
431+
integration:whisper:  # label entry for changes to the whisper integration or its CI workflow
432+
- changed-files:
433+
- any-glob-to-any-file: "integrations/whisper/**/*"  # any file under the integration directory
434+
- any-glob-to-any-file: ".github/workflows/whisper.yml"  # the integration's test workflow

.github/workflows/CI_coverage_comment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ on:
8181
- "Test / watsonx"
8282
- "Test / weave"
8383
- "Test / weaviate"
84+
- "Test / whisper"  # must stay identical to the `name:` declared in .github/workflows/whisper.yml
8485
types: [completed]
8586

8687
jobs:

.github/workflows/whisper.yml

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# This workflow comes from https://github.com/ofek/hatch-mypyc
2+
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
3+
name: Test / whisper
4+
5+
on:  # three triggers: nightly schedule, pull requests, and pushes to main
6+
schedule:
7+
- cron: "0 0 * * *"  # once a day at 00:00 (the nightly run)
8+
pull_request:
9+
paths:  # only PRs touching the integration or this workflow file trigger a run
10+
- "integrations/whisper/**"
11+
- "!integrations/whisper/*.md"  # negated pattern: Markdown files directly under integrations/whisper/ are excluded from the filter
12+
- ".github/workflows/whisper.yml"
13+
push:
14+
branches:
15+
- main
16+
paths:  # same path filter as for pull_request
17+
- "integrations/whisper/**"
18+
- "!integrations/whisper/*.md"
19+
- ".github/workflows/whisper.yml"
20+
21+
defaults:
22+
run:
23+
working-directory: integrations/whisper  # default cwd for `run` steps; the matrix job and the longpaths step override it with `.`
24+
25+
concurrency:
26+
group: whisper-${{ github.head_ref || github.sha }}  # grouped by head ref when it is set, otherwise by commit SHA
27+
cancel-in-progress: true  # a newer run in the same group cancels the one still in progress
28+
29+
env:  # workflow-level environment, shared by all jobs below
30+
PYTHONUNBUFFERED: "1"
31+
FORCE_COLOR: "1"
32+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}  # NOTE(review): declared at workflow level, so every step (including third-party actions) sees it — confirm this scope is intended
33+
TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]'  # JSON array string; used for non-push events by compute-test-matrix
34+
TEST_MATRIX_PYTHON: '["3.10", "3.14"]'  # JSON array string; versions are quoted so "3.10" stays a string
35+
36+
jobs:
37+
compute-test-matrix:  # publishes the OS and Python lists that the `run` job expands into its matrix
38+
runs-on: ubuntu-slim
39+
defaults:
40+
run:
41+
working-directory: .  # overrides the workflow default; presumably because this job has no checkout step, so integrations/whisper would not exist — confirm
42+
outputs:
43+
os: ${{ steps.set.outputs.os }}
44+
python-version: ${{ steps.set.outputs.python-version }}
45+
steps:
46+
- id: set  # push events get the single combination ubuntu-latest / 3.10; all other events get the full lists from env
47+
run: |
48+
echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT"
49+
echo 'python-version=${{ github.event_name == 'push' && '["3.10"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT"
50+
51+
run:  # main test job: one instance per (os, python-version) pair from compute-test-matrix
52+
name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
53+
needs: compute-test-matrix
54+
permissions:
55+
contents: write  # NOTE(review): presumably required by the coverage action to store its data on a branch — confirm
56+
pull-requests: write  # NOTE(review): presumably required to post the coverage comment on PRs — confirm
57+
runs-on: ${{ matrix.os }}
58+
strategy:
59+
fail-fast: false  # let the remaining matrix combinations finish when one fails
60+
matrix:
61+
os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}  # the outputs are JSON strings, hence fromJSON
62+
python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}
63+
64+
steps:
65+
- name: Support longpaths
66+
if: runner.os == 'Windows'  # same runner.os check as the other OS-specific steps; also matches pinned Windows images
67+
working-directory: .
68+
run: git config --system core.longpaths true
69+
70+
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
71+
72+
- name: Set up Python ${{ matrix.python-version }}
73+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
74+
with:
75+
python-version: ${{ matrix.python-version }}
76+
77+
- name: Install Hatch  # NOTE(review): `--uploaded-prior-to=P1D` presumably limits pip to releases uploaded at least one day ago — confirm the installed pip accepts a duration value
78+
run: |
79+
python -m pip install --upgrade pip
80+
pip install hatch --uploaded-prior-to=P1D
81+
- name: Lint
82+
if: matrix.python-version == '3.10' && runner.os == 'Linux'  # runs on a single matrix combination only
83+
run: hatch run fmt-check && hatch run test:types  # `fmt-check` first; `test:types` only runs if it succeeds
84+
85+
- name: Run unit tests
86+
run: hatch run test:unit-cov-retry
87+
88+
# On PR: posts coverage comment (directly on same-repo PRs; via artifact for fork PRs). On push to main: stores coverage baseline on data branch.
89+
- name: Store unit tests coverage
90+
id: coverage_comment  # referenced by the upload step via steps.coverage_comment.outputs
91+
if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'  # single matrix combination; skipped on nightly runs
92+
uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
93+
with:
94+
GITHUB_TOKEN: ${{ github.token }}
95+
COVERAGE_PATH: integrations/whisper  # given explicitly: same directory as the workflow's default working-directory
96+
SUBPROJECT_ID: whisper  # unit-test coverage; the combined-coverage step uses `whisper-combined`
97+
MINIMUM_GREEN: 90
98+
MINIMUM_ORANGE: 60
99+
100+
- name: Upload coverage comment to be posted
101+
if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'  # PRs only, and only when the coverage step reports it wrote a comment file
102+
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
103+
with:
104+
name: coverage-comment-whisper
105+
path: python-coverage-comment-action-whisper.txt  # NOTE(review): presumably the file written by the coverage step for SUBPROJECT_ID `whisper` — confirm
106+
107+
# ffmpeg is required by `LocalWhisperTranscriber` to decode audio files. It is skipped on Windows,
108+
# where the local-model integration tests do not run.
109+
- name: Install ffmpeg (Linux)
110+
if: runner.os == 'Linux'
111+
run: sudo apt-get update && sudo apt-get install -y ffmpeg  # apt-get rather than apt: apt's CLI is meant for interactive use and is not stable for scripts
112+
113+
- name: Install ffmpeg (macOS)
114+
if: runner.os == 'macOS'
115+
run: brew install ffmpeg
116+
117+
- name: Run integration tests
118+
run: hatch run test:integration-cov-append-retry
119+
120+
- name: Store combined coverage  # runs after the integration tests, whose hatch script name indicates coverage is appended
121+
if: github.event_name == 'push'  # push events only; the push trigger is limited to main
122+
uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
123+
with:
124+
GITHUB_TOKEN: ${{ github.token }}
125+
COVERAGE_PATH: integrations/whisper
126+
SUBPROJECT_ID: whisper-combined  # kept separate from the unit-only `whisper` subproject
127+
MINIMUM_GREEN: 90
128+
MINIMUM_ORANGE: 60
129+
130+
- name: Run unit tests with lowest direct dependencies  # compiles requirements with --resolution lowest-direct, installs them into the test env, then reruns the unit tests
131+
if: github.event_name != 'push'  # i.e. pull requests and the nightly schedule
132+
run: |
133+
hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
134+
hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
135+
hatch run test:unit
136+
137+
- name: Nightly - run unit tests with Haystack main branch  # prunes the hatch envs, installs Haystack from the main branch into the test env, then reruns the unit tests
138+
if: github.event_name == 'schedule'  # scheduled (nightly) runs only
139+
run: |
140+
hatch env prune
141+
hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
142+
hatch run test:unit
143+
144+
notify-slack-on-failure:  # sends a Slack notification when the nightly test job fails
145+
needs: run
146+
if: failure() && github.event_name == 'schedule'  # only failures of scheduled runs are reported; PR and push failures are not
147+
runs-on: ubuntu-slim
148+
steps:
149+
- uses: deepset-ai/notify-slack-action@a65def0c8bf91d6520286ab34280151c76a5a008 # v1.1.0
150+
with:
151+
slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
110110
| [weave-haystack](integrations/weave/) | Tracer | [![PyPI - Version](https://img.shields.io/pypi/v/weave-haystack.svg)](https://pypi.org/project/weave-haystack) | [![Test / weave](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave-combined/htmlcov/index.html) |
111111
| [weaviate-haystack](integrations/weaviate/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) | [![Test / weaviate](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate-combined/htmlcov/index.html) |
112112
| [instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [Staged](https://docs.haystack.deepset.ai/docs/breaking-change-policy#discontinuing-an-integration) | | |
113+
| [whisper-haystack](integrations/whisper/) | Audio | [![PyPI - Version](https://img.shields.io/pypi/v/whisper-haystack.svg)](https://pypi.org/project/whisper-haystack) | [![Test / whisper](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/whisper.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/whisper.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-whisper/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-whisper/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-whisper-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-whisper-combined/htmlcov/index.html) |
113114

114115
## Releasing
115116

0 commit comments

Comments
 (0)