Skip to content

Commit 99f63fd

Browse files
ci: adding FAISS github workflow (#2879)
* adding FAISS github workflow * adding win and mac to matrix * reformatting files * adding py.typed * adding safeguard to check for index before operations that need the index * pinning numpy * pinning numpy * pinning numpy * removing numpy, making dependent on faiss-cpu * trying to fix lowest direct dependencies run * temporarily disable lowest direct dependencies run * debugging: unit tests with lowest direct dependencies * debugging: unit tests with lowest direct dependencies * debugging: unit tests with lowest direct dependencies * debugging: unit tests with lowest direct dependencies * debugging: unit tests with lowest direct dependencies - pinning virtualenv * debugging: unit tests with lowest direct dependencies - pinning virtualenv * adding exceptions to docstrings
1 parent d8ef2dd commit 99f63fd

5 files changed

Lines changed: 112 additions & 7 deletions

File tree

.github/workflows/faiss.yml

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# This workflow comes from https://github.com/ofek/hatch-mypyc
2+
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
3+
name: Test / faiss
4+
5+
on:
6+
schedule:
7+
- cron: "0 0 * * *"
8+
pull_request:
9+
paths:
10+
- "integrations/faiss/**"
11+
- "!integrations/faiss/*.md"
12+
- ".github/workflows/faiss.yml"
13+
14+
concurrency:
15+
group: faiss-${{ github.head_ref }}
16+
cancel-in-progress: true
17+
18+
env:
19+
PYTHONUNBUFFERED: "1"
20+
FORCE_COLOR: "1"
21+
22+
defaults:
23+
run:
24+
working-directory: integrations/faiss
25+
26+
jobs:
27+
run:
28+
name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
29+
runs-on: ${{ matrix.os }}
30+
strategy:
31+
fail-fast: false
32+
matrix:
33+
# FAISS wheels are most reliable on Linux in CI.
34+
os: [ubuntu-latest] #[ubuntu-latest, windows-latest, macos-latest]
35+
python-version: ["3.10", "3.13"]
36+
37+
steps:
38+
- uses: actions/checkout@v6
39+
40+
- name: Set up Python ${{ matrix.python-version }}
41+
uses: actions/setup-python@v6
42+
with:
43+
python-version: ${{ matrix.python-version }}
44+
45+
- name: Install Hatch
46+
run: pip install hatch "virtualenv<21.0.0"
47+
48+
- name: Lint
49+
if: matrix.python-version == '3.10' && runner.os == 'Linux'
50+
run: hatch run fmt-check && hatch run test:types
51+
52+
- name: Run tests
53+
run: hatch run test:cov-retry
54+
55+
- name: Run unit tests with lowest direct dependencies
56+
if: matrix.python-version == '3.10' && runner.os == 'Linux'
57+
run: |
58+
hatch env prune
59+
hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
60+
hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
61+
hatch run test:unit
62+
63+
- name: Nightly - run unit tests with Haystack main branch
64+
if: github.event_name == 'schedule'
65+
run: |
66+
hatch env prune
67+
hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
68+
hatch run test:unit
69+
70+
- name: Send event to Datadog for nightly failures
71+
if: failure() && github.event_name == 'schedule'
72+
uses: ./.github/actions/send_failure
73+
with:
74+
title: |
75+
Core integrations nightly tests failure: ${{ github.workflow }}
76+
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}

integrations/faiss/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
dependencies = [
2525
"haystack-ai>=2.24.0",
2626
"faiss-cpu>=1.8.0",
27-
"numpy",
27+
"numpy>=1.22,<2; python_version < '3.13'",
2828
]
2929

3030
[project.urls]

integrations/faiss/src/haystack_integrations/components/retrievers/faiss/embedding_retriever.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class FAISSEmbeddingRetriever:
5050
5151
assert res["retriever"]["documents"][0].content == "There are over 7,000 languages spoken around the world today."
5252
```
53-
""" # noqa: E501
53+
""" # noqa: E501
5454

5555
def __init__(
5656
self,

integrations/faiss/src/haystack_integrations/components/retrievers/faiss/py.typed

Whitespace-only changes.

integrations/faiss/src/haystack_integrations/document_stores/faiss/document_store.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pathlib import Path
1010
from typing import Any
1111

12-
import faiss
12+
import faiss # type: ignore[import-untyped]
1313
import numpy as np
1414
from haystack import default_from_dict, default_to_dict
1515
from haystack.dataclasses import Document
@@ -40,6 +40,8 @@ def __init__(
4040
:param index_path: Path to save/load the index and documents. If None, the store is in-memory only.
4141
:param index_string: The FAISS index factory string. Default is "Flat".
4242
:param embedding_dim: The dimension of the embeddings. Default is 768.
43+
:raises DocumentStoreError: If the FAISS index cannot be initialized.
44+
:raises ValueError: If `index_path` points to a missing `.faiss` file when loading persisted data.
4345
"""
4446
self.index_path = index_path
4547
self.embedding_dim = embedding_dim
@@ -68,6 +70,13 @@ def _create_new_index(self):
6870
msg = f"Could not create FAISS index with factory string '{self.index_string}': {e}"
6971
raise DocumentStoreError(msg) from e
7072

73+
def _get_index_or_raise(self) -> Any:
74+
"""Return the FAISS index or raise if it is unexpectedly missing."""
75+
if self.index is None:
76+
msg = "FAISS index has not been initialized."
77+
raise DocumentStoreError(msg)
78+
return self.index
79+
7180
def count_documents(self) -> int:
7281
"""
7382
Returns the number of documents in the store.
@@ -80,6 +89,7 @@ def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Docume
8089
8190
:param filters: A dictionary of filters to apply.
8291
:return: A list of matching Documents.
92+
:raises FilterError: If the filter structure is invalid.
8393
"""
8494
if not filters:
8595
return list(self.documents.values())
@@ -120,6 +130,9 @@ def write_documents(self, documents: list[Document], policy: DuplicatePolicy = D
120130
:param documents: The list of documents to write.
121131
:param policy: The policy to handle duplicate documents.
122132
:return: The number of documents written.
133+
:raises ValueError: If `documents` is not an iterable of `Document` objects.
134+
:raises DuplicateDocumentError: If a duplicate document is found and `policy` is `DuplicatePolicy.FAIL`.
135+
:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable when adding embeddings.
123136
"""
124137
if not isinstance(documents, Iterable) or isinstance(documents, (str, bytes)):
125138
msg = "param 'documents' must contain an iterable of objects of type Document."
@@ -175,13 +188,16 @@ def write_documents(self, documents: list[Document], policy: DuplicatePolicy = D
175188
if vectors_to_add:
176189
vectors = np.array(vectors_to_add, dtype="float32")
177190
ids = np.array(ids_to_add_to_index, dtype="int64")
178-
self.index.add_with_ids(vectors, ids)
191+
index = self._get_index_or_raise()
192+
index.add_with_ids(vectors, ids)
179193

180194
return docs_written
181195

182196
def delete_documents(self, document_ids: list[str]) -> None:
183197
"""
184198
Deletes documents from the store.
199+
200+
:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable when removing embeddings.
185201
"""
186202
if not document_ids:
187203
return
@@ -197,9 +213,10 @@ def delete_documents(self, document_ids: list[str]) -> None:
197213
del self.id_map[int_id]
198214
ids_to_remove_from_index.append(int_id)
199215

200-
if ids_to_remove_from_index and self.index.ntotal > 0:
216+
index = self._get_index_or_raise()
217+
if ids_to_remove_from_index and index.ntotal > 0:
201218
ids_array = np.array(ids_to_remove_from_index, dtype="int64")
202-
self.index.remove_ids(ids_array)
219+
index.remove_ids(ids_array)
203220

204221
def delete_all_documents(self) -> None:
205222
"""
@@ -221,6 +238,7 @@ def search(
221238
:param top_k: The number of results to return.
222239
:param filters: Filters to apply.
223240
:return: A list of matching Documents.
241+
:raises FilterError: If the filter structure is invalid.
224242
"""
225243
if not self.index or self.index.ntotal == 0:
226244
return []
@@ -301,6 +319,9 @@ def _check_condition(self, doc: Document, condition: dict[str, Any]) -> bool:
301319
msg = "Missing 'field' in filter condition"
302320
raise FilterError(msg)
303321
field = condition.get("field")
322+
if not isinstance(field, str):
323+
msg = "'field' in filter condition must be a string"
324+
raise FilterError(msg)
304325
if "value" not in condition:
305326
msg = "Missing 'value' in filter condition"
306327
raise FilterError(msg)
@@ -370,6 +391,8 @@ def delete_by_filter(self, filters: dict[str, Any]) -> int:
370391
371392
:param filters: A dictionary of filters to apply to find documents to delete.
372393
:returns: The number of documents deleted.
394+
:raises FilterError: If the filter structure is invalid.
395+
:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable when removing embeddings.
373396
"""
374397
docs_to_delete = self.filter_documents(filters)
375398
ids = [doc.id for doc in docs_to_delete]
@@ -382,6 +405,7 @@ def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
382405
383406
:param filters: A dictionary of filters to apply.
384407
:returns: The number of matching documents.
408+
:raises FilterError: If the filter structure is invalid.
385409
"""
386410
return len(self.filter_documents(filters))
387411

@@ -395,6 +419,7 @@ def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int
395419
:param filters: A dictionary of filters to apply to find documents to update.
396420
:param meta: A dictionary of metadata key-value pairs to update in the matching documents.
397421
:returns: The number of documents updated.
422+
:raises FilterError: If the filter structure is invalid.
398423
"""
399424
docs_to_update = self.filter_documents(filters)
400425
for doc in docs_to_update:
@@ -505,9 +530,11 @@ def from_dict(cls, data: dict[str, Any]) -> "FAISSDocumentStore":
505530
def save(self, index_path: str | Path) -> None:
506531
"""
507532
Saves the index and documents to disk.
533+
534+
:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable.
508535
"""
509536
path = Path(index_path)
510-
faiss.write_index(self.index, str(path.with_suffix(".faiss")))
537+
faiss.write_index(self._get_index_or_raise(), str(path.with_suffix(".faiss")))
511538

512539
# Save documents and ID mapping
513540
data = {
@@ -523,6 +550,8 @@ def save(self, index_path: str | Path) -> None:
523550
def load(self, index_path: str | Path) -> None:
524551
"""
525552
Loads the index and documents from disk.
553+
554+
:raises ValueError: If the `.faiss` file does not exist.
526555
"""
527556
path = Path(index_path)
528557
if not path.with_suffix(".faiss").exists():

0 commit comments

Comments
 (0)