Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
daf3f10
adding FAISS github workflow
davidsbatista Feb 26, 2026
09e553f
adding win and mac to matrix
davidsbatista Feb 26, 2026
206a50f
reformatting files
davidsbatista Feb 26, 2026
4a28098
adding py.typed
davidsbatista Feb 26, 2026
7d18125
adding safeguard to check for index before operations that need the i…
davidsbatista Feb 26, 2026
ff58aba
pinning numpy
davidsbatista Feb 26, 2026
835abec
pinning numpy
davidsbatista Feb 26, 2026
76e7ca9
pinning numpy
davidsbatista Feb 26, 2026
53a8df1
removing numpy, making dependent on faiss-cpu
davidsbatista Feb 26, 2026
64cd717
trying to fix lowest direct dependencies run
davidsbatista Feb 26, 2026
b9ea99c
temporary disable lowest direct dependencies run
davidsbatista Feb 26, 2026
c3472c2
Merge branch 'main' into feat/adding-workflow-for-FAISS
davidsbatista Feb 27, 2026
b3489e4
Merge branch 'main' into feat/adding-workflow-for-FAISS
davidsbatista Feb 27, 2026
c31d2f8
debugging: unit tests with lowest direct dependencies
davidsbatista Feb 27, 2026
a0756ae
debugging: unit tests with lowest direct dependencies
davidsbatista Feb 27, 2026
7c1eb85
Merge branch 'main' into feat/adding-workflow-for-FAISS
davidsbatista Feb 27, 2026
f4e6c9d
debugging: unit tests with lowest direct dependencies
davidsbatista Feb 27, 2026
1c8d4b5
debugging: unit tests with lowest direct dependencies
davidsbatista Feb 27, 2026
c088a48
debugging: unit tests with lowest direct dependencies - pinning virtu…
davidsbatista Feb 27, 2026
c003271
debugging: unit tests with lowest direct dependencies - pinning virtu…
davidsbatista Feb 27, 2026
421896b
adding exceptions to docstrings
davidsbatista Feb 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .github/workflows/faiss.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / faiss

on:
  # Nightly run against Haystack main (see the "Nightly" step below).
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    paths:
      - "integrations/faiss/**"
      - "!integrations/faiss/*.md"
      - ".github/workflows/faiss.yml"

# Cancel superseded runs for the same PR branch.
# NOTE(review): on schedule events github.head_ref is empty, so all nightly
# runs share the group "faiss-" — presumably harmless at one run per day; confirm.
concurrency:
  group: faiss-${{ github.head_ref }}
  cancel-in-progress: true

env:
  PYTHONUNBUFFERED: "1"
  FORCE_COLOR: "1"

defaults:
  run:
    working-directory: integrations/faiss

jobs:
  run:
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # FAISS wheels are most reliable on Linux in CI.
        os: [ubuntu-latest] #[ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.10", "3.13"]

    steps:
      - uses: actions/checkout@v6

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Hatch
        # virtualenv is pinned below 21 to keep the lowest-direct-dependencies
        # resolution step working (see the history of this workflow).
        run: pip install hatch "virtualenv<21.0.0"

      # Lint/type-check only once per matrix, on the lowest Python on Linux.
      - name: Lint
        if: matrix.python-version == '3.10' && runner.os == 'Linux'
        run: hatch run fmt-check && hatch run test:types

      - name: Run tests
        run: hatch run test:cov-retry

      - name: Run unit tests with lowest direct dependencies
        if: matrix.python-version == '3.10' && runner.os == 'Linux'
        run: |
          hatch env prune
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit

      - name: Nightly - run unit tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:unit

      - name: Send event to Datadog for nightly failures
        if: failure() && github.event_name == 'schedule'
        uses: ./.github/actions/send_failure
        with:
          title: |
            Core integrations nightly tests failure: ${{ github.workflow }}
          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
2 changes: 1 addition & 1 deletion integrations/faiss/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
dependencies = [
"haystack-ai>=2.24.0",
"faiss-cpu>=1.8.0",
"numpy",
"numpy>=1.22,<2; python_version < '3.13'",
Comment thread
julian-risch marked this conversation as resolved.
]

[project.urls]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class FAISSEmbeddingRetriever:

assert res["retriever"]["documents"][0].content == "There are over 7,000 languages spoken around the world today."
```
""" # noqa: E501
""" # noqa: E501

def __init__(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pathlib import Path
from typing import Any

import faiss
import faiss # type: ignore[import-untyped]
import numpy as np
from haystack import default_from_dict, default_to_dict
from haystack.dataclasses import Document
Expand Down Expand Up @@ -40,6 +40,8 @@ def __init__(
:param index_path: Path to save/load the index and documents. If None, the store is in-memory only.
:param index_string: The FAISS index factory string. Default is "Flat".
:param embedding_dim: The dimension of the embeddings. Default is 768.
:raises DocumentStoreError: If the FAISS index cannot be initialized.
:raises ValueError: If `index_path` points to a missing `.faiss` file when loading persisted data.
"""
self.index_path = index_path
self.embedding_dim = embedding_dim
Expand Down Expand Up @@ -68,6 +70,13 @@ def _create_new_index(self):
msg = f"Could not create FAISS index with factory string '{self.index_string}': {e}"
raise DocumentStoreError(msg) from e

def _get_index_or_raise(self) -> Any:
"""Return the FAISS index or raise if it is unexpectedly missing."""
if self.index is None:
msg = "FAISS index has not been initialized."
raise DocumentStoreError(msg)
return self.index

def count_documents(self) -> int:
"""
Returns the number of documents in the store.
Expand All @@ -80,6 +89,7 @@ def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Docume

:param filters: A dictionary of filters to apply.
:return: A list of matching Documents.
:raises FilterError: If the filter structure is invalid.
"""
if not filters:
return list(self.documents.values())
Expand Down Expand Up @@ -120,6 +130,9 @@ def write_documents(self, documents: list[Document], policy: DuplicatePolicy = D
:param documents: The list of documents to write.
:param policy: The policy to handle duplicate documents.
:return: The number of documents written.
:raises ValueError: If `documents` is not an iterable of `Document` objects.
:raises DuplicateDocumentError: If a duplicate document is found and `policy` is `DuplicatePolicy.FAIL`.
:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable when adding embeddings.
"""
if not isinstance(documents, Iterable) or isinstance(documents, (str, bytes)):
msg = "param 'documents' must contain an iterable of objects of type Document."
Expand Down Expand Up @@ -175,13 +188,16 @@ def write_documents(self, documents: list[Document], policy: DuplicatePolicy = D
if vectors_to_add:
vectors = np.array(vectors_to_add, dtype="float32")
ids = np.array(ids_to_add_to_index, dtype="int64")
self.index.add_with_ids(vectors, ids)
index = self._get_index_or_raise()
index.add_with_ids(vectors, ids)

return docs_written

def delete_documents(self, document_ids: list[str]) -> None:
"""
Deletes documents from the store.

:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable when removing embeddings.
"""
if not document_ids:
return
Expand All @@ -197,9 +213,10 @@ def delete_documents(self, document_ids: list[str]) -> None:
del self.id_map[int_id]
ids_to_remove_from_index.append(int_id)

if ids_to_remove_from_index and self.index.ntotal > 0:
index = self._get_index_or_raise()
if ids_to_remove_from_index and index.ntotal > 0:
ids_array = np.array(ids_to_remove_from_index, dtype="int64")
self.index.remove_ids(ids_array)
index.remove_ids(ids_array)

def delete_all_documents(self) -> None:
"""
Expand All @@ -221,6 +238,7 @@ def search(
:param top_k: The number of results to return.
:param filters: Filters to apply.
:return: A list of matching Documents.
:raises FilterError: If the filter structure is invalid.
"""
if not self.index or self.index.ntotal == 0:
return []
Expand Down Expand Up @@ -301,6 +319,9 @@ def _check_condition(self, doc: Document, condition: dict[str, Any]) -> bool:
msg = "Missing 'field' in filter condition"
raise FilterError(msg)
field = condition.get("field")
if not isinstance(field, str):
msg = "'field' in filter condition must be a string"
raise FilterError(msg)
if "value" not in condition:
msg = "Missing 'value' in filter condition"
raise FilterError(msg)
Expand Down Expand Up @@ -370,6 +391,8 @@ def delete_by_filter(self, filters: dict[str, Any]) -> int:

:param filters: A dictionary of filters to apply to find documents to delete.
:returns: The number of documents deleted.
:raises FilterError: If the filter structure is invalid.
:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable when removing embeddings.
"""
docs_to_delete = self.filter_documents(filters)
ids = [doc.id for doc in docs_to_delete]
Expand All @@ -382,6 +405,7 @@ def count_documents_by_filter(self, filters: dict[str, Any]) -> int:

:param filters: A dictionary of filters to apply.
:returns: The number of matching documents.
:raises FilterError: If the filter structure is invalid.
"""
return len(self.filter_documents(filters))

Expand All @@ -395,6 +419,7 @@ def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int
:param filters: A dictionary of filters to apply to find documents to update.
:param meta: A dictionary of metadata key-value pairs to update in the matching documents.
:returns: The number of documents updated.
:raises FilterError: If the filter structure is invalid.
"""
docs_to_update = self.filter_documents(filters)
for doc in docs_to_update:
Expand Down Expand Up @@ -505,9 +530,11 @@ def from_dict(cls, data: dict[str, Any]) -> "FAISSDocumentStore":
def save(self, index_path: str | Path) -> None:
"""
Saves the index and documents to disk.

:raises DocumentStoreError: If the FAISS index is unexpectedly unavailable.
"""
path = Path(index_path)
faiss.write_index(self.index, str(path.with_suffix(".faiss")))
faiss.write_index(self._get_index_or_raise(), str(path.with_suffix(".faiss")))

# Save documents and ID mapping
data = {
Expand All @@ -523,6 +550,8 @@ def save(self, index_path: str | Path) -> None:
def load(self, index_path: str | Path) -> None:
"""
Loads the index and documents from disk.

:raises ValueError: If the `.faiss` file does not exist.
"""
path = Path(index_path)
if not path.with_suffix(".faiss").exists():
Expand Down
Loading