Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions integrations/openrouter/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -134,9 +141,9 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# Examples can print their output and don't need type annotations
"examples/**/*" = ["T201", "ANN"]
"examples/**/*" = ["D", "T201", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
class OpenRouterChatGenerator(OpenAIChatGenerator):
"""
Enables text generation using OpenRouter generative models.

For supported models, see [OpenRouter docs](https://openrouter.ai/models).

Users can pass any text generation parameters valid for the OpenRouter chat completion API
Expand Down Expand Up @@ -71,8 +72,7 @@ def __init__(
http_client_kwargs: dict[str, Any] | None = None,
) -> None:
"""
Creates an instance of OpenRouterChatGenerator. Unless specified otherwise,
the default model is `openai/gpt-5-mini`.
Creates an instance of OpenRouterChatGenerator.
Comment thread
julian-risch marked this conversation as resolved.

:param api_key:
The OpenRouter API key.
Expand Down
9 changes: 8 additions & 1 deletion integrations/opensearch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -145,7 +152,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ def __init__(
**kwargs: Any,
) -> None:
"""
Initialize the OpenSearchHybridRetriever, a super component to retrieve documents from OpenSearch using
both embedding-based and keyword-based retrieval methods.
Initialize the OpenSearchHybridRetriever using both embedding-based and keyword-based retrieval methods.

This is a super component to retrieve documents from OpenSearch using both retrieval methods.

We don't explicitly define all the init parameters of the components in the constructor, for each
of the components, since that would be around 20+ parameters. Instead, we define the most important ones
Expand Down Expand Up @@ -242,7 +243,9 @@ def __init__(

if TYPE_CHECKING:

def warm_up(self) -> None: ...
def warm_up(self) -> None:
"""Warm up the underlying pipeline components."""
...

def run(
self,
Expand All @@ -251,7 +254,9 @@ def run(
filters_embedding: dict[str, Any] | None = None,
top_k_bm25: int | None = None,
top_k_embedding: int | None = None,
) -> dict[str, list[Document]]: ...
) -> dict[str, list[Document]]:
"""Run the hybrid retrieval pipeline and return retrieved documents."""
...

def _create_pipeline(self, data: dict[str, Any]) -> Pipeline:
"""
Expand Down Expand Up @@ -328,6 +333,7 @@ def to_dict(self) -> dict[str, Any]:

@classmethod
def from_dict(cls, data: dict[str, Any]) -> "OpenSearchHybridRetriever":
"""Deserialize an OpenSearchHybridRetriever from a dictionary."""
# deserialize the document store
doc_store = OpenSearchDocumentStore.from_dict(data["init_parameters"]["document_store"])
data["init_parameters"]["document_store"] = doc_store
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def _get_aws_session(
) -> "boto3.Session":
"""
Creates an AWS Session with the given parameters.

Checks if the provided AWS credentials are valid and can be used to connect to AWS.

:param aws_access_key_id: AWS access key ID.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ async def write_documents_async(
def _deserialize_document(hit: dict[str, Any]) -> Document:
"""
Creates a Document from the search hit provided.

This is mostly useful in self.filter_documents().
"""
data = hit["_source"]
Expand Down Expand Up @@ -1482,6 +1483,7 @@ def _embedding_retrieval(
) -> list[Document]:
"""
Retrieves documents that are most similar to the query embedding using a vector similarity metric.

It uses OpenSearch's Approximate k-Nearest Neighbors search algorithm.

This method is not meant to be part of the public interface of
Expand Down Expand Up @@ -1513,8 +1515,9 @@ async def _embedding_retrieval_async(
search_kwargs: dict[str, Any] | None = None,
) -> list[Document]:
"""
Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity
metric. It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm.
Asynchronously retrieves documents most similar to the query embedding using a vector similarity metric.

It uses OpenSearch's Approximate k-Nearest Neighbors search algorithm.

This method is not meant to be part of the public interface of
`OpenSearchDocumentStore` nor called directly.
Expand Down Expand Up @@ -1641,8 +1644,7 @@ def _extract_distinct_counts_from_aggregations(

def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
"""
Returns the number of unique values for each specified metadata field of the documents
that match the provided filters.
Returns the number of unique values for each specified metadata field of the documents that match the filters.

:param filters: The filters to apply to count documents.
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
Expand Down Expand Up @@ -1685,8 +1687,7 @@ async def count_unique_metadata_by_filter_async(
self, filters: dict[str, Any], metadata_fields: list[str]
) -> dict[str, int]:
"""
Asynchronously returns the number of unique values for each specified metadata field of the documents
that match the provided filters.
Asynchronously returns the number of unique values for each specified metadata field matching the filters.

:param filters: The filters to apply to count documents.
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
Expand Down Expand Up @@ -1862,6 +1863,7 @@ def get_metadata_field_unique_values(
) -> tuple[list[str], dict[str, Any] | None]:
"""
Returns unique values for a metadata field, optionally filtered by a search term in the content.

Uses composite aggregations for proper pagination beyond 10k results.

:param metadata_field: The metadata field to get unique values for.
Expand Down Expand Up @@ -1927,6 +1929,7 @@ async def get_metadata_field_unique_values_async(
) -> tuple[list[str], dict[str, Any] | None]:
"""
Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.

Uses composite aggregations for proper pagination beyond 10k results.

:param metadata_field: The metadata field to get unique values for.
Expand Down
11 changes: 9 additions & 2 deletions integrations/optimum/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -151,9 +158,9 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# Examples can print their output
"examples/**" = ["T201"]
"examples/**" = ["D", "T201"]
"tests/**" = ["T201"]

[tool.coverage.run]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

class OptimumEmbedderOptimizationMode(Enum):
"""
[ONXX Optimization modes](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization)
support by the Optimum Embedders.
ONNX Optimization modes supported by the Optimum Embedders.

See [Optimum ONNX optimization docs](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization)
for more details.
"""

#: Basic general optimizations.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
@component
class OptimumDocumentEmbedder:
"""
A component for computing `Document` embeddings using models loaded with the
[HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library,
leveraging the ONNX runtime for high-speed inference.
A component for computing `Document` embeddings using models loaded with the HuggingFace Optimum library.

Uses the [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library and leverages the ONNX
runtime for high-speed inference.

The embedding of each Document is stored in the `embedding` field of the Document.

Expand Down Expand Up @@ -199,6 +200,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
def run(self, documents: list[Document]) -> dict[str, list[Document]]:
"""
Embed a list of Documents.

The embedding of each Document is stored in the `embedding` field of the Document.

:param documents:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
@component
class OptimumTextEmbedder:
"""
A component to embed text using models loaded with the
[HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library,
leveraging the ONNX runtime for high-speed inference.
A component to embed text using models loaded with the HuggingFace Optimum library.

Uses the [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library and leverages the ONNX
runtime for high-speed inference.

Usage example:
```python
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

class OptimumEmbedderQuantizationMode(Enum):
"""
[Dynamic Quantization modes](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization)
support by the Optimum Embedders.
Dynamic Quantization modes supported by the Optimum Embedders.

See [Optimum ONNX quantization docs](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization)
for more details.
"""

#: Quantization for the ARM64 architecture.
Expand Down
9 changes: 8 additions & 1 deletion integrations/paddleocr/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -137,7 +144,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ def _normalize_file_type(file_type: FileTypeInput) -> FileType | None:
@component
class PaddleOCRVLDocumentConverter:
"""
This component extracts text from documents using PaddleOCR's large model
document parsing API.
Extracts text from documents using PaddleOCR's large model document parsing API.

PaddleOCR-VL is used behind the scenes. For more information, please
refer to:
Expand Down
11 changes: 9 additions & 2 deletions integrations/pgvector/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -149,9 +156,9 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# examples can contain "print" commands
"examples/**/*" = ["T201"]
"examples/**/*" = ["D", "T201"]


[tool.coverage.run]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def __init__(
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
) -> None:
"""
Initialize the PgvectorEmbeddingRetriever.

:param document_store: An instance of `PgvectorDocumentStore`.
:param filters: Filters applied to the retrieved Documents.
:param top_k: Maximum number of Documents to return.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def __init__(
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
) -> None:
"""
Initialize the PgvectorKeywordRetriever.

:param document_store: An instance of `PgvectorDocumentStore`.
:param filters: Filters applied to the retrieved Documents.
:param top_k: Maximum number of Documents to return.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

def _from_haystack_to_pg_documents(documents: list[Document]) -> list[dict[str, Any]]:
"""
Internal method to convert a list of Haystack Documents to a list of dictionaries that can be used to insert
documents into the PgvectorDocumentStore.
Internal method to convert a list of Haystack Documents to a list of dictionaries.

The resulting dictionaries can be used to insert documents into the PgvectorDocumentStore.
"""

db_documents = []
Expand Down
Loading
Loading