Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion integrations/deepeval/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -142,7 +149,7 @@ ban-relative-imports = "all"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
@component
class DeepEvalEvaluator:
"""
A component that uses the [DeepEval framework](https://docs.confident-ai.com/docs/evaluation-introduction)
to evaluate inputs against a specific metric. Supported metrics are defined by `DeepEvalMetric`.
A component that uses DeepEval to evaluate inputs against a specific metric.

Uses the [DeepEval framework](https://docs.confident-ai.com/docs/evaluation-introduction).
Supported metrics are defined by `DeepEvalMetric`.

Usage example:
```python
Expand Down
9 changes: 8 additions & 1 deletion integrations/elasticsearch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -139,7 +146,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, relative imports, and don't need type annotations
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
@component
class ElasticsearchBM25Retriever:
"""
ElasticsearchBM25Retriever retrieves documents from the ElasticsearchDocumentStore using BM25 algorithm to find the
most similar documents to a user's query.
Retrieves documents from ElasticsearchDocumentStore using the BM25 algorithm.

Finds the most similar documents to a user's query.

This retriever is only compatible with ElasticsearchDocumentStore.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@

class ElasticsearchDocumentStore:
"""
An ElasticsearchDocumentStore instance that works with Elastic Cloud or your own
Elasticsearch cluster.
An ElasticsearchDocumentStore instance that works with Elastic Cloud or your own Elasticsearch cluster.

Usage example (Elastic Cloud):
```python
Expand Down Expand Up @@ -309,6 +308,7 @@ def count_documents(self) -> int:
async def count_documents_async(self) -> int:
"""
Asynchronously returns how many documents are present in the document store.

:returns: Number of documents in the document store.
"""
self._ensure_initialized()
Expand Down Expand Up @@ -407,7 +407,9 @@ async def filter_documents_async(self, filters: dict[str, Any] | None = None) ->
def _deserialize_document(hit: dict[str, Any]) -> Document:
"""
Creates a `Document` from the search hit provided.

This is mostly useful in self.filter_documents().

:param hit: A search hit from Elasticsearch.
:returns: `Document` created from the search hit.
"""
Expand Down Expand Up @@ -1136,8 +1138,7 @@ def _extract_distinct_counts_from_aggregations(

def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
"""
Returns the number of unique values for each specified metadata field of the documents
that match the provided filters.
Returns the number of unique values for each specified metadata field among documents matching the filters.

:param filters: The filters to apply to count documents.
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
Expand Down Expand Up @@ -1180,8 +1181,7 @@ async def count_unique_metadata_by_filter_async(
self, filters: dict[str, Any], metadata_fields: list[str]
) -> dict[str, int]:
"""
Asynchronously returns the number of unique values for each specified metadata field of the documents
that match the provided filters.
Asynchronously returns unique value counts for each specified metadata field of documents matching the filters.

:param filters: The filters to apply to count documents.
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
Expand Down Expand Up @@ -1352,6 +1352,7 @@ def get_metadata_field_unique_values(
) -> tuple[list[str], dict[str, Any] | None]:
"""
Returns unique values for a metadata field, optionally filtered by a search term in the content.

Uses composite aggregations for proper pagination beyond 10k results.

See: https://www.elastic.co/docs/reference/aggregations/search-aggregations-bucket-composite-aggregation
Expand Down Expand Up @@ -1418,6 +1419,7 @@ async def get_metadata_field_unique_values_async(
) -> tuple[list[str], dict[str, Any] | None]:
"""
Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.

Uses composite aggregations for proper pagination beyond 10k results.

See: https://www.elastic.co/docs/reference/aggregations/search-aggregations-bucket-composite-aggregation
Expand Down
9 changes: 8 additions & 1 deletion integrations/faiss/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -149,7 +156,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
"example/**/*" = ["T201"]

[tool.coverage.run]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def __init__(
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
) -> None:
"""
Initialize FAISSEmbeddingRetriever.

:param document_store: An instance of `FAISSDocumentStore`.
:param filters: Filters applied to the retrieved Documents at initialisation time. At runtime, these are merged
with any runtime filters according to the `filter_policy`.
Expand Down
9 changes: 8 additions & 1 deletion integrations/fastembed/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -134,7 +141,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# examples can contain "print" commands
"examples/**/*" = ["T201", "E501"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
class FastembedDocumentEmbedder:
"""
FastembedDocumentEmbedder computes Document embeddings using Fastembed embedding models.

The embedding of each Document is stored in the `embedding` field of the Document.

Usage example:
Expand Down Expand Up @@ -110,6 +111,7 @@ def __init__(
def to_dict(self) -> dict[str, Any]:
"""
Serializes the component to a dictionary.

:returns:
Dictionary with serialized data.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def __init__(
def to_dict(self) -> dict[str, Any]:
"""
Serializes the component to a dictionary.

:returns:
Dictionary with serialized data.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
@component
class FastembedRanker:
"""
Ranks Documents based on their similarity to the query using
[Fastembed models](https://qdrant.github.io/fastembed/examples/Supported_Models/).
Ranks Documents based on their similarity to the query using Fastembed models.

See https://qdrant.github.io/fastembed/examples/Supported_Models/ for supported models.

Documents are indexed from most to least semantically relevant to the query.

Expand Down Expand Up @@ -129,6 +130,7 @@ def warm_up(self) -> None:
def _prepare_fastembed_input_docs(self, documents: list[Document]) -> list[str]:
"""
Prepare the input by concatenating the document text with the metadata fields specified.

:param documents: The list of Document objects.

:return: A list of strings to be given as input to Fastembed model.
Expand Down
9 changes: 8 additions & 1 deletion integrations/firecrawl/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -147,7 +154,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ async def run_async(
def warm_up(self) -> None:
"""
Warm up the Firecrawl client by initializing the clients.

This is useful to avoid cold start delays when crawling many URLs.
"""
if self._firecrawl_client is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def __init__(
def warm_up(self) -> None:
"""
Warm up the Firecrawl clients by initializing the sync and async clients.

This is useful to avoid cold start delays when performing searches.
"""
if self._firecrawl_client is None:
Expand Down
9 changes: 8 additions & 1 deletion integrations/github/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -135,7 +142,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# Ignore RUF001 for all files in the prompts directory
"src/haystack_integrations/components/prompts/**/*" = ["RUF001"]

Expand Down
9 changes: 8 additions & 1 deletion integrations/google_genai/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -151,7 +158,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# Examples can use print statements
"examples/**/*" = ["T201"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def _handle_streaming_response(
) -> dict[str, list[ChatMessage]]:
"""
Handle streaming response from Google Gen AI generate_content_stream.

:param response_stream: The streaming response from generate_content_stream.
:param streaming_callback: The callback function for streaming chunks.
:returns: A dictionary with the replies.
Expand Down Expand Up @@ -346,6 +347,7 @@ async def _handle_streaming_response_async(
) -> dict[str, list[ChatMessage]]:
"""
Handle async streaming response from Google Gen AI generate_content_stream.

:param response_stream: The async streaming response from generate_content_stream.
:param streaming_callback: The async callback function for streaming chunks.
:returns: A dictionary with the replies.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@

def _process_response_format(generation_kwargs: dict[str, Any]) -> dict[str, Any]:
"""
Process `response_format` from generation_kwargs into Google GenAI's native
`response_schema` and `response_mime_type` parameters.
Process `response_format` from generation_kwargs into Google GenAI native parameters.

Converts `response_format` into `response_schema` and `response_mime_type`.

Accepts either a Pydantic BaseModel class or a JSON schema dict. When
`response_format` is present, it is popped and replaced with the two
Expand Down Expand Up @@ -181,7 +182,6 @@ def remove_key_from_schema(
"""
Recursively traverse a schema and remove all occurrences of the target key.


:param schema: The schema dictionary/list/value to process
:param target_key: The key to remove from all dictionaries in the schema

Expand Down Expand Up @@ -441,7 +441,8 @@ def _convert_tools_to_google_genai_format(tools: ToolsType) -> list[types.Tool]:
def _convert_usage_metadata_to_serializable(
usage_metadata: UsageMetadata | GenerateContentResponseUsageMetadata | None,
) -> dict[str, Any]:
"""Build a JSON-serializable usage dict from a UsageMetadata object.
"""
Build a JSON-serializable usage dict from a UsageMetadata object.

Iterates over known UsageMetadata attribute names and adds each non-None value
in serialized form. Full list of fields: https://ai.google.dev/api/generate-content#UsageMetadata
Expand Down
9 changes: 8 additions & 1 deletion integrations/hanlp/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -152,7 +159,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Loading
Loading