diff --git a/integrations/deepeval/pyproject.toml b/integrations/deepeval/pyproject.toml index c657403160..352ae62d5e 100644 --- a/integrations/deepeval/pyproject.toml +++ b/integrations/deepeval/pyproject.toml @@ -87,6 +87,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -142,7 +149,7 @@ ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/deepeval/src/haystack_integrations/components/evaluators/deepeval/evaluator.py b/integrations/deepeval/src/haystack_integrations/components/evaluators/deepeval/evaluator.py index 9f25d642f9..84bd676857 100644 --- a/integrations/deepeval/src/haystack_integrations/components/evaluators/deepeval/evaluator.py +++ b/integrations/deepeval/src/haystack_integrations/components/evaluators/deepeval/evaluator.py @@ -19,8 +19,10 @@ @component class DeepEvalEvaluator: """ - A component that uses the [DeepEval framework](https://docs.confident-ai.com/docs/evaluation-introduction) - to evaluate inputs against a specific metric. Supported metrics are defined by `DeepEvalMetric`. + A component that uses DeepEval to evaluate inputs against a specific metric. + + Uses the [DeepEval framework](https://docs.confident-ai.com/docs/evaluation-introduction). + Supported metrics are defined by `DeepEvalMetric`. 
Usage example: ```python diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index c5280d4bc8..1d3a2ec1e2 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -90,6 +90,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -139,7 +146,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, relative imports, and don't need type annotations -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py b/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py index 534bf393db..45942c3bbb 100644 --- a/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +++ b/integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py @@ -14,8 +14,9 @@ @component class ElasticsearchBM25Retriever: """ - ElasticsearchBM25Retriever retrieves documents from the ElasticsearchDocumentStore using BM25 algorithm to find the - most similar documents to a user's query. + Retrieves documents from ElasticsearchDocumentStore using the BM25 algorithm. + + Finds the most similar documents to a user's query. This retriever is only compatible with ElasticsearchDocumentStore. 
diff --git a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py index f8691ca8bd..1fcf4819fa 100644 --- a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py +++ b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py @@ -50,8 +50,7 @@ class ElasticsearchDocumentStore: """ - An ElasticsearchDocumentStore instance that works with Elastic Cloud or your own - Elasticsearch cluster. + An ElasticsearchDocumentStore instance that works with Elastic Cloud or your own Elasticsearch cluster. Usage example (Elastic Cloud): ```python @@ -309,6 +308,7 @@ def count_documents(self) -> int: async def count_documents_async(self) -> int: """ Asynchronously returns how many documents are present in the document store. + :returns: Number of documents in the document store. """ self._ensure_initialized() @@ -407,7 +407,9 @@ async def filter_documents_async(self, filters: dict[str, Any] | None = None) -> def _deserialize_document(hit: dict[str, Any]) -> Document: """ Creates a `Document` from the search hit provided. + This is mostly useful in self.filter_documents(). + :param hit: A search hit from Elasticsearch. :returns: `Document` created from the search hit. """ @@ -1136,8 +1138,7 @@ def _extract_distinct_counts_from_aggregations( def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]: """ - Returns the number of unique values for each specified metadata field of the documents - that match the provided filters. + Returns the number of unique values for each specified metadata field of documents matching the filters. :param filters: The filters to apply to count documents. 
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) @@ -1180,8 +1181,7 @@ async def count_unique_metadata_by_filter_async( self, filters: dict[str, Any], metadata_fields: list[str] ) -> dict[str, int]: """ - Asynchronously returns the number of unique values for each specified metadata field of the documents - that match the provided filters. + Asynchronously returns the number of unique values per metadata field of documents matching the filters. :param filters: The filters to apply to count documents. For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) @@ -1352,6 +1352,7 @@ def get_metadata_field_unique_values( ) -> tuple[list[str], dict[str, Any] | None]: """ Returns unique values for a metadata field, optionally filtered by a search term in the content. + Uses composite aggregations for proper pagination beyond 10k results. See: https://www.elastic.co/docs/reference/aggregations/search-aggregations-bucket-composite-aggregation @@ -1418,6 +1419,7 @@ async def get_metadata_field_unique_values_async( ) -> tuple[list[str], dict[str, Any] | None]: """ Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content. + Uses composite aggregations for proper pagination beyond 10k results. 
See: https://www.elastic.co/docs/reference/aggregations/search-aggregations-bucket-composite-aggregation diff --git a/integrations/faiss/pyproject.toml b/integrations/faiss/pyproject.toml index ba036365af..6f99407e37 100644 --- a/integrations/faiss/pyproject.toml +++ b/integrations/faiss/pyproject.toml @@ -95,6 +95,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -149,7 +156,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] "example/**/*" = ["T201"] [tool.coverage.run] diff --git a/integrations/faiss/src/haystack_integrations/components/retrievers/faiss/embedding_retriever.py b/integrations/faiss/src/haystack_integrations/components/retrievers/faiss/embedding_retriever.py index 7637e9091a..8d7e3daaf6 100644 --- a/integrations/faiss/src/haystack_integrations/components/retrievers/faiss/embedding_retriever.py +++ b/integrations/faiss/src/haystack_integrations/components/retrievers/faiss/embedding_retriever.py @@ -61,6 +61,8 @@ def __init__( filter_policy: str | FilterPolicy = FilterPolicy.REPLACE, ) -> None: """ + Initialize FAISSEmbeddingRetriever. + :param document_store: An instance of `FAISSDocumentStore`. :param filters: Filters applied to the retrieved Documents at initialisation time. At runtime, these are merged with any runtime filters according to the `filter_policy`. 
diff --git a/integrations/fastembed/pyproject.toml b/integrations/fastembed/pyproject.toml index 01fad41085..c457328fe2 100644 --- a/integrations/fastembed/pyproject.toml +++ b/integrations/fastembed/pyproject.toml @@ -83,6 +83,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -134,7 +141,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # examples can contain "print" commands "examples/**/*" = ["T201", "E501"] diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py index b2c28918dc..3263ec884b 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_embedder.py @@ -14,6 +14,7 @@ class FastembedDocumentEmbedder: """ FastembedDocumentEmbedder computes Document embeddings using Fastembed embedding models. + The embedding of each Document is stored in the `embedding` field of the Document. Usage example: @@ -110,6 +111,7 @@ def __init__( def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. 
""" diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index 5fda8ffbbe..61bddad864 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -106,6 +106,7 @@ def __init__( def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. """ diff --git a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py index efb6ac89b3..de5d63422a 100644 --- a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py +++ b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py @@ -14,8 +14,9 @@ @component class FastembedRanker: """ - Ranks Documents based on their similarity to the query using - [Fastembed models](https://qdrant.github.io/fastembed/examples/Supported_Models/). + Ranks Documents based on their similarity to the query using Fastembed models. + + See https://qdrant.github.io/fastembed/examples/Supported_Models/ for supported models. Documents are indexed from most to least semantically relevant to the query. @@ -129,6 +130,7 @@ def warm_up(self) -> None: def _prepare_fastembed_input_docs(self, documents: list[Document]) -> list[str]: """ Prepare the input by concatenating the document text with the metadata fields specified. + :param documents: The list of Document objects. :return: A list of strings to be given as input to Fastembed model. 
diff --git a/integrations/firecrawl/pyproject.toml b/integrations/firecrawl/pyproject.toml index 5c9cb26719..0b75c56cea 100644 --- a/integrations/firecrawl/pyproject.toml +++ b/integrations/firecrawl/pyproject.toml @@ -93,6 +93,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -147,7 +154,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/firecrawl/src/haystack_integrations/components/fetchers/firecrawl/firecrawl_crawler.py b/integrations/firecrawl/src/haystack_integrations/components/fetchers/firecrawl/firecrawl_crawler.py index 65656ad00d..9f2d1316db 100644 --- a/integrations/firecrawl/src/haystack_integrations/components/fetchers/firecrawl/firecrawl_crawler.py +++ b/integrations/firecrawl/src/haystack_integrations/components/fetchers/firecrawl/firecrawl_crawler.py @@ -125,6 +125,7 @@ async def run_async( def warm_up(self) -> None: """ Warm up the Firecrawl client by initializing the clients. + This is useful to avoid cold start delays when crawling many URLs. 
""" if self._firecrawl_client is None: diff --git a/integrations/firecrawl/src/haystack_integrations/components/websearch/firecrawl/firecrawl_websearch.py b/integrations/firecrawl/src/haystack_integrations/components/websearch/firecrawl/firecrawl_websearch.py index 62ab7263a4..735449b5c8 100644 --- a/integrations/firecrawl/src/haystack_integrations/components/websearch/firecrawl/firecrawl_websearch.py +++ b/integrations/firecrawl/src/haystack_integrations/components/websearch/firecrawl/firecrawl_websearch.py @@ -71,6 +71,7 @@ def __init__( def warm_up(self) -> None: """ Warm up the Firecrawl clients by initializing the sync and async clients. + This is useful to avoid cold start delays when performing searches. """ if self._firecrawl_client is None: diff --git a/integrations/github/pyproject.toml b/integrations/github/pyproject.toml index 6d6e1e7f20..6cf95b2a1f 100644 --- a/integrations/github/pyproject.toml +++ b/integrations/github/pyproject.toml @@ -86,6 +86,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -135,7 +142,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Ignore RUF001 for all files in the prompts directory "src/haystack_integrations/components/prompts/**/*" = ["RUF001"] diff --git a/integrations/google_genai/pyproject.toml b/integrations/google_genai/pyproject.toml index db7445405c..b3b514ca21 100644 --- a/integrations/google_genai/pyproject.toml +++ 
b/integrations/google_genai/pyproject.toml @@ -96,6 +96,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -151,7 +158,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can use print statements "examples/**/*" = ["T201"] diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py index 7ef45be8cd..6e689ccf3a 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py @@ -315,6 +315,7 @@ def _handle_streaming_response( ) -> dict[str, list[ChatMessage]]: """ Handle streaming response from Google Gen AI generate_content_stream. + :param response_stream: The streaming response from generate_content_stream. :param streaming_callback: The callback function for streaming chunks. :returns: A dictionary with the replies. @@ -346,6 +347,7 @@ async def _handle_streaming_response_async( ) -> dict[str, list[ChatMessage]]: """ Handle async streaming response from Google Gen AI generate_content_stream. + :param response_stream: The async streaming response from generate_content_stream. 
:param streaming_callback: The async callback function for streaming chunks. :returns: A dictionary with the replies. diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py index 82e64a6144..4344f44fc4 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py @@ -57,8 +57,9 @@ def _process_response_format(generation_kwargs: dict[str, Any]) -> dict[str, Any]: """ - Process `response_format` from generation_kwargs into Google GenAI's native - `response_schema` and `response_mime_type` parameters. + Process `response_format` from generation_kwargs into Google GenAI native parameters. + + Converts `response_format` into `response_schema` and `response_mime_type`. Accepts either a Pydantic BaseModel class or a JSON schema dict. When `response_format` is present, it is popped and replaced with the two @@ -181,7 +182,6 @@ def remove_key_from_schema( """ Recursively traverse a schema and remove all occurrences of the target key. - :param schema: The schema dictionary/list/value to process :param target_key: The key to remove from all dictionaries in the schema @@ -441,7 +441,8 @@ def _convert_tools_to_google_genai_format(tools: ToolsType) -> list[types.Tool]: def _convert_usage_metadata_to_serializable( usage_metadata: UsageMetadata | GenerateContentResponseUsageMetadata | None, ) -> dict[str, Any]: - """Build a JSON-serializable usage dict from a UsageMetadata object. + """ + Build a JSON-serializable usage dict from a UsageMetadata object. Iterates over known UsageMetadata attribute names and adds each non-None value in serialized form. 
Full list of fields: https://ai.google.dev/api/generate-content#UsageMetadata diff --git a/integrations/hanlp/pyproject.toml b/integrations/hanlp/pyproject.toml index 8943dbc277..bf5adef186 100644 --- a/integrations/hanlp/pyproject.toml +++ b/integrations/hanlp/pyproject.toml @@ -100,6 +100,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -152,7 +159,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml index 6a16ea6862..ee887ee2ef 100644 --- a/integrations/jina/pyproject.toml +++ b/integrations/jina/pyproject.toml @@ -89,6 +89,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -135,7 +142,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # examples can contain 
"print" commands "examples/**/*" = ["T201"] diff --git a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py index 835a92db53..f4f31ef12e 100644 --- a/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py +++ b/integrations/jina/src/haystack_integrations/components/connectors/jina/reader.py @@ -70,6 +70,7 @@ def __init__( def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. """ @@ -84,6 +85,7 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> "JinaReaderConnector": """ Deserializes the component from a dictionary. + :param data: Dictionary to deserialize from. :returns: diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py index f20c82f036..d8d1aef549 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py @@ -16,6 +16,7 @@ class JinaDocumentEmbedder: """ A component for computing Document embeddings using Jina AI models. + The embedding of each Document is stored in the `embedding` field of the Document. Usage example: @@ -105,6 +106,7 @@ def _get_telemetry_data(self) -> dict[str, Any]: def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. """ @@ -132,6 +134,7 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> "JinaDocumentEmbedder": """ Deserializes the component from a dictionary. + :param data: Dictionary to deserialize from. 
:returns: diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py index 336fc18e2b..685a0dd164 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py @@ -91,6 +91,7 @@ def _get_telemetry_data(self) -> dict[str, Any]: def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. """ @@ -113,6 +114,7 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> "JinaTextEmbedder": """ Deserializes the component from a dictionary. + :param data: Dictionary to deserialize from. :returns: diff --git a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py index ac78adf950..347943205c 100644 --- a/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py +++ b/integrations/jina/src/haystack_integrations/components/rankers/jina/ranker.py @@ -74,6 +74,7 @@ def __init__( def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. """ @@ -89,6 +90,7 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> "JinaRanker": """ Deserializes the component from a dictionary. + :param data: Dictionary to deserialize from. 
:returns: diff --git a/integrations/kreuzberg/pyproject.toml b/integrations/kreuzberg/pyproject.toml index d66e06bcdc..0e9c7198ad 100644 --- a/integrations/kreuzberg/pyproject.toml +++ b/integrations/kreuzberg/pyproject.toml @@ -79,6 +79,13 @@ select = [ "ARG", # flake8-unused-arguments "B", # flake8-bugbear "C", # flake8-comprehensions + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", # flake8-datetimez "E", # pycodestyle (error) "EM", # flake8-errmsg @@ -130,7 +137,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["ANN", "PLR2004", "S101", "S108", "TID252"] +"tests/**/*" = ["D", "ANN", "PLR2004", "S101", "S108", "TID252"] [tool.mypy] strict = true diff --git a/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/converter.py b/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/converter.py index 5ea6f00134..1deffbd629 100644 --- a/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/converter.py +++ b/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/converter.py @@ -263,8 +263,9 @@ def _collect_batch_results( @staticmethod def _build_extraction_metadata(result: ExtractionResult) -> dict[str, Any]: """ - Build metadata dict from an `ExtractionResult`, flattening kreuzberg's - metadata fields and enriching with top-level result attributes. + Build metadata dict from an `ExtractionResult`. + + Flattens kreuzberg's metadata fields and enriches with top-level result attributes. 
Fields already present in `result.metadata` (`quality_score`, `output_format`, `keywords`) are passed through as-is - they diff --git a/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/utils.py b/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/utils.py index 7387cb8adf..68366636ea 100644 --- a/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/utils.py +++ b/integrations/kreuzberg/src/haystack_integrations/components/converters/kreuzberg/utils.py @@ -15,7 +15,8 @@ def _is_batch_error(result: ExtractionResult) -> bool: - """Detect error results returned by kreuzberg's batch APIs. + """ + Detect error results returned by kreuzberg's batch APIs. Batch APIs return ``ExtractionResult(content="Error: ...", metadata={}, quality_score=None)`` instead of raising exceptions. Valid results always