diff --git a/integrations/openrouter/pyproject.toml b/integrations/openrouter/pyproject.toml index cbe0cb06b4..70cdc9343d 100644 --- a/integrations/openrouter/pyproject.toml +++ b/integrations/openrouter/pyproject.toml @@ -85,6 +85,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -134,9 +141,9 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can print their output and don't need type annotations -"examples/**/*" = ["T201", "ANN"] +"examples/**/*" = ["D", "T201", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py b/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py index c7d2a2665a..f79245cdcc 100644 --- a/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py +++ b/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py @@ -18,6 +18,7 @@ class OpenRouterChatGenerator(OpenAIChatGenerator): """ Enables text generation using OpenRouter generative models. + For supported models, see [OpenRouter docs](https://openrouter.ai/models). 
Users can pass any text generation parameters valid for the OpenRouter chat completion API @@ -71,8 +72,7 @@ def __init__( http_client_kwargs: dict[str, Any] | None = None, ) -> None: """ - Creates an instance of OpenRouterChatGenerator. Unless specified otherwise, - the default model is `openai/gpt-5-mini`. + Creates an instance of OpenRouterChatGenerator. :param api_key: The OpenRouter API key. diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml index a3a54b95a4..7181ea18f4 100644 --- a/integrations/opensearch/pyproject.toml +++ b/integrations/opensearch/pyproject.toml @@ -94,6 +94,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -145,7 +152,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py index 4da68626eb..fa6417c225 100644 --- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py @@ -114,8 +114,9 @@ def __init__( **kwargs: Any, ) -> None: """ - Initialize the 
OpenSearchHybridRetriever, a super component to retrieve documents from OpenSearch using - both embedding-based and keyword-based retrieval methods. + Initialize the OpenSearchHybridRetriever using both embedding-based and keyword-based retrieval methods. + + This is a super component to retrieve documents from OpenSearch using both retrieval methods. We don't explicitly define all the init parameters of the components in the constructor, for each of the components, since that would be around 20+ parameters. Instead, we define the most important ones @@ -242,7 +243,9 @@ def __init__( if TYPE_CHECKING: - def warm_up(self) -> None: ... + def warm_up(self) -> None: + """Warm up the underlying pipeline components.""" + ... def run( self, @@ -251,7 +254,9 @@ def run( filters_embedding: dict[str, Any] | None = None, top_k_bm25: int | None = None, top_k_embedding: int | None = None, - ) -> dict[str, list[Document]]: ... + ) -> dict[str, list[Document]]: + """Run the hybrid retrieval pipeline and return retrieved documents.""" + ... 
def _create_pipeline(self, data: dict[str, Any]) -> Pipeline: """ @@ -328,6 +333,7 @@ def to_dict(self) -> dict[str, Any]: @classmethod def from_dict(cls, data: dict[str, Any]) -> "OpenSearchHybridRetriever": + """Deserialize an OpenSearchHybridRetriever from a dictionary.""" # deserialize the document store doc_store = OpenSearchDocumentStore.from_dict(data["init_parameters"]["document_store"]) data["init_parameters"]["document_store"] = doc_store diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py index 619e80b38e..ccb6679d76 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py @@ -41,6 +41,7 @@ def _get_aws_session( ) -> "boto3.Session": """ Creates an AWS Session with the given parameters. + Checks if the provided AWS credentials are valid and can be used to connect to AWS. :param aws_access_key_id: AWS access key ID. diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py index 6695b21f2f..fe4cb177bb 100644 --- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py @@ -559,6 +559,7 @@ async def write_documents_async( def _deserialize_document(hit: dict[str, Any]) -> Document: """ Creates a Document from the search hit provided. + This is mostly useful in self.filter_documents(). """ data = hit["_source"] @@ -1482,6 +1483,7 @@ def _embedding_retrieval( ) -> list[Document]: """ Retrieves documents that are most similar to the query embedding using a vector similarity metric. 
+ It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm. This method is not meant to be part of the public interface of @@ -1513,8 +1515,9 @@ async def _embedding_retrieval_async( search_kwargs: dict[str, Any] | None = None, ) -> list[Document]: """ - Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity - metric. It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm. + Asynchronously retrieves documents most similar to the query embedding using a vector similarity metric. + + It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm. This method is not meant to be part of the public interface of `OpenSearchDocumentStore` nor called directly. @@ -1641,8 +1644,7 @@ def _extract_distinct_counts_from_aggregations( def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]: """ - Returns the number of unique values for each specified metadata field of the documents - that match the provided filters. + Returns the number of unique values for each specified metadata field of the documents that match the filters. :param filters: The filters to apply to count documents. For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) @@ -1685,8 +1687,7 @@ async def count_unique_metadata_by_filter_async( self, filters: dict[str, Any], metadata_fields: list[str] ) -> dict[str, int]: """ - Asynchronously returns the number of unique values for each specified metadata field of the documents - that match the provided filters. + Asynchronously returns the number of unique values for each specified metadata field of the matching documents. :param filters: The filters to apply to count documents. 
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) @@ -1862,6 +1863,7 @@ def get_metadata_field_unique_values( ) -> tuple[list[str], dict[str, Any] | None]: """ Returns unique values for a metadata field, optionally filtered by a search term in the content. + Uses composite aggregations for proper pagination beyond 10k results. :param metadata_field: The metadata field to get unique values for. @@ -1927,6 +1929,7 @@ async def get_metadata_field_unique_values_async( ) -> tuple[list[str], dict[str, Any] | None]: """ Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content. + Uses composite aggregations for proper pagination beyond 10k results. :param metadata_field: The metadata field to get unique values for. diff --git a/integrations/optimum/pyproject.toml b/integrations/optimum/pyproject.toml index 9fa49d2ce8..39bc3c2c08 100644 --- a/integrations/optimum/pyproject.toml +++ b/integrations/optimum/pyproject.toml @@ -106,6 +106,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -151,9 +158,9 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can print their output -"examples/**" = ["T201"] +"examples/**" = ["D", "T201"] "tests/**" = ["T201"] [tool.coverage.run] diff --git 
a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py index 660ae6e7a4..d17624ce72 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimization.py @@ -11,8 +11,10 @@ class OptimumEmbedderOptimizationMode(Enum): """ - [ONXX Optimization modes](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization) - support by the Optimum Embedders. + ONNX Optimization modes supported by the Optimum Embedders. + + See [Optimum ONNX optimization docs](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization) + for more details. """ #: Basic general optimizations. diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py index 81ce58f661..727979d861 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py @@ -17,9 +17,10 @@ @component class OptimumDocumentEmbedder: """ - A component for computing `Document` embeddings using models loaded with the - [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library, - leveraging the ONNX runtime for high-speed inference. + A component for computing `Document` embeddings using models loaded with the HuggingFace Optimum library. + + Uses the [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library and leverages the ONNX + runtime for high-speed inference. The embedding of each Document is stored in the `embedding` field of the Document. 
@@ -199,6 +200,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: def run(self, documents: list[Document]) -> dict[str, list[Document]]: """ Embed a list of Documents. + The embedding of each Document is stored in the `embedding` field of the Document. :param documents: diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py index b145553c3f..52c37fece4 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py @@ -16,9 +16,10 @@ @component class OptimumTextEmbedder: """ - A component to embed text using models loaded with the - [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library, - leveraging the ONNX runtime for high-speed inference. + A component to embed text using models loaded with the HuggingFace Optimum library. + + Uses the [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library and leverages the ONNX + runtime for high-speed inference. Usage example: ```python diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py index 11dbd147e8..799361fb69 100644 --- a/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum/quantization.py @@ -11,8 +11,10 @@ class OptimumEmbedderQuantizationMode(Enum): """ - [Dynamic Quantization modes](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization) - support by the Optimum Embedders. + Dynamic Quantization modes supported by the Optimum Embedders. 
+ + See [Optimum ONNX quantization docs](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization) + for more details. """ #: Quantization for the ARM64 architecture. diff --git a/integrations/paddleocr/pyproject.toml b/integrations/paddleocr/pyproject.toml index 7ce6e181c3..3f0ccd33d1 100644 --- a/integrations/paddleocr/pyproject.toml +++ b/integrations/paddleocr/pyproject.toml @@ -88,6 +88,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -137,7 +144,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/paddleocr/src/haystack_integrations/components/converters/paddleocr/paddleocr_vl_document_converter.py b/integrations/paddleocr/src/haystack_integrations/components/converters/paddleocr/paddleocr_vl_document_converter.py index f5d8a13a84..6aa72dfacf 100644 --- a/integrations/paddleocr/src/haystack_integrations/components/converters/paddleocr/paddleocr_vl_document_converter.py +++ b/integrations/paddleocr/src/haystack_integrations/components/converters/paddleocr/paddleocr_vl_document_converter.py @@ -113,8 +113,7 @@ def _normalize_file_type(file_type: FileTypeInput) -> FileType | None: @component class PaddleOCRVLDocumentConverter: """ - This component extracts text from documents using PaddleOCR's large model - document parsing API. 
+ Extracts text from documents using PaddleOCR's large model document parsing API. PaddleOCR-VL is used behind the scenes. For more information, please refer to: diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml index 1bc8b76da3..2857d38e4c 100644 --- a/integrations/pgvector/pyproject.toml +++ b/integrations/pgvector/pyproject.toml @@ -96,6 +96,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -149,9 +156,9 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # examples can contain "print" commands -"examples/**/*" = ["T201"] +"examples/**/*" = ["D", "T201"] [tool.coverage.run] diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py index ae5364a655..4b8b21c7cd 100644 --- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py +++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py @@ -68,6 +68,8 @@ def __init__( filter_policy: str | FilterPolicy = FilterPolicy.REPLACE, ) -> None: """ + Initialize the PgvectorEmbeddingRetriever. + :param document_store: An instance of `PgvectorDocumentStore`. :param filters: Filters applied to the retrieved Documents. 
:param top_k: Maximum number of Documents to return. diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py index e57ca320bd..a03e4c8a4c 100644 --- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py +++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/keyword_retriever.py @@ -57,6 +57,8 @@ def __init__( filter_policy: str | FilterPolicy = FilterPolicy.REPLACE, ) -> None: """ + Initialize the PgvectorKeywordRetriever. + :param document_store: An instance of `PgvectorDocumentStore`. :param filters: Filters applied to the retrieved Documents. :param top_k: Maximum number of Documents to return. diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py index 14d3b29fd9..35bfd0fa66 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/converters.py @@ -10,8 +10,9 @@ def _from_haystack_to_pg_documents(documents: list[Document]) -> list[dict[str, Any]]: """ - Internal method to convert a list of Haystack Documents to a list of dictionaries that can be used to insert - documents into the PgvectorDocumentStore. + Internal method to convert a list of Haystack Documents to a list of dictionaries. + + The resulting dictionaries can be used to insert documents into the PgvectorDocumentStore. 
""" db_documents = [] diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index be1add0196..edbb3fae87 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -99,6 +99,7 @@ def __init__( ) -> None: """ Creates a new PgvectorDocumentStore instance. + It is meant to be connected to a PostgreSQL database with the pgvector extension installed. A specific table to store Haystack documents will be created if it doesn't exist yet. @@ -349,6 +350,7 @@ async def _execute_sql_async( def _ensure_db_setup(self) -> None: """ Ensures that the connection to the PostgreSQL database exists and is valid. + If not, connection and cursors are created. If the table is not initialized, it will be set up. """ @@ -387,6 +389,7 @@ def _ensure_db_setup(self) -> None: async def _ensure_db_setup_async(self) -> None: """ Async internal method. + Ensures that the connection to the PostgreSQL database exists and is valid. If not, connection and cursors are created. If the table is not initialized, it will be set up. @@ -550,6 +553,7 @@ async def _initialize_table_async(self) -> None: def delete_table(self) -> None: """ Deletes the table used to store Haystack documents. + The name of the schema (`schema_name`) and the name of the table (`table_name`) are defined when initializing the `PgvectorDocumentStore`. """ @@ -636,6 +640,7 @@ def _build_hnsw_queries(self) -> tuple[Composed | None, SQL, Composed, Composed] def _handle_hnsw(self) -> None: """ Internal method to handle the HNSW index creation. + It also sets the `hnsw.ef_search` parameter for queries if it is specified. 
""" @@ -1418,8 +1423,9 @@ async def _embedding_retrieval_async( vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] | None = None, ) -> list[Document]: """ - Asynchronously retrieves documents that are most similar to the query embedding using a - vector similarity metric. + Asynchronously retrieves documents that are most similar to the query embedding. + + Uses a vector similarity metric for comparison. """ sql_query, params = self._check_and_build_embedding_retrieval_query( @@ -1571,8 +1577,9 @@ def _process_count_unique_metadata_result( def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]: """ - Returns the count of unique values for each specified metadata field, - considering only documents that match the provided filters. + Returns the count of unique values for each specified metadata field. + + Considers only documents that match the provided filters. :param filters: The filters to apply to select documents. For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) @@ -1604,8 +1611,9 @@ async def count_unique_metadata_by_filter_async( self, filters: dict[str, Any], metadata_fields: list[str] ) -> dict[str, int]: """ - Asynchronously returns the count of unique values for each specified metadata field, - considering only documents that match the provided filters. + Asynchronously returns the count of unique values for each specified metadata field. + + Considers only documents that match the provided filters. :param filters: The filters to apply to select documents. 
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py index 92b3d42bd6..15370eb333 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/filters.py @@ -116,8 +116,7 @@ def _parse_comparison_condition(condition: dict[str, Any]) -> tuple[Composed, li def _treat_meta_field(field: str, value: Any) -> Composed: """ - Internal method that returns a psycopg Composed object - to make the meta JSONB field queryable safely. + Internal method that returns a psycopg Composed object to make the meta JSONB field queryable safely. Uses psycopg.sql.Literal to embed the field name, preventing SQL injection via metadata field names without requiring regex validation. 
diff --git a/integrations/pinecone/pyproject.toml b/integrations/pinecone/pyproject.toml index 870bd64bf4..fa54fcecda 100644 --- a/integrations/pinecone/pyproject.toml +++ b/integrations/pinecone/pyproject.toml @@ -95,6 +95,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -146,9 +153,9 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # examples can contain "print" commands -"examples/**/*" = ["T201"] +"examples/**/*" = ["D", "T201"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py index 70274ddb1b..ce664537e1 100644 --- a/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py +++ b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/embedding_retriever.py @@ -60,6 +60,8 @@ def __init__( filter_policy: str | FilterPolicy = FilterPolicy.REPLACE, ) -> None: """ + Initialize the PineconeEmbeddingRetriever. + :param document_store: The Pinecone Document Store. :param filters: Filters applied to the retrieved Documents. :param top_k: Maximum number of Documents to return. @@ -81,6 +83,7 @@ def __init__( def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. 
+ :returns: Dictionary with serialized data. """ @@ -96,6 +99,7 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> "PineconeEmbeddingRetriever": """ Deserializes the component from a dictionary. + :param data: Dictionary to deserialize from. :returns: diff --git a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py index 26fdbdfa21..359a4e5179 100644 --- a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py +++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py @@ -47,6 +47,7 @@ def __init__( ) -> None: """ Creates a new PineconeDocumentStore instance. + It is meant to be connected to a Pinecone index and namespace. :param api_key: The Pinecone API key. @@ -180,6 +181,7 @@ def _convert_dict_spec_to_pinecone_object(spec: dict[str, Any]) -> ServerlessSpe def from_dict(cls, data: dict[str, Any]) -> "PineconeDocumentStore": """ Deserializes the component from a dictionary. + :param data: Dictionary to deserialize from. :returns: @@ -191,6 +193,7 @@ def from_dict(cls, data: dict[str, Any]) -> "PineconeDocumentStore": def to_dict(self) -> dict[str, Any]: """ Serializes the component to a dictionary. + :returns: Dictionary with serialized data. """ @@ -632,8 +635,10 @@ async def _embedding_retrieval_async( @staticmethod def _convert_meta_to_int(metadata: dict[str, Any]) -> dict[str, Any]: """ - Pinecone store numeric metadata values as `float`. Some specific metadata are used in Retrievers components and - are expected to be `int`. This method converts them back to integers. + Convert specific numeric metadata values from `float` back to `int`. + + Pinecone stores numeric metadata values as `float`. Some specific metadata are used in Retrievers + components and are expected to be `int`. This method converts them back to integers. 
""" values_to_convert = ["split_id", "split_idx_start", "page_number"] diff --git a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py index d31bd50558..97e2397c9d 100644 --- a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py +++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py @@ -9,6 +9,7 @@ def _normalize_filters(filters: dict[str, Any]) -> dict[str, Any]: """ Converts Haystack filters in Pinecone compatible filters. + Reference: https://docs.pinecone.io/docs/metadata-filtering """ if not isinstance(filters, dict): diff --git a/integrations/pinecone/tests/conftest.py b/integrations/pinecone/tests/conftest.py index e0d7885cfd..bafcd8d70c 100644 --- a/integrations/pinecone/tests/conftest.py +++ b/integrations/pinecone/tests/conftest.py @@ -94,8 +94,15 @@ async def delete_documents_and_wait_async(filters): await original_delete_documents(filters) await asyncio.sleep(DELETE_SLEEP_TIME_IN_SECONDS) + original_delete_all_documents = store.delete_all_documents_async + + async def delete_all_documents_and_wait_async(): + await original_delete_all_documents() + await asyncio.sleep(DELETE_SLEEP_TIME_IN_SECONDS) + store.write_documents_async = write_documents_and_wait_async store.delete_documents_async = delete_documents_and_wait_async + store.delete_all_documents_async = delete_all_documents_and_wait_async yield store try: diff --git a/integrations/pyversity/pyproject.toml b/integrations/pyversity/pyproject.toml index 6d7888c6c5..cf1e368570 100644 --- a/integrations/pyversity/pyproject.toml +++ b/integrations/pyversity/pyproject.toml @@ -89,6 +89,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to 
new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -138,9 +145,9 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can use print statements -"examples/**/*" = ["T201"] +"examples/**/*" = ["D", "T201"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/pyversity/src/haystack_integrations/components/rankers/pyversity/ranker.py b/integrations/pyversity/src/haystack_integrations/components/rankers/pyversity/ranker.py index 622ae11da3..1f54d474be 100644 --- a/integrations/pyversity/src/haystack_integrations/components/rankers/pyversity/ranker.py +++ b/integrations/pyversity/src/haystack_integrations/components/rankers/pyversity/ranker.py @@ -2,7 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Haystack integration for `pyversity `_. +""" +Haystack integration for `pyversity `_. Wraps pyversity's diversification algorithms as a Haystack ``@component``, making it easy to drop result diversification into any Haystack pipeline. 
diff --git a/integrations/qdrant/pyproject.toml b/integrations/qdrant/pyproject.toml index 6eaad6af52..e5ee1c3105 100644 --- a/integrations/qdrant/pyproject.toml +++ b/integrations/qdrant/pyproject.toml @@ -93,6 +93,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -144,9 +151,9 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # examples can contain "print" commands -"examples/**/*" = ["T201"] +"examples/**/*" = ["D", "T201"] [tool.coverage.run] diff --git a/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py b/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py index 3025014f19..aa7ec1d356 100644 --- a/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py +++ b/integrations/qdrant/src/haystack_integrations/components/retrievers/qdrant/retriever.py @@ -482,8 +482,9 @@ async def run_async( @component class QdrantHybridRetriever: """ - A component for retrieving documents from an QdrantDocumentStore using both dense and sparse vectors - and fusing the results using Reciprocal Rank Fusion. + A component for retrieving documents from a QdrantDocumentStore using both dense and sparse vectors. + + Fuses the results using Reciprocal Rank Fusion. 
Usage example: ```python diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py index 94001d67ff..fc005ad846 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py @@ -18,6 +18,7 @@ def convert_haystack_documents_to_qdrant_points( *, use_sparse_embeddings: bool, ) -> list[rest.PointStruct]: + """Convert a list of Haystack Document objects to Qdrant PointStruct objects.""" points = [] for document in documents: payload = document.to_dict(flatten=False) @@ -61,6 +62,7 @@ def convert_id(_id: str) -> str: def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document: + """Convert a Qdrant ScoredPoint or Record to a Haystack Document object.""" payload = point.payload or {} payload["score"] = point.score if hasattr(point, "score") else None diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py index cf3dcb53c3..749ebfff13 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py @@ -53,8 +53,9 @@ def get_batches_from_generator(iterable: list, n: int) -> Generator: class QdrantDocumentStore: """ - A QdrantDocumentStore implementation that you can use with any Qdrant instance: in-memory, disk-persisted, - Docker-based, and Qdrant Cloud Cluster deployments. + A QdrantDocumentStore implementation that you can use with any Qdrant instance. + + Supports in-memory, disk-persisted, Docker-based, and Qdrant Cloud Cluster deployments. 
Usage example by creating an in-memory instance: @@ -375,6 +376,7 @@ def write_documents( ) -> int: """ Writes documents to Qdrant using the specified policy. + The QdrantDocumentStore can handle duplicate documents based on the given policy. The available policies are: - `FAIL`: The operation will raise an error if any document already exists. @@ -428,6 +430,7 @@ async def write_documents_async( ) -> int: """ Asynchronously writes documents to Qdrant using the specified policy. + The QdrantDocumentStore can handle duplicate documents based on the given policy. The available policies are: - `FAIL`: The operation will raise an error if any document already exists. @@ -1135,8 +1138,9 @@ async def count_unique_metadata_by_filter_async( self, filters: dict[str, Any], metadata_fields: list[str] ) -> dict[str, int]: """ - Asynchronously returns the number of unique values for each specified metadata field among documents that - match the filters. + Asynchronously returns the number of unique values for each specified metadata field among documents. + + Only documents that match the filters are considered. :param filters: The filters to restrict the documents considered. For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering) @@ -1838,8 +1842,9 @@ async def _query_hybrid_async( group_size: int | None = None, ) -> list[Document]: """ - Asynchronously retrieves documents based on dense and sparse embeddings and fuses - the results using Reciprocal Rank Fusion. + Asynchronously retrieves documents based on dense and sparse embeddings. + + Fuses the results using Reciprocal Rank Fusion. This method is not part of the public interface of `QdrantDocumentStore` and shouldn't be used directly. Use the `QdrantHybridRetriever` instead. 
@@ -2204,8 +2209,9 @@ def _handle_duplicate_documents( policy: DuplicatePolicy | None = None, ) -> list[Document]: """ - Checks whether any of the passed documents is already existing in the chosen index and returns a list of - documents that are not in the index yet. + Checks whether any of the passed documents is already existing in the chosen index. + + Returns a list of documents that are not in the index yet. :param documents: A list of Haystack Document objects. :param policy: The duplicate policy to use when writing documents. @@ -2231,9 +2237,9 @@ async def _handle_duplicate_documents_async( policy: DuplicatePolicy | None = None, ) -> list[Document]: """ - Asynchronously checks whether any of the passed documents is already existing - in the chosen index and returns a list of - documents that are not in the index yet. + Asynchronously checks whether any of the passed documents is already existing in the chosen index. + + Returns a list of documents that are not in the index yet. :param documents: A list of Haystack Document objects. :param policy: The duplicate policy to use when writing documents. diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py index 44f1c8e36b..eb2de15a0a 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/filters.py @@ -9,7 +9,8 @@ def convert_filters_to_qdrant( filter_term: list[dict[str, Any]] | dict[str, Any] | models.Filter | None = None, ) -> models.Filter | None: - """Converts Haystack filters to the format used by Qdrant. + """ + Converts Haystack filters to the format used by Qdrant. :param filter_term: the haystack filter to be converted to qdrant. :returns: a single Qdrant Filter or None. 
@@ -228,6 +229,7 @@ def _build_gte_condition(key: str, value: str | float | int) -> models.Condition def is_datetime_string(value: str) -> bool: + """Return True if the given string can be parsed as an ISO 8601 datetime, False otherwise.""" try: datetime.fromisoformat(value) return True diff --git a/integrations/ragas/pyproject.toml b/integrations/ragas/pyproject.toml index c04dfeaa51..3af402e0de 100644 --- a/integrations/ragas/pyproject.toml +++ b/integrations/ragas/pyproject.toml @@ -90,6 +90,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -143,7 +150,7 @@ ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py index 443d99e03f..4772b76ddd 100644 --- a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py +++ b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py @@ -19,8 +19,9 @@ @component class RagasEvaluator: """ - A component that uses the [Ragas framework](https://docs.ragas.io/) to evaluate - inputs against specified Ragas metrics. + A component that uses the Ragas framework to evaluate inputs against specified Ragas metrics. 
+ + See the [Ragas framework](https://docs.ragas.io/) for more details. Usage example: ```python @@ -76,7 +77,8 @@ def _validate_inputs( llm: BaseRagasLLM | None, embedding: BaseRagasEmbeddings | None, ) -> None: - """Validate input parameters. + """ + Validate input parameters. :param metrics: List of Ragas metrics to validate :param llm: Language model to validate @@ -152,7 +154,8 @@ def run( return {"result": result} def _process_documents(self, documents: list[Document | str] | None) -> list[str] | None: - """Process and validate input documents. + """ + Process and validate input documents. :param documents: List of Documents or strings to process :return: List of document contents as strings or None @@ -172,7 +175,8 @@ def _process_documents(self, documents: list[Document | str] | None) -> list[str raise ValueError(error_message) def _process_response(self, response: list[ChatMessage] | str | None) -> str | None: - """Process response into expected format. + """ + Process response into expected format. :param response: Response to process :return: None or Processed response string @@ -186,7 +190,8 @@ def _process_response(self, response: list[ChatMessage] | str | None) -> str | N return response def _handle_conversion_error(self, error: Exception) -> None: - """Handle evaluation errors with improved messages. + """ + Handle evaluation errors with improved messages. 
:params error: Original error """ diff --git a/integrations/snowflake/pyproject.toml b/integrations/snowflake/pyproject.toml index 35ddb54733..11263d737a 100644 --- a/integrations/snowflake/pyproject.toml +++ b/integrations/snowflake/pyproject.toml @@ -91,6 +91,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -141,7 +148,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py b/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py index f075d11ae6..b5c38f2a9a 100644 --- a/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py +++ b/integrations/snowflake/src/haystack_integrations/components/retrievers/snowflake/snowflake_table_retriever.py @@ -21,6 +21,7 @@ class SnowflakeTableRetriever: """ Connects to a Snowflake database to execute a SQL query using ADBC and Polars. + Returns the results as a Pandas DataFrame (converted from a Polars DataFrame) along with a Markdown-formatted string. For more information, see [Polars documentation](https://docs.pola.rs/api/python/dev/reference/api/polars.read_database_uri.html). 
@@ -116,6 +117,8 @@ def __init__( oauth_authorization_url: str | None = None, ) -> None: """ + Initialize SnowflakeTableRetriever with connection and authentication parameters. + :param user: User's login. :param account: Snowflake account identifier. :param authenticator: Authentication method. Required. Options: "SNOWFLAKE" (password), @@ -316,6 +319,7 @@ def _create_masked_uri(self, uri: str) -> str: def _polars_to_md(data: pl.DataFrame) -> str: """ Converts a Polars DataFrame to a Markdown-formatted string. + Uses Polars' built-in table formatting for efficient conversion. :param data: The Polars DataFrame to convert. @@ -339,6 +343,7 @@ def _polars_to_md(data: pl.DataFrame) -> str: def _execute_query_with_connector(self, query: str) -> pl.DataFrame | None: """ Executes a query using snowflake-connector-python directly (for JWT authentication). + This bypasses ADBC compatibility issues. :param query: SQL query to execute. @@ -401,7 +406,8 @@ def _execute_query_with_connector(self, query: str) -> pl.DataFrame | None: @staticmethod def _empty_response() -> dict[str, DataFrame | str]: - """Returns a standardized empty response. + """ + Returns a standardized empty response. :returns: A dictionary with the following keys: