From b034feeca55006c5bfdad6715b02786b10d3c158 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 20 Mar 2026 20:36:39 +0100 Subject: [PATCH] chore: enforce ruff docstring rules (D102/D103/D205/D209/D213/D417/D419) in integrations 21-30 Adds D102, D103, D205, D209, D213, D417, D419 ruff rules to pyproject.toml for: langfuse, lara, llama_cpp, llama_stack, mcp, meta_llama, mistral, mongodb_atlas, nvidia, ollama. Fixes all resulting docstring violations. Part of https://github.com/deepset-ai/haystack-core-integrations/issues/2947 Co-Authored-By: Claude Sonnet 4.6 --- integrations/langfuse/pyproject.toml | 13 ++++++++++--- .../connectors/langfuse/langfuse_connector.py | 1 + .../tracing/langfuse/tracer.py | 9 +++++++++ integrations/lara/pyproject.toml | 9 ++++++++- integrations/llama_cpp/pyproject.toml | 9 ++++++++- .../generators/llama_cpp/chat/chat_generator.py | 5 +++++ .../components/generators/llama_cpp/generator.py | 4 ++++ integrations/llama_stack/pyproject.toml | 9 ++++++++- .../llama_stack/chat/chat_generator.py | 7 +++++-- integrations/mcp/pyproject.toml | 11 +++++++++-- .../haystack_integrations/tools/mcp/mcp_tool.py | 6 +++--- .../tools/mcp/mcp_toolset.py | 6 +++--- integrations/meta_llama/pyproject.toml | 9 ++++++++- .../generators/meta_llama/chat/chat_generator.py | 6 ++++-- integrations/mistral/pyproject.toml | 9 ++++++++- .../converters/mistral/ocr_document_converter.py | 5 +++-- .../embedders/mistral/document_embedder.py | 1 + .../generators/mistral/chat/chat_generator.py | 6 ++++-- integrations/mongodb_atlas/pyproject.toml | 9 ++++++++- .../mongodb_atlas/embedding_retriever.py | 3 +-- .../mongodb_atlas/full_text_retriever.py | 1 + .../mongodb_atlas/document_store.py | 14 ++++++-------- integrations/nvidia/pyproject.toml | 9 ++++++++- .../embedders/nvidia/document_embedder.py | 4 ++-- .../components/embedders/nvidia/text_embedder.py | 4 ++-- .../components/embedders/nvidia/truncate.py | 1 + .../generators/nvidia/chat/chat_generator.py | 1 + 
.../components/generators/nvidia/generator.py | 6 ++++-- .../components/rankers/nvidia/ranker.py | 4 ++-- .../components/rankers/nvidia/truncate.py | 1 + .../haystack_integrations/utils/nvidia/models.py | 1 + .../utils/nvidia/nim_backend.py | 4 ++++ .../haystack_integrations/utils/nvidia/utils.py | 4 ++-- integrations/ollama/pyproject.toml | 9 ++++++++- .../embedders/ollama/document_embedder.py | 8 ++++++-- .../components/embedders/ollama/text_embedder.py | 7 +++++-- .../generators/ollama/chat/chat_generator.py | 16 ++++++++++------ .../components/generators/ollama/generator.py | 2 ++ 38 files changed, 176 insertions(+), 57 deletions(-) diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index d027ca2d21..7bb9be404c 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -88,6 +88,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -136,10 +143,10 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can print their output -"examples/**" = ["T201", "ANN"] -"example/**" = ["T201", "ANN"] +"examples/**" = ["T201", "ANN", "D"] +"example/**" = ["T201", "ANN", "D"] "tests/**" = ["T201"] [tool.coverage.run] diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py 
b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index 08437899c1..95e3e3ab7d 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -19,6 +19,7 @@ class LangfuseConnector: """ LangfuseConnector connects Haystack LLM framework with [Langfuse](https://langfuse.com) in order to enable the + tracing of operations and data flow within various components of a pipeline. To use LangfuseConnector, add it to your pipeline without connecting it to any other components. diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 5e75e9ea2a..c02376e4be 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -125,6 +125,7 @@ def get_data(self) -> dict[str, Any]: return self._data def get_correlation_data_for_logs(self) -> dict[str, Any]: + """Return correlation data for log enrichment.""" return {} @@ -234,9 +235,11 @@ def handle(self, span: LangfuseSpan, component_type: str | None) -> None: @classmethod def from_dict(cls, data: dict[str, Any]) -> "SpanHandler": + """Deserialize a SpanHandler from a dictionary.""" return default_from_dict(cls, data) def to_dict(self) -> dict[str, Any]: + """Serialize this SpanHandler to a dictionary.""" return default_to_dict(self) @@ -273,6 +276,7 @@ class DefaultSpanHandler(SpanHandler): """DefaultSpanHandler provides the default Langfuse tracing behavior for Haystack.""" def create_span(self, context: SpanContext) -> LangfuseSpan: + """Create a Langfuse span based on the given context.""" if self.tracer is None: message = ( "Tracer is not initialized. 
" @@ -343,6 +347,7 @@ def create_span(self, context: SpanContext) -> LangfuseSpan: return LangfuseSpan(self.tracer.start_as_current_span(name=context.name)) def handle(self, span: LangfuseSpan, component_type: str | None) -> None: + """Process and enrich a span after component execution.""" # If the span is at the pipeline level, we add input and output keys to the span at_pipeline_level = span.get_data().get(_PIPELINE_INPUT_KEY) is not None if at_pipeline_level: @@ -456,6 +461,7 @@ def __init__( def trace( self, operation_name: str, tags: dict[str, Any] | None = None, parent_span: Span | None = None ) -> Iterator[Span]: + """Create and manage a tracing span as a context manager.""" tags = tags or {} span_name = tags.get(_COMPONENT_NAME_KEY, operation_name) component_type = tags.get(_COMPONENT_TYPE_KEY) @@ -543,6 +549,7 @@ def trace( self.flush() def flush(self) -> None: + """Flush all pending spans to Langfuse.""" self._tracer.flush() def current_span(self) -> Span | None: @@ -558,6 +565,7 @@ def current_span(self) -> Span | None: def get_trace_url(self) -> str: """ Return the URL to the tracing data. + :return: The URL to the tracing data. """ return self._tracer.get_trace_url() or "" @@ -565,6 +573,7 @@ def get_trace_url(self) -> str: def get_trace_id(self) -> str: """ Return the trace ID. + :return: The trace ID. 
""" return self._tracer.get_current_trace_id() or "" diff --git a/integrations/lara/pyproject.toml b/integrations/lara/pyproject.toml index ed8cff8991..cdd6bd31f9 100644 --- a/integrations/lara/pyproject.toml +++ b/integrations/lara/pyproject.toml @@ -95,6 +95,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -149,7 +156,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml index 9515d3b4c3..f7870caf85 100644 --- a/integrations/llama_cpp/pyproject.toml +++ b/integrations/llama_cpp/pyproject.toml @@ -99,6 +99,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -142,7 +149,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can print their output 
"examples/**" = ["T201"] "tests/**" = ["T201"] diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py index 703c71ace7..82788b13ff 100644 --- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py @@ -211,8 +211,11 @@ def __init__( model_clip_path: str | None = None, ) -> None: """ + Initialize LlamaCppChatGenerator. + :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf". If the model path is also specified in the `model_kwargs`, this parameter will be ignored. + :param n_ctx: The number of tokens in the context. When set to 0, the context will be taken from the model. :param n_batch: Prompt processing maximum batch size. :param model_kwargs: Dictionary containing keyword arguments used to initialize the LLM for text generation. @@ -274,6 +277,7 @@ def __init__( self._inference_lock = asyncio.Lock() def warm_up(self) -> None: + """Load and initialize the llama.cpp model.""" if self._model is not None: return @@ -462,6 +466,7 @@ def _handle_streaming_response( ) -> dict[str, list[ChatMessage]]: """ Take streaming responses from llama.cpp, convert to Haystack StreamingChunk objects, stream them, + and finally convert them to a ChatMessage. :param response_stream: The streaming responses from llama.cpp. 
diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py index 719204bf01..cf53ef2f26 100644 --- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py +++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py @@ -38,8 +38,11 @@ def __init__( generation_kwargs: dict[str, Any] | None = None, ) -> None: """ + Initialize LlamaCppGenerator. + :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf". If the model path is also specified in the `model_kwargs`, this parameter will be ignored. + :param n_ctx: The number of tokens in the context. When set to 0, the context will be taken from the model. :param n_batch: Prompt processing maximum batch size. :param model_kwargs: Dictionary containing keyword arguments used to initialize the LLM for text generation. 
@@ -69,6 +72,7 @@ def __init__( self.model: Llama | None = None def warm_up(self) -> None: + """Load and initialize the llama.cpp model.""" if self.model is None: self.model = Llama(**self.model_kwargs) diff --git a/integrations/llama_stack/pyproject.toml b/integrations/llama_stack/pyproject.toml index 75464dcc64..6a38f85ecc 100644 --- a/integrations/llama_stack/pyproject.toml +++ b/integrations/llama_stack/pyproject.toml @@ -84,6 +84,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -137,7 +144,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN", "E501", "F841"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN", "E501", "F841"] # Examples can print their output and don't need type annotations "examples/**/*" = ["T201", "ANN"] diff --git a/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py b/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py index c8224b1442..11fb29b2cb 100644 --- a/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py +++ b/integrations/llama_stack/src/haystack_integrations/components/generators/llama_stack/chat/chat_generator.py @@ -20,6 +20,7 @@ class LlamaStackChatGenerator(OpenAIChatGenerator): """ Enables text generation using Llama Stack framework. 
+ Llama Stack Server supports multiple inference providers, including Ollama, Together, and vLLM and other cloud providers. For a complete list of inference providers, see [Llama Stack docs](https://llama-stack.readthedocs.io/en/latest/providers/inference/index.html). @@ -70,8 +71,10 @@ def __init__( http_client_kwargs: dict[str, Any] | None = None, ) -> None: """ - Creates an instance of LlamaStackChatGenerator. To use this chat generator, - you need to setup Llama Stack Server with an inference provider and have a model available. + Creates an instance of LlamaStackChatGenerator. + + To use this chat generator, you need to set up Llama Stack Server with an inference provider and have a model + available. :param model: The name of the model to use for chat completion. diff --git a/integrations/mcp/pyproject.toml b/integrations/mcp/pyproject.toml index e393086dd7..3dee9bfaa4 100644 --- a/integrations/mcp/pyproject.toml +++ b/integrations/mcp/pyproject.toml @@ -99,6 +99,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -153,8 +160,8 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] -"examples/**/*" = ["T201", "E501", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] +"examples/**/*" = ["T201", "E501", "ANN", "D"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_tool.py b/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_tool.py index
fca0d65b38..33a465bae5 100644 --- a/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_tool.py +++ b/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_tool.py @@ -119,8 +119,7 @@ def run_background( self, coro_factory: Callable[[asyncio.Event], Coroutine[Any, Any, Any]], timeout: float | None = None ) -> tuple[concurrent.futures.Future[Any], asyncio.Event]: """ - Schedule `coro_factory` to run in the executor's event loop **without** blocking the - caller thread. + Schedule `coro_factory` to run in the executor's event loop **without** blocking the caller thread. The factory receives an :class:`asyncio.Event` that can be used to cooperatively shut the coroutine down. The method returns **both** the concurrent future (to observe @@ -1292,7 +1291,8 @@ def __del__(self) -> None: class _MCPClientSessionManager: - """Runs an MCPClient connect/close inside the AsyncExecutor's event loop. + """ + Runs an MCPClient connect/close inside the AsyncExecutor's event loop. Life-cycle: 1. Create the worker to schedule a long-running coroutine in the diff --git a/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_toolset.py b/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_toolset.py index ba7cd71782..226ccd483d 100644 --- a/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_toolset.py +++ b/integrations/mcp/src/haystack_integrations/tools/mcp/mcp_toolset.py @@ -113,8 +113,7 @@ def _deserialize_state_config(config: dict[str, dict[str, Any]] | None) -> dict[ class MCPToolset(Toolset): """ - A Toolset that connects to an MCP (Model Context Protocol) server and provides - access to its tools. + A Toolset that connects to an MCP (Model Context Protocol) server and provides access to its tools. 
MCPToolset dynamically discovers and loads all tools from any MCP-compliant server, supporting both network-based streaming connections (Streamable HTTP, SSE) and local @@ -289,7 +288,8 @@ def __init__( self._warmup_called = True def warm_up(self) -> None: - """Connect and load tools when eager_connect is turned off. + """ + Connect and load tools when eager_connect is turned off. This method is automatically called by ``ToolInvoker.warm_up()`` and ``Pipeline.warm_up()``. You can also call it directly before using the toolset to ensure all tool schemas diff --git a/integrations/meta_llama/pyproject.toml b/integrations/meta_llama/pyproject.toml index ef52039b90..c7829c8a24 100644 --- a/integrations/meta_llama/pyproject.toml +++ b/integrations/meta_llama/pyproject.toml @@ -84,6 +84,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -133,7 +140,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py b/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py index 8d8fcaa6aa..65ba2550b9 100644 --- a/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py +++ 
b/integrations/meta_llama/src/haystack_integrations/components/generators/meta_llama/chat/chat_generator.py @@ -21,6 +21,7 @@ class MetaLlamaChatGenerator(OpenAIChatGenerator): """ Enables text generation using Llama generative models. + For supported models, see [Llama API Docs](https://llama.developer.meta.com/docs/). Users can pass any text generation parameters valid for the Llama Chat Completion API @@ -76,8 +77,9 @@ def __init__( tools: ToolsType | None = None, ) -> None: """ - Creates an instance of LlamaChatGenerator. Unless specified otherwise in the `model`, this is for Llama's - `Llama-4-Scout-17B-16E-Instruct-FP8` model. + Creates an instance of LlamaChatGenerator. + + Unless specified otherwise in the `model`, this is for Llama's `Llama-4-Scout-17B-16E-Instruct-FP8` model. :param api_key: The Llama API key. diff --git a/integrations/mistral/pyproject.toml b/integrations/mistral/pyproject.toml index 947b1e4be8..aa86633158 100644 --- a/integrations/mistral/pyproject.toml +++ b/integrations/mistral/pyproject.toml @@ -87,6 +87,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -136,7 +143,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py 
b/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py index bedb6aafba..f1439b11eb 100644 --- a/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py +++ b/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py @@ -26,8 +26,9 @@ @component class MistralOCRDocumentConverter: """ - This component extracts text from documents using Mistral's OCR API, with optional structured - annotations for both individual image regions (bounding boxes) and full documents. + Extract text from documents using Mistral's OCR API with optional structured annotations. + + Supports optional structured annotations for individual image regions (bounding boxes) and full documents. Accepts document sources in various formats (str/Path for local files, ByteStream for in-memory data, DocumentURLChunk for document URLs, ImageURLChunk for image URLs, or FileChunk for Mistral file IDs) diff --git a/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py b/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py index 709798078b..6c4f0eb632 100644 --- a/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py +++ b/integrations/mistral/src/haystack_integrations/components/embedders/mistral/document_embedder.py @@ -12,6 +12,7 @@ class MistralDocumentEmbedder(OpenAIDocumentEmbedder): """ A component for computing Document embeddings using Mistral models. + The embedding of each Document is stored in the `embedding` field of the Document. 
Usage example: diff --git a/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py b/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py index 374adfe379..7db80e5ae2 100644 --- a/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py +++ b/integrations/mistral/src/haystack_integrations/components/generators/mistral/chat/chat_generator.py @@ -20,6 +20,7 @@ class MistralChatGenerator(OpenAIChatGenerator): """ Enables text generation using Mistral AI generative models. + For supported models, see [Mistral AI docs](https://docs.mistral.ai/getting-started/models). Users can pass any text generation parameters valid for the Mistral Chat Completion API @@ -124,8 +125,9 @@ def __init__( http_client_kwargs: dict[str, Any] | None = None, ) -> None: """ - Creates an instance of MistralChatGenerator. Unless specified otherwise in the `model`, this is for Mistral's - `mistral-small-latest` model. + Creates an instance of MistralChatGenerator. + + Unless specified otherwise in the `model`, this is for Mistral's `mistral-small-latest` model. :param api_key: The Mistral API key. 
diff --git a/integrations/mongodb_atlas/pyproject.toml b/integrations/mongodb_atlas/pyproject.toml index e65c02ae9c..9035240878 100644 --- a/integrations/mongodb_atlas/pyproject.toml +++ b/integrations/mongodb_atlas/pyproject.toml @@ -89,6 +89,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -140,7 +147,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # examples can contain "print" commands "examples/**/*" = ["T201"] diff --git a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py index 55e08f7d91..a562ea75b6 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/embedding_retriever.py @@ -142,8 +142,7 @@ async def run_async( top_k: int | None = None, ) -> dict[str, list[Document]]: """ - Asynchronously retrieve documents from the MongoDBAtlasDocumentStore, based on the provided embedding - similarity. + Asynchronously retrieve documents from MongoDBAtlasDocumentStore based on embedding similarity. :param query_embedding: Embedding of the query. :param filters: Filters applied to the retrieved Documents. 
The way runtime filters are applied depends on diff --git a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py index 9a370fc6d0..9e5ab5bc0f 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/components/retrievers/mongodb_atlas/full_text_retriever.py @@ -49,6 +49,7 @@ def __init__( ) -> None: """ :param document_store: An instance of MongoDBAtlasDocumentStore. + :param filters: Filters applied to the retrieved Documents. Make sure that the fields used in the filters are included in the configuration of the `full_text_search_index`. The configuration must be done manually in the Web UI of MongoDB Atlas. diff --git a/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py b/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py index 73597212fa..f254f555e1 100644 --- a/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py +++ b/integrations/mongodb_atlas/src/haystack_integrations/document_stores/mongodb_atlas/document_store.py @@ -23,8 +23,7 @@ class MongoDBAtlasDocumentStore: """ - A MongoDBAtlasDocumentStore implementation that uses the - [MongoDB Atlas](https://www.mongodb.com/atlas/database) service that is easy to deploy, operate, and scale. + A MongoDBAtlasDocumentStore backed by [MongoDB Atlas](https://www.mongodb.com/atlas/database). To connect to MongoDB Atlas, you need to provide a connection string in the format: `"mongodb+srv://{mongo_atlas_username}:{mongo_atlas_password}@{mongo_atlas_host}/?{mongo_atlas_params_string}"`. 
@@ -120,6 +119,7 @@ def __del__(self) -> None: @property def connection(self) -> AsyncMongoClient | MongoClient: + """Return the active MongoDB client connection.""" if self._connection: return self._connection if self._connection_async: @@ -129,6 +129,7 @@ def connection(self) -> AsyncMongoClient | MongoClient: @property def collection(self) -> AsyncCollection | Collection: + """Return the active MongoDB collection.""" if self._collection: return self._collection if self._collection_async: @@ -352,8 +353,7 @@ async def count_unique_metadata_by_filter_async( self, filters: dict[str, Any], metadata_fields: list[str] ) -> dict[str, int]: """ - Asynchronously applies a filter selecting documents and counts the unique values for each meta field of the - matched documents. + Asynchronously applies a filter selecting documents and counts unique metadata values for each meta field. :param filters: The filters to apply to the document list. :param metadata_fields: The metadata fields to count unique values for. @@ -538,8 +538,7 @@ async def get_metadata_field_unique_values_async( self, metadata_field: str, search_term: str | None = None, from_: int = 0, size: int = 10 ) -> tuple[list[str], int]: """ - Asynchronously retrieves unique values for a field matching a search_term or all possible values if no search - term is given. + Asynchronously retrieves unique values for a metadata field, optionally filtered by a search term. :param metadata_field: The metadata field to retrieve unique values for. :param search_term: The search term to filter values. Matches as a case-insensitive substring. @@ -972,8 +971,7 @@ async def _embedding_retrieval_async( self, query_embedding: list[float], filters: dict[str, Any] | None = None, top_k: int = 10 ) -> list[Document]: """ - Asynchronously find the documents that are most similar to the provided `query_embedding` by using a vector - similarity metric. 
+ Asynchronously find the documents most similar to the provided `query_embedding` using vector similarity. :param query_embedding: Embedding of the query :param filters: Optional filters. diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index 1271b386f9..8e844b3af0 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -91,6 +91,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -140,7 +147,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 1da245fa07..f94ac64e4d 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -20,8 +20,7 @@ @component class NvidiaDocumentEmbedder: """ - A component for embedding documents using embedding models provided by - [NVIDIA NIMs](https://ai.nvidia.com). + A component for embedding documents using embedding models provided by [NVIDIA NIMs](https://ai.nvidia.com). 
Usage example: ```python @@ -107,6 +106,7 @@ def __init__( @classmethod def class_name(cls) -> str: + """Return the class name identifier for serialization.""" return "NvidiaDocumentEmbedder" def default_model(self) -> None: diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index 474799f2cb..609d403d3b 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -18,8 +18,7 @@ @component class NvidiaTextEmbedder: """ - A component for embedding strings using embedding models provided by - [NVIDIA NIMs](https://ai.nvidia.com). + A component for embedding strings using embedding models provided by [NVIDIA NIMs](https://ai.nvidia.com). For models that differentiate between query and document inputs, this component embeds the input string as a query. @@ -90,6 +89,7 @@ def __init__( @classmethod def class_name(cls) -> str: + """Return the class name identifier for serialization.""" return "NvidiaTextEmbedder" def default_model(self) -> None: diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py index 8b24eaaa48..11322c9311 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py @@ -8,6 +8,7 @@ class EmbeddingTruncateMode(Enum): """ Specifies how inputs to the NVIDIA embedding components are truncated. + If START, the input will be truncated from the start. If END, the input will be truncated from the end. If NONE, an error will be returned (if the input is too long). 
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py index 2a3fdf5272..620d456439 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py @@ -21,6 +21,7 @@ class NvidiaChatGenerator(OpenAIChatGenerator): """ Enables text generation using NVIDIA generative models. + For supported models, see [NVIDIA Docs](https://build.nvidia.com/models). Users can pass any text generation parameters valid for the NVIDIA Chat Completion API diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 822c151acd..4f829a4389 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -15,8 +15,9 @@ @component class NvidiaGenerator: """ - Generates text using generative models hosted with - [NVIDIA NIM](https://ai.nvidia.com) on the [NVIDIA API Catalog](https://build.nvidia.com/explore/discover). + Generates text using generative models hosted with [NVIDIA NIM](https://ai.nvidia.com). + + Available via the [NVIDIA API Catalog](https://build.nvidia.com/explore/discover). 
### Usage example @@ -88,6 +89,7 @@ def __init__( @classmethod def class_name(cls) -> str: + """Return the class name identifier for serialization.""" return "NvidiaGenerator" def default_model(self) -> None: diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index 1d698de21f..64530d2741 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -18,8 +18,7 @@ @component class NvidiaRanker: """ - A component for ranking documents using ranking models provided by - [NVIDIA NIMs](https://ai.nvidia.com). + A component for ranking documents using ranking models provided by [NVIDIA NIMs](https://ai.nvidia.com). Usage example: ```python @@ -120,6 +119,7 @@ def __init__( @classmethod def class_name(cls) -> str: + """Return the class name identifier for serialization.""" return "NvidiaRanker" def to_dict(self) -> dict[str, Any]: diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/truncate.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/truncate.py index ec554e451a..0d21a08441 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/truncate.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/truncate.py @@ -8,6 +8,7 @@ class RankerTruncateMode(str, Enum): """ Specifies how inputs to the NVIDIA ranker components are truncated. + If NONE, the input will not be truncated and an error returned instead. If END, the input will be truncated from the end. 
""" diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/models.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/models.py index ee7d5e883c..6fe0cb060a 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/models.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/models.py @@ -36,6 +36,7 @@ def __hash__(self) -> int: return hash(self.id) def validate(self) -> int: + """Validate the model against the backend and return a sort key.""" if self.client: client = self.client if isinstance(self.client, Client) else Client.from_str(self.client) supported = { diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index eb9e5197b9..a1033d1135 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -74,6 +74,7 @@ def __init__( self.timeout = timeout def embed(self, texts: list[str]) -> tuple[list[list[float]], dict[str, Any]]: + """Compute embeddings for a list of texts via the NIM API.""" url = f"{self.api_url}/embeddings" try: @@ -99,6 +100,7 @@ def embed(self, texts: list[str]) -> tuple[list[list[float]], dict[str, Any]]: return embeddings, {"usage": data["usage"]} def generate(self, prompt: str) -> tuple[list[str], list[dict[str, Any]]]: + """Generate text completions for a prompt via the NIM API.""" # We're using the chat completion endpoint as the NIM API doesn't support # the /completions endpoint. So both the non-chat and chat generator will use this. # This is the same for local containers and the cloud API. 
@@ -152,6 +154,7 @@ def generate(self, prompt: str) -> tuple[list[str], list[dict[str, Any]]]: return replies, meta def models(self) -> list[Model]: + """Retrieve available models from the NIM API.""" url = f"{self.api_url}/models" res = self.session.get( @@ -175,6 +178,7 @@ def models(self) -> list[Model]: return models def rank(self, query_text: str, document_texts: list[str]) -> list[dict[str, Any]]: + """Rank documents by relevance to a query via the NIM API.""" url = self.api_url try: diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py index 402bd95763..128769a908 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py @@ -49,6 +49,7 @@ def is_hosted(api_url: str) -> bool: def lookup_model(name: str) -> Model | None: """ Lookup a model by name, using only the table of known models. + The name is either: - directly in the table - an alias in the table @@ -66,8 +67,7 @@ def lookup_model(name: str) -> Model | None: def determine_model(name: str) -> Model | None: """ - Determine the model to use based on a name, using - only the table of known models. + Determine the model to use based on a name, using only the table of known models. Raise a warning if the model is found to be an alias of a known model. 
diff --git a/integrations/ollama/pyproject.toml b/integrations/ollama/pyproject.toml index 31b6839245..c4b788569c 100644 --- a/integrations/ollama/pyproject.toml +++ b/integrations/ollama/pyproject.toml @@ -94,6 +94,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", @@ -137,7 +144,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"] +"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"] # Examples can print their output "examples/**" = ["T201"] diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py index 229270e54d..a69294b8cb 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/document_embedder.py @@ -10,8 +10,9 @@ @component class OllamaDocumentEmbedder: """ - Computes the embeddings of a list of Documents and stores the obtained vectors in the embedding field of each - Document. It uses embedding models compatible with the Ollama Library. + Computes the embeddings of a list of Documents and stores the obtained vectors in each Document's embedding field. + + It uses embedding models compatible with the Ollama Library. Usage example: ```python @@ -41,8 +42,11 @@ def __init__( batch_size: int = 32, ) -> None: """ + Create a new OllamaDocumentEmbedder instance. 
+ :param model: The name of the model to use. The model should be available in the running Ollama instance. + :param url: The URL of a running Ollama instance. :param generation_kwargs: diff --git a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py index b56dfa7790..8a87d9a145 100644 --- a/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py +++ b/integrations/ollama/src/haystack_integrations/components/embedders/ollama/text_embedder.py @@ -8,8 +8,9 @@ @component class OllamaTextEmbedder: """ - Computes the embeddings of a list of Documents and stores the obtained vectors in the embedding field of - each Document. It uses embedding models compatible with the Ollama Library. + Computes the embedding of a single input string. + + It uses embedding models compatible with the Ollama Library. Usage example: ```python @@ -30,6 +31,8 @@ def __init__( keep_alive: float | str | None = None, ) -> None: """ + Create a new OllamaTextEmbedder instance. + :param model: The name of the model to use. The model should be available in the running Ollama instance.
:param url: diff --git a/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py b/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py index cd72010cb8..e98375fcbe 100644 --- a/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py +++ b/integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py @@ -104,6 +104,7 @@ def _convert_chatmessage_to_ollama_format(message: ChatMessage) -> dict[str, Any def _convert_ollama_meta_to_openai_format(input_response_dict: dict) -> dict[str, Any]: """ Map Ollama metadata keys onto the OpenAI-compatible names Haystack expects. + All fields that are not part of the OpenAI metadata are left unchanged in the returned dict. Example Ollama metadata: @@ -253,6 +254,8 @@ def __init__( think: bool | Literal["low", "medium", "high"] = False, ) -> None: """ + Create a new OllamaChatGenerator instance. + :param model: The name of the model to use. The model must already be present (pulled) in the running Ollama instance. :param url: @@ -355,9 +358,9 @@ def _handle_streaming_response( callback: SyncStreamingCallbackT | None, ) -> dict[str, list[ChatMessage]]: """ - Merge an Ollama streaming response into a single ChatMessage, preserving - tool calls. Works even when arguments arrive piecemeal as str fragments - or as full JSON dicts. + Merge an Ollama streaming response into a single ChatMessage, preserving tool calls. + + Works even when arguments arrive piecemeal as str fragments or as full JSON dicts. """ component_info = ComponentInfo.from_component(self) @@ -439,9 +442,10 @@ async def _handle_streaming_response_async( callback: AsyncStreamingCallbackT | None, ) -> dict[str, list[ChatMessage]]: """ - Merge an Ollama async streaming response into a single ChatMessage, preserving - tool calls. 
Works even when arguments arrive piecemeal as str fragments - or as full JSON dicts.""" + Merge an Ollama async streaming response into a single ChatMessage, preserving tool calls. + + Works even when arguments arrive piecemeal as str fragments or as full JSON dicts. + """ component_info = ComponentInfo.from_component(self) chunks: list[StreamingChunk] = [] diff --git a/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py b/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py index d612e012ea..da4d38dfc4 100644 --- a/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py +++ b/integrations/ollama/src/haystack_integrations/components/generators/ollama/generator.py @@ -108,6 +108,8 @@ def __init__( streaming_callback: Callable[[StreamingChunk], None] | None = None, ) -> None: """ + Create a new OllamaGenerator instance. + :param model: The name of the model to use. The model should be available in the running Ollama instance. :param url: