Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions integrations/openrouter/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -134,9 +141,9 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# Examples can print their output and don't need type annotations
"examples/**/*" = ["T201", "ANN"]
"examples/**/*" = ["D", "T201", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
class OpenRouterChatGenerator(OpenAIChatGenerator):
"""
Enables text generation using OpenRouter generative models.

For supported models, see [OpenRouter docs](https://openrouter.ai/models).

Users can pass any text generation parameters valid for the OpenRouter chat completion API
Expand Down Expand Up @@ -71,8 +72,7 @@ def __init__(
http_client_kwargs: dict[str, Any] | None = None,
) -> None:
"""
Creates an instance of OpenRouterChatGenerator. Unless specified otherwise,
the default model is `openai/gpt-5-mini`.
Creates an instance of OpenRouterChatGenerator.
Comment thread
julian-risch marked this conversation as resolved.

:param api_key:
The OpenRouter API key.
Expand Down
9 changes: 8 additions & 1 deletion integrations/opensearch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -145,7 +152,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ def __init__(
**kwargs: Any,
) -> None:
"""
Initialize the OpenSearchHybridRetriever, a super component to retrieve documents from OpenSearch using
both embedding-based and keyword-based retrieval methods.
Initialize the OpenSearchHybridRetriever using both embedding-based and keyword-based retrieval methods.

This is a super component to retrieve documents from OpenSearch using both retrieval methods.

We don't explicitly define all the init parameters of the components in the constructor, for each
of the components, since that would be around 20+ parameters. Instead, we define the most important ones
Expand Down Expand Up @@ -242,7 +243,9 @@ def __init__(

if TYPE_CHECKING:

def warm_up(self) -> None: ...
def warm_up(self) -> None:
"""Warm up the underlying pipeline components."""
...

def run(
self,
Expand All @@ -251,7 +254,9 @@ def run(
filters_embedding: dict[str, Any] | None = None,
top_k_bm25: int | None = None,
top_k_embedding: int | None = None,
) -> dict[str, list[Document]]: ...
) -> dict[str, list[Document]]:
"""Run the hybrid retrieval pipeline and return retrieved documents."""
...

def _create_pipeline(self, data: dict[str, Any]) -> Pipeline:
"""
Expand Down Expand Up @@ -328,6 +333,7 @@ def to_dict(self) -> dict[str, Any]:

@classmethod
def from_dict(cls, data: dict[str, Any]) -> "OpenSearchHybridRetriever":
"""Deserialize an OpenSearchHybridRetriever from a dictionary."""
# deserialize the document store
doc_store = OpenSearchDocumentStore.from_dict(data["init_parameters"]["document_store"])
data["init_parameters"]["document_store"] = doc_store
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def _get_aws_session(
) -> "boto3.Session":
"""
Creates an AWS Session with the given parameters.

Checks if the provided AWS credentials are valid and can be used to connect to AWS.

:param aws_access_key_id: AWS access key ID.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ async def write_documents_async(
def _deserialize_document(hit: dict[str, Any]) -> Document:
"""
Creates a Document from the search hit provided.

This is mostly useful in self.filter_documents().
"""
data = hit["_source"]
Expand Down Expand Up @@ -1482,6 +1483,7 @@ def _embedding_retrieval(
) -> list[Document]:
"""
Retrieves documents that are most similar to the query embedding using a vector similarity metric.

It uses OpenSearch's Approximate k-Nearest Neighbors search algorithm.

This method is not meant to be part of the public interface of
Expand Down Expand Up @@ -1513,8 +1515,9 @@ async def _embedding_retrieval_async(
search_kwargs: dict[str, Any] | None = None,
) -> list[Document]:
"""
Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity
metric. It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm.
Asynchronously retrieves documents most similar to the query embedding using a vector similarity metric.

It uses OpenSearch's Approximate k-Nearest Neighbors search algorithm.

This method is not meant to be part of the public interface of
`OpenSearchDocumentStore` nor called directly.
Expand Down Expand Up @@ -1641,8 +1644,7 @@ def _extract_distinct_counts_from_aggregations(

def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
"""
Returns the number of unique values for each specified metadata field of the documents
that match the provided filters.
Returns the number of unique values for each specified metadata field of the documents that match the filters.

:param filters: The filters to apply to count documents.
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
Expand Down Expand Up @@ -1685,8 +1687,7 @@ async def count_unique_metadata_by_filter_async(
self, filters: dict[str, Any], metadata_fields: list[str]
) -> dict[str, int]:
"""
Asynchronously returns the number of unique values for each specified metadata field of the documents
that match the provided filters.
Asynchronously returns the number of unique values for each specified metadata field matching the filters.

:param filters: The filters to apply to count documents.
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
Expand Down Expand Up @@ -1862,6 +1863,7 @@ def get_metadata_field_unique_values(
) -> tuple[list[str], dict[str, Any] | None]:
"""
Returns unique values for a metadata field, optionally filtered by a search term in the content.

Uses composite aggregations for proper pagination beyond 10k results.

:param metadata_field: The metadata field to get unique values for.
Expand Down Expand Up @@ -1927,6 +1929,7 @@ async def get_metadata_field_unique_values_async(
) -> tuple[list[str], dict[str, Any] | None]:
"""
Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.

Uses composite aggregations for proper pagination beyond 10k results.

:param metadata_field: The metadata field to get unique values for.
Expand Down
11 changes: 9 additions & 2 deletions integrations/optimum/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -151,9 +158,9 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# Examples can print their output
"examples/**" = ["T201"]
"examples/**" = ["D", "T201"]
"tests/**" = ["T201"]

[tool.coverage.run]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

class OptimumEmbedderOptimizationMode(Enum):
"""
[ONXX Optimization modes](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization)
support by the Optimum Embedders.
ONNX Optimization modes supported by the Optimum Embedders.

See [Optimum ONNX optimization docs](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/optimization)
for more details.
"""

#: Basic general optimizations.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
@component
class OptimumDocumentEmbedder:
"""
A component for computing `Document` embeddings using models loaded with the
[HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library,
leveraging the ONNX runtime for high-speed inference.
A component for computing `Document` embeddings using models loaded with the HuggingFace Optimum library.

Uses the [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library and leverages the ONNX
runtime for high-speed inference.

The embedding of each Document is stored in the `embedding` field of the Document.

Expand Down Expand Up @@ -199,6 +200,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
def run(self, documents: list[Document]) -> dict[str, list[Document]]:
"""
Embed a list of Documents.

The embedding of each Document is stored in the `embedding` field of the Document.

:param documents:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
@component
class OptimumTextEmbedder:
"""
A component to embed text using models loaded with the
[HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library,
leveraging the ONNX runtime for high-speed inference.
A component to embed text using models loaded with the HuggingFace Optimum library.

Uses the [HuggingFace Optimum](https://huggingface.co/docs/optimum/index) library and leverages the ONNX
runtime for high-speed inference.

Usage example:
```python
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

class OptimumEmbedderQuantizationMode(Enum):
"""
[Dynamic Quantization modes](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization)
support by the Optimum Embedders.
Dynamic Quantization modes supported by the Optimum Embedders.

See [Optimum ONNX quantization docs](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization)
for more details.
"""

#: Quantization for the ARM64 architecture.
Expand Down
9 changes: 8 additions & 1 deletion integrations/paddleocr/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # Summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -137,7 +144,7 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]

[tool.coverage.run]
source = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ def _normalize_file_type(file_type: FileTypeInput) -> FileType | None:
@component
class PaddleOCRVLDocumentConverter:
"""
This component extracts text from documents using PaddleOCR's large model
document parsing API.
Extracts text from documents using PaddleOCR's large model document parsing API.

PaddleOCR-VL is used behind the scenes. For more information, please
refer to:
Expand Down
11 changes: 9 additions & 2 deletions integrations/pgvector/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ select = [
"ARG",
"B",
"C",
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D205", # 1 blank line required between summary line and description
"D209", # Closing triple quotes go to new line
"D213", # summary lines must be positioned on the second physical line of the docstring
"D417", # Missing argument descriptions in the docstring
"D419", # Docstring is empty
"DTZ",
"E",
"EM",
Expand Down Expand Up @@ -149,9 +156,9 @@ ban-relative-imports = "parents"

[tool.ruff.lint.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
# examples can contain "print" commands
"examples/**/*" = ["T201"]
"examples/**/*" = ["D", "T201"]


[tool.coverage.run]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def __init__(
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
) -> None:
"""
Initialize the PgvectorEmbeddingRetriever.

:param document_store: An instance of `PgvectorDocumentStore`.
:param filters: Filters applied to the retrieved Documents.
:param top_k: Maximum number of Documents to return.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def __init__(
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
) -> None:
"""
Initialize the PgvectorKeywordRetriever.

:param document_store: An instance of `PgvectorDocumentStore`.
:param filters: Filters applied to the retrieved Documents.
:param top_k: Maximum number of Documents to return.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

def _from_haystack_to_pg_documents(documents: list[Document]) -> list[dict[str, Any]]:
"""
Internal method to convert a list of Haystack Documents to a list of dictionaries that can be used to insert
documents into the PgvectorDocumentStore.
Internal method to convert a list of Haystack Documents to a list of dictionaries.

The resulting dictionaries can be used to insert documents into the PgvectorDocumentStore.
"""

db_documents = []
Expand Down
Loading
Loading