fix: fix ragas types + add py.typed (#2000)

anakin87 · web-flow · commit ebcf147b105c · 2025-06-26T14:20:22.000+02:00
diff --git a/.github/workflows/ragas.yml b/.github/workflows/ragas.yml
@@ -50,11 +50,9 @@ jobs:
       - name: Install Hatch
         run: pip install --upgrade hatch
 
-    # TODO: Once this integration is properly typed, use hatch run test:types
-    # https://github.com/deepset-ai/haystack-core-integrations/issues/1771
       - name: Lint
         if: matrix.python-version == '3.9' && runner.os == 'Linux'
-        run: hatch run fmt-check && hatch run lint:typing
+        run: hatch run fmt-check && hatch run test:types
 
       - name: Generate docs
         if: matrix.python-version == '3.9' && runner.os == 'Linux'
diff --git a/integrations/ragas/pyproject.toml b/integrations/ragas/pyproject.toml
@@ -66,19 +66,18 @@ integration = 'pytest -m "integration" {args:tests}'
 all = 'pytest {args:tests}'
 cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
 
-types = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
+types = "mypy -p haystack_integrations.components.evaluators.ragas {args}"
 
-# TODO: remove lint environment once this integration is properly typed
-# test environment should be used instead
-# https://github.com/deepset-ai/haystack-core-integrations/issues/1771
-[tool.hatch.envs.lint]
-installer = "uv"
-detached = true
-dependencies = ["pip", "mypy>=1.0.0", "ruff>=0.0.243"]
-
-[tool.hatch.envs.lint.scripts]
-typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
+[tool.mypy]
+install_types = true
+non_interactive = true
+check_untyped_defs = true
+disallow_incomplete_defs = true
 
+[[tool.mypy.overrides]]
+# ragas does not provide types
+module = ["ragas.*"]
+ignore_missing_imports = true
 
 [tool.ruff]
 target-version = "py38"
@@ -158,16 +157,3 @@ parallel = false
 omit = ["*/tests/*", "*/__init__.py"]
 show_missing = true
 exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
-
-
-[[tool.mypy.overrides]]
-module = [
-  "haystack.*",
-  "pytest.*",
-  "ragas.*",
-  "datasets.*",
-  "numpy",
-  "grpc",
-  "haystack_integrations.*",
-]
-ignore_missing_imports = true
diff --git a/integrations/ragas/src/haystack_integrations/components/evaluators/py.typed b/integrations/ragas/src/haystack_integrations/components/evaluators/py.typed
diff --git a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
@@ -1,11 +1,11 @@
 import re
-from typing import Any, Dict, List, Optional, Union, get_args, get_origin
+from typing import Any, Dict, List, Optional, Union, cast, get_args, get_origin
 
 from haystack import Document, component
 from haystack.dataclasses import ChatMessage
-from pydantic import ValidationError  # type: ignore
+from pydantic import ValidationError
 
-from ragas import evaluate  # type: ignore
+from ragas import evaluate
 from ragas.dataset_schema import (
     EvaluationDataset,
     EvaluationResult,
@@ -135,7 +135,7 @@ def run(
             )
 
         except (ValueError, ValidationError) as e:
-            raise self._handle_conversion_error(e) from None
+            self._handle_conversion_error(e)
 
         dataset = EvaluationDataset([sample])
 
@@ -147,7 +147,7 @@ def run(
                 embeddings=self.embedding,
             )
         except (ValueError, ValidationError) as e:
-            raise self._handle_evaluation_error(e) from None
+            self._handle_evaluation_error(e)
 
         return {"result": result}
 
@@ -157,22 +157,19 @@ def _process_documents(self, documents: Union[List[Union[Document, str]], None])
         :param documents: List of Documents or strings to process
         :return: List of document contents as strings or None
         """
-        if documents:
-            first_type = type(documents[0])
-            if first_type is Document:
-                if not all(isinstance(doc, Document) for doc in documents):
-                    error_message = "All elements in documents list must be of type Document."
-                    raise ValueError(error_message)
-                return [doc.content for doc in documents]  # type: ignore[union-attr]
-
-            if first_type is str:
-                if not all(isinstance(doc, str) for doc in documents):
-                    error_message = "All elements in documents list must be strings."
-                    raise ValueError(error_message)
-                return documents
-            error_message = "Unsupported type in documents list."
-            raise ValueError(error_message)
-        return documents
+        if documents is None:
+            return None
+
+        if isinstance(documents, list) and all(isinstance(doc, str) for doc in documents):
+            # we need to check types again in the list comprehension to make mypy happy
+            return [doc for doc in documents if isinstance(doc, str)]
+
+        if isinstance(documents, list) and all(isinstance(doc, Document) for doc in documents):
+            # we need to check types again in the list comprehension to make mypy happy
+            return [doc.content for doc in documents if isinstance(doc, Document) and doc.content]
+
+        error_message = "'documents' must be a list of either Documents or strings."
+        raise ValueError(error_message)
 
     def _process_response(self, response: Optional[Union[List[ChatMessage], str]]) -> Union[str, None]:
         """Process response into expected format.
@@ -181,14 +178,14 @@ def _process_response(self, response: Optional[Union[List[ChatMessage], str]]) -
         :return: None or Processed response string
         """
         if isinstance(response, list):  # Check if response is a list
-            if all(isinstance(item, ChatMessage) for item in response):
-                return response[0]._content[0].text
+            if all(isinstance(item, ChatMessage) and item.text for item in response):
+                return response[0].text
             return None
         elif isinstance(response, str):
             return response
         return response
 
-    def _handle_conversion_error(self, error: Exception):
+    def _handle_conversion_error(self, error: Exception) -> None:
         """Handle conversion errors with improved messages.
 
         :param error: Original error
@@ -199,7 +196,9 @@ def _handle_conversion_error(self, error: Exception):
                 "retrieved_contexts": "documents",
             }
             for err in error.errors():
-                field = err["loc"][0]
+                # loc is a tuple of strings and ints but according to pydantic docs, the first element is a string
+                # https://docs.pydantic.dev/latest/errors/errors/
+                field = cast(str, err["loc"][0])
                 haystack_field = field_mapping.get(field, field)
                 expected_type = self.run.__annotations__.get(haystack_field)
                 type_desc = self._get_expected_type_description(expected_type)
@@ -213,7 +212,7 @@ def _handle_conversion_error(self, error: Exception):
                 )
                 raise ValueError(error_message)
 
-    def _handle_evaluation_error(self, error: Exception):
+    def _handle_evaluation_error(self, error: Exception) -> None:
         error_message = str(error)
         columns_match = re.search(r"additional columns \[(.*?)\]", error_message)
         field_mapping = {
@@ -233,7 +232,7 @@ def _handle_evaluation_error(self, error: Exception):
             )
             raise ValueError(updated_error_message)
 
-    def _get_expected_type_description(self, expected_type) -> str:
+    def _get_expected_type_description(self, expected_type: Any) -> str:
         """Helper method to get a description of the expected type."""
         if get_origin(expected_type) is Union:
             expected_types = [getattr(t, "__name__", str(t)) for t in get_args(expected_type)]
diff --git a/integrations/ragas/tests/test_evaluator.py b/integrations/ragas/tests/test_evaluator.py
@@ -82,7 +82,7 @@ def test_initializer_allows_optional_llm_and_embeddings():
     "invalid_input,field_name,error_message",
     [
         (["Invalid query type"], "query", "'query' field expected"),
-        ([123, ["Invalid document"]], "documents", "Unsupported type in documents list"),
+        ([123, ["Invalid document"]], "documents", "'documents' must be a list"),
         (["score_1"], "rubrics", "'rubrics' field expected"),
     ],
 )

Original file line number	Diff line number	Diff line change
`@@ -82,7 +82,7 @@ def test_initializer_allows_optional_llm_and_embeddings():`
`82`	`82`	`"invalid_input,field_name,error_message",`
`83`	`83`	`[`
`84`	`84`	`(["Invalid query type"], "query", "'query' field expected"),`
`85`		`- ([123, ["Invalid document"]], "documents", "Unsupported type in documents list"),`
	`85`	`+ ([123, ["Invalid document"]], "documents", "'documents' must be a list"),`
`86`	`86`	`(["score_1"], "rubrics", "'rubrics' field expected"),`
`87`	`87`	`],`
`88`	`88`	`)`