Commit 4a5b3ee

julian-risch and claude authored
chore: enable ANN ruff ruleset and add missing docstrings for deepeval integration (#2985)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent f812e94 commit 4a5b3ee

3 files changed: 78 additions & 6 deletions

integrations/deepeval/pyproject.toml

Lines changed: 3 additions & 1 deletion
@@ -82,6 +82,7 @@ line-length = 120
 [tool.ruff.lint]
 select = [
     "A",
+    "ANN",
     "ARG",
     "B",
     "C",
@@ -110,6 +111,7 @@ select = [
 ignore = [
     # Allow non-abstract empty methods in abstract base classes
     "B027",
+    "ANN401", # Allow Any - used legitimately for dynamic types and SDK boundaries
     # Allow boolean positional values in function calls, like `dict.get(... True)`
     "FBT003",
     # Ignore checks for possible passwords
@@ -139,7 +141,7 @@ ban-relative-imports = "all"

 [tool.ruff.lint.per-file-ignores]
 # Tests can use magic values, assertions, and relative imports
-"tests/**/*" = ["PLR2004", "S101", "TID252"]
+"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]

 [tool.coverage.run]
 source = ["haystack_integrations"]

integrations/deepeval/src/haystack_integrations/components/evaluators/deepeval/evaluator.py

Lines changed: 2 additions & 2 deletions
@@ -54,7 +54,7 @@ def __init__(
         self,
         metric: str | DeepEvalMetric,
         metric_params: dict[str, Any] | None = None,
-    ):
+    ) -> None:
         """
         Construct a new DeepEval evaluator.

@@ -144,7 +144,7 @@ def from_dict(cls, data: dict[str, Any]) -> "DeepEvalEvaluator":
     def _invoke_deepeval(test_cases: list[LLMTestCase], metric: BaseMetric) -> EvaluationResult:
         return evaluate(test_cases=test_cases, metrics=[metric])

-    def _init_backend(self):
+    def _init_backend(self) -> None:
         """
         Initialize the DeepEval backend.
         """

integrations/deepeval/src/haystack_integrations/components/evaluators/deepeval/metrics.py

Lines changed: 73 additions & 3 deletions
@@ -49,7 +49,7 @@ class DeepEvalMetric(Enum):
     #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
     CONTEXTUAL_RELEVANCE = "contextual_relevance"

-    def __str__(self):
+    def __str__(self) -> str:
         return self.value

     @classmethod
@@ -87,7 +87,13 @@ class MetricResult:
     score: float | None = None
     explanation: str | None = None

-    def to_dict(self):
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Convert the metric result to a dictionary.
+
+        :returns:
+            A dictionary with the metric result fields.
+        """
         return dataclasses.asdict(self)

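The new docstring describes what `dataclasses.asdict` already does: walk the dataclass fields and return them as a plain dictionary. A self-contained sketch mirroring the two fields visible in this diff (the real `MetricResult` may carry more):

import dataclasses
from typing import Any


@dataclasses.dataclass
class MetricResult:
    score: float | None = None
    explanation: str | None = None

    def to_dict(self) -> dict[str, Any]:
        # asdict recursively converts the dataclass into a dict.
        return dataclasses.asdict(self)


print(MetricResult(score=0.9, explanation="on topic").to_dict())
# {'score': 0.9, 'explanation': 'on topic'}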

@@ -129,6 +135,23 @@ def new(
         *,
         init_parameters: Mapping[str, type] | None = None,
     ) -> "MetricDescriptor":
+        """
+        Create a new metric descriptor, inferring input parameters from the converter signature.
+
+        :param metric:
+            The metric enum value.
+        :param backend:
+            The DeepEval metric class to instantiate.
+        :param input_converter:
+            Callable that converts Haystack inputs to DeepEval test cases.
+        :param output_converter:
+            Callable that converts DeepEval results to `MetricResult` objects.
+            If ``None``, the default output converter is used.
+        :param init_parameters:
+            Optional mapping of parameter names to types accepted by the backend metric's constructor.
+        :returns:
+            A new `MetricDescriptor` instance.
+        """
         input_converter_signature = inspect.signature(input_converter)
         input_parameters = {}
         for name, param in input_converter_signature.parameters.items():
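
The inference the docstring mentions rests on `inspect.signature`, as the context lines show: parameter names and annotations are read off the converter. A standalone sketch of that mechanism, with an invented converter:

import inspect


def converter(questions: list[str], contexts: list[list[str]], responses: list[str]) -> None:
    """Invented stand-in for an input converter."""


signature = inspect.signature(converter)
input_parameters = {name: param.annotation for name, param in signature.parameters.items()}
print(input_parameters)
# {'questions': list[str], 'contexts': list[list[str]], 'responses': list[str]}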
@@ -158,7 +181,7 @@ class InputConverters:
     """

     @staticmethod
-    def _validate_input_elements(**kwargs):
+    def _validate_input_elements(**kwargs: Any) -> None:
         for k, collection in kwargs.items():
             if not isinstance(collection, list):
                 msg = (
@@ -177,6 +200,18 @@ def _validate_input_elements(**kwargs):

     @staticmethod
     def validate_input_parameters(metric: DeepEvalMetric, expected: dict[str, Any], received: dict[str, Any]) -> None:
+        """
+        Validate that all expected input parameters are present in the received inputs.
+
+        :param metric:
+            The metric being evaluated, used for error messages.
+        :param expected:
+            Dictionary of expected parameter names to their types.
+        :param received:
+            Dictionary of received parameter names to their values.
+        :raises ValueError:
+            If a required parameter is missing from ``received``.
+        """
         for param, _ in expected.items():
             if param not in received:
                 msg = f"DeepEval evaluator expected input parameter '{param}' for metric '{metric}'"
@@ -186,6 +221,18 @@ validate_input_parameters(metric: DeepEvalMetric, expected: dict[str, Any],
     def question_context_response(
         questions: list[str], contexts: list[list[str]], responses: list[str]
     ) -> Iterable[LLMTestCase]:
+        """
+        Convert question, context, and response inputs to DeepEval test cases.
+
+        :param questions:
+            List of input questions.
+        :param contexts:
+            List of retrieval context lists, one per question.
+        :param responses:
+            List of model responses, one per question.
+        :returns:
+            An iterable of `LLMTestCase` objects.
+        """
         InputConverters._validate_input_elements(questions=questions, contexts=contexts, responses=responses)
         for q, c, r in zip(questions, contexts, responses, strict=True):  # type: ignore
             test_case = LLMTestCase(input=q, actual_output=r, retrieval_context=c)
@@ -195,6 +242,20 @@ question_context_response(
     def question_context_response_ground_truth(
         questions: list[str], contexts: list[list[str]], responses: list[str], ground_truths: list[str]
     ) -> Iterable[LLMTestCase]:
+        """
+        Convert question, context, response, and ground truth inputs to DeepEval test cases.
+
+        :param questions:
+            List of input questions.
+        :param contexts:
+            List of retrieval context lists, one per question.
+        :param responses:
+            List of model responses, one per question.
+        :param ground_truths:
+            List of expected (ground truth) responses, one per question.
+        :returns:
+            An iterable of `LLMTestCase` objects.
+        """
         InputConverters._validate_input_elements(questions=questions, contexts=contexts, responses=responses)
         for q, c, r, gt in zip(questions, contexts, responses, ground_truths, strict=True):  # type: ignore
             test_case = LLMTestCase(input=q, actual_output=r, retrieval_context=c, expected_output=gt)
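
Both converters turn parallel lists into one DeepEval test case per question. A hedged usage sketch built directly on deepeval's `LLMTestCase` (sample data invented; it mirrors the ground-truth loop above rather than calling the private converters):

from deepeval.test_case import LLMTestCase

questions = ["What is the capital of France?"]
contexts = [["Paris is the capital and most populous city of France."]]
responses = ["Paris"]
ground_truths = ["Paris"]

# One test case per question, as in question_context_response_ground_truth.
cases = [
    LLMTestCase(input=q, actual_output=r, retrieval_context=c, expected_output=gt)
    for q, c, r, gt in zip(questions, contexts, responses, ground_truths, strict=True)
]
print(cases[0].input, "->", cases[0].actual_output)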
@@ -212,6 +273,15 @@ class OutputConverters:
     def default(
         metric: DeepEvalMetric,
     ) -> Callable[[TestResult], list[MetricResult]]:
+        """
+        Return the default output converter for a given metric.
+
+        :param metric:
+            The metric for which to create the converter.
+        :returns:
+            A callable that converts a `TestResult` to a list of `MetricResult` objects.
+        """
+
         def inner(output: TestResult, metric: DeepEvalMetric) -> list[MetricResult]:
             metric_name = str(metric)
             assert output.metrics_data
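
`default` is a converter factory: the outer call fixes the metric and the returned callable only needs the test result. The diff does not show how `inner` gets its `metric` argument bound, so this is a minimal sketch of one plausible binding via `functools.partial`, with invented types:

import functools
from typing import Callable


def default(metric: str) -> Callable[[dict], list[str]]:
    # Inner converter takes the raw output plus the metric fixed by the factory.
    def inner(output: dict, metric: str) -> list[str]:
        return [f"{metric}: {output.get('score')}"]

    # Bind metric so callers pass only the output, as the return type promises.
    return functools.partial(inner, metric=metric)


converter = default("contextual_relevance")
print(converter({"score": 0.9}))  # ['contextual_relevance: 0.9']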
