@@ -49,7 +49,7 @@ class DeepEvalMetric(Enum):
4949 #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
5050 CONTEXTUAL_RELEVANCE = "contextual_relevance"
5151
def __str__(self) -> str:
    """Return the metric's wire-format name (the enum's string value)."""
    return self.value
5454
5555 @classmethod
@@ -87,7 +87,13 @@ class MetricResult:
8787 score : float | None = None
8888 explanation : str | None = None
8989
def to_dict(self) -> dict[str, Any]:
    """
    Serialize this metric result into a plain dictionary.

    :returns:
        A dictionary mapping each dataclass field name to its value.
    """
    serialized: dict[str, Any] = dataclasses.asdict(self)
    return serialized
9298
9399
@@ -129,6 +135,23 @@ def new(
129135 * ,
130136 init_parameters : Mapping [str , type ] | None = None ,
131137 ) -> "MetricDescriptor" :
138+ """
139+ Create a new metric descriptor, inferring input parameters from the converter signature.
140+
141+ :param metric:
142+ The metric enum value.
143+ :param backend:
144+ The DeepEval metric class to instantiate.
145+ :param input_converter:
146+ Callable that converts Haystack inputs to DeepEval test cases.
147+ :param output_converter:
148+ Callable that converts DeepEval results to `MetricResult` objects.
149+ If ``None``, the default output converter is used.
150+ :param init_parameters:
151+ Optional mapping of parameter names to types accepted by the backend metric's constructor.
152+ :returns:
153+ A new `MetricDescriptor` instance.
154+ """
132155 input_converter_signature = inspect .signature (input_converter )
133156 input_parameters = {}
134157 for name , param in input_converter_signature .parameters .items ():
@@ -158,7 +181,7 @@ class InputConverters:
158181 """
159182
160183 @staticmethod
161- def _validate_input_elements (** kwargs ) :
184+ def _validate_input_elements (** kwargs : Any ) -> None :
162185 for k , collection in kwargs .items ():
163186 if not isinstance (collection , list ):
164187 msg = (
@@ -177,6 +200,18 @@ def _validate_input_elements(**kwargs):
177200
178201 @staticmethod
179202 def validate_input_parameters (metric : DeepEvalMetric , expected : dict [str , Any ], received : dict [str , Any ]) -> None :
203+ """
204+ Validate that all expected input parameters are present in the received inputs.
205+
206+ :param metric:
207+ The metric being evaluated, used for error messages.
208+ :param expected:
209+ Dictionary of expected parameter names to their types.
210+ :param received:
211+ Dictionary of received parameter names to their values.
212+ :raises ValueError:
213+ If a required parameter is missing from ``received``.
214+ """
180215 for param , _ in expected .items ():
181216 if param not in received :
182217 msg = f"DeepEval evaluator expected input parameter '{ param } ' for metric '{ metric } '"
@@ -186,6 +221,18 @@ def validate_input_parameters(metric: DeepEvalMetric, expected: dict[str, Any],
186221 def question_context_response (
187222 questions : list [str ], contexts : list [list [str ]], responses : list [str ]
188223 ) -> Iterable [LLMTestCase ]:
224+ """
225+ Convert question, context, and response inputs to DeepEval test cases.
226+
227+ :param questions:
228+ List of input questions.
229+ :param contexts:
230+ List of retrieval context lists, one per question.
231+ :param responses:
232+ List of model responses, one per question.
233+ :returns:
234+ An iterable of `LLMTestCase` objects.
235+ """
189236 InputConverters ._validate_input_elements (questions = questions , contexts = contexts , responses = responses )
190237 for q , c , r in zip (questions , contexts , responses , strict = True ): # type: ignore
191238 test_case = LLMTestCase (input = q , actual_output = r , retrieval_context = c )
@@ -195,6 +242,20 @@ def question_context_response(
195242 def question_context_response_ground_truth (
196243 questions : list [str ], contexts : list [list [str ]], responses : list [str ], ground_truths : list [str ]
197244 ) -> Iterable [LLMTestCase ]:
245+ """
246+ Convert question, context, response, and ground truth inputs to DeepEval test cases.
247+
248+ :param questions:
249+ List of input questions.
250+ :param contexts:
251+ List of retrieval context lists, one per question.
252+ :param responses:
253+ List of model responses, one per question.
254+ :param ground_truths:
255+ List of expected (ground truth) responses, one per question.
256+ :returns:
257+ An iterable of `LLMTestCase` objects.
258+ """
198259 InputConverters ._validate_input_elements (questions = questions , contexts = contexts , responses = responses )
199260 for q , c , r , gt in zip (questions , contexts , responses , ground_truths , strict = True ): # type: ignore
200261 test_case = LLMTestCase (input = q , actual_output = r , retrieval_context = c , expected_output = gt )
@@ -212,6 +273,15 @@ class OutputConverters:
212273 def default (
213274 metric : DeepEvalMetric ,
214275 ) -> Callable [[TestResult ], list [MetricResult ]]:
276+ """
277+ Return the default output converter for a given metric.
278+
279+ :param metric:
280+ The metric for which to create the converter.
281+ :returns:
282+ A callable that converts a `TestResult` to a list of `MetricResult` objects.
283+ """
284+
215285 def inner (output : TestResult , metric : DeepEvalMetric ) -> list [MetricResult ]:
216286 metric_name = str (metric )
217287 assert output .metrics_data
0 commit comments