Skip to content

Commit 805c925

Browse files
committed
fix(module): add EvaluationModuleError to public API and wrap _compute exceptions
Adds an EvaluationModuleError exception class to evaluate/module.py and exports it from evaluate/__init__.py, so callers can catch evaluate-specific failures without catching the broad Exception type or importing internal sklearn/numpy error types. Wraps the _compute() call in EvaluationModule.compute() so that raw exceptions raised by metric backends (ValueError, KeyError, etc.) surface as EvaluationModuleError, with the original exception chained as its cause, instead of leaking implementation details. Closes #758
1 parent a7dd338 commit 805c925

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

src/evaluate/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,15 @@
4545
from .info import ComparisonInfo, EvaluationModuleInfo, MeasurementInfo, MetricInfo
4646
from .inspect import inspect_evaluation_module, list_evaluation_modules
4747
from .loading import load
48-
from .module import CombinedEvaluations, Comparison, EvaluationModule, Measurement, Metric, combine
48+
from .module import (
49+
CombinedEvaluations,
50+
Comparison,
51+
EvaluationModule,
52+
EvaluationModuleError,
53+
Measurement,
54+
Metric,
55+
combine,
56+
)
4957
from .saving import save
5058
from .utils import *
5159
from .utils import gradio, logging

src/evaluate/module.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,15 @@
4141
logger = get_logger(__name__)
4242

4343

44+
class EvaluationModuleError(Exception):
45+
"""Raised when an evaluation module's ``_compute`` method fails.
46+
47+
Catching this exception lets callers distinguish evaluate-specific
48+
failures from unrelated ``Exception`` subclasses without importing
49+
internal sklearn or numpy error types.
50+
"""
51+
52+
4453
class FileFreeLock(BaseFileLock):
4554
"""Thread lock until a file **cannot** be locked"""
4655

@@ -464,7 +473,12 @@ def compute(self, *, predictions=None, references=None, **kwargs) -> Optional[di
464473

465474
inputs = {input_name: self.data[input_name][:] for input_name in self._feature_names()}
466475
with temp_seed(self.seed):
467-
output = self._compute(**inputs, **compute_kwargs)
476+
try:
477+
output = self._compute(**inputs, **compute_kwargs)
478+
except EvaluationModuleError:
479+
raise
480+
except Exception as e:
481+
raise EvaluationModuleError(f"Metric '{self.name}' raised {type(e).__name__}: {e}") from e
468482

469483
if self.buf_writer is not None:
470484
self.buf_writer = None

0 commit comments

Comments
 (0)