Skip to content

Commit 4311a5c

Browse files
committed
set the eval metadata status as well
1 parent 1dafdd1 commit 4311a5c

1 file changed

Lines changed: 10 additions & 2 deletions

File tree

eval_protocol/pytest/evaluation_test.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
371371
row.input_metadata.session_data = {}
372372
row.input_metadata.session_data["mode"] = mode
373373
# Initialize eval_metadata for each row
374-
row.eval_metadata = eval_metadata
374+
row.eval_metadata = eval_metadata.model_copy(deep=True)
375375
row.execution_metadata.experiment_id = experiment_id
376376
row.execution_metadata.invocation_id = invocation_id
377377

@@ -443,6 +443,10 @@ async def _execute_pointwise_eval_with_semaphore(
443443
is_score_valid=False,
444444
reason=f"Error during evaluation: {type(e).__name__}: {e}",
445445
)
446+
if result.eval_metadata is not None:
447+
result.eval_metadata.status = Status.error(
448+
f"Error during evaluation: {type(e).__name__}: {e}",
449+
)
446450
if not isinstance(result, EvaluationRow):
447451
raise ValueError(
448452
f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test."
@@ -477,7 +481,11 @@ async def _execute_groupwise_eval_with_semaphore(
477481
score=0.0,
478482
is_score_valid=False,
479483
reason=f"Error during evaluation: {type(e).__name__}: {e}",
480-
)
484+
)
485+
if row.eval_metadata is not None:
486+
row.eval_metadata.status = Status.error(
487+
f"Error during evaluation: {type(e).__name__}: {e}",
488+
)
481489
if not isinstance(results, list):
482490
raise ValueError(
483491
f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test."

0 commit comments

Comments (0)