
Commit 05cdaee

Merge branch 'main' into feat/gcs-file-data-support
2 parents: 59df355 + 684a6e7

3 files changed: 49 additions & 3 deletions


pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -60,7 +60,7 @@ dependencies = [
   "opentelemetry-exporter-gcp-logging>=1.9.0a0,<2",
   "opentelemetry-exporter-gcp-monitoring>=1.9.0a0,<2",
   "opentelemetry-exporter-gcp-trace>=1.9,<2",
-  "opentelemetry-exporter-otlp-proto-http>=1.36.0",
+  "opentelemetry-exporter-otlp-proto-http>=1.36",
   "opentelemetry-resourcedetector-gcp>=1.9.0a0,<2",
   "opentelemetry-sdk>=1.36,<1.39",
   "pyarrow>=14",
@@ -102,8 +102,8 @@ optional-dependencies.docs = [
   "myst-parser",
   "sphinx<9",
   "sphinx-autodoc-typehints",
-  "sphinx-rtd-theme",
   "sphinx-click",
+  "sphinx-rtd-theme",
 ]
 optional-dependencies.eval = [
   "gepa>=0.1",

src/google/adk/optimization/local_eval_sampler.py

Lines changed: 5 additions & 1 deletion
@@ -289,7 +289,11 @@ def _extract_eval_data(
     for eval_metric_result in per_invocation_result.eval_metric_results:
       eval_metric_results.append({
           "metric_name": eval_metric_result.metric_name,
-          "score": round(eval_metric_result.score, 2),  # accurate enough
+          "score": (
+              round(eval_metric_result.score, 2)
+              if eval_metric_result.score is not None
+              else None
+          ),  # accurate enough
           "eval_status": eval_metric_result.eval_status.name,
       })
     per_invocation_result_dict = {
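
Before this change, a metric that was never scored made round() raise a TypeError, since round(None, 2) is undefined; the new conditional passes None through instead. A minimal sketch of the behavior, using a hypothetical safe_round helper that mirrors the inlined conditional:

# safe_round is a hypothetical name for illustration; the commit inlines
# the same conditional expression directly in the dict literal.
def safe_round(score, ndigits=2):
  # Only round real scores; an unevaluated metric's None passes through.
  return round(score, ndigits) if score is not None else None


assert safe_round(3.14159) == 3.14
assert safe_round(None) is None  # round(None, 2) would raise TypeError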

tests/unittests/optimization/local_eval_sampler_test.py

Lines changed: 42 additions & 0 deletions
@@ -338,6 +338,48 @@ async def test_extract_eval_data(mocker):
   ]
 
 
+def test_extract_eval_data_preserves_none_metric_score(mocker):
+  mock_eval_sets_manager = mocker.MagicMock(spec=EvalSetsManager)
+  mock_eval_case = mocker.MagicMock()
+  mock_eval_case.conversation_scenario = "test_scenario"
+  mock_eval_sets_manager.get_eval_case.return_value = mock_eval_case
+
+  mock_metric_result = mocker.MagicMock(spec=EvalMetricResult)
+  mock_metric_result.metric_name = "test_metric"
+  mock_metric_result.score = None
+  mock_metric_result.eval_status = EvalStatus.NOT_EVALUATED
+
+  mock_per_inv_result = mocker.MagicMock(spec=EvalMetricResultPerInvocation)
+  mock_per_inv_result.actual_invocation = mocker.MagicMock(spec=Invocation)
+  mock_per_inv_result.expected_invocation = mocker.MagicMock(spec=Invocation)
+  mock_per_inv_result.eval_metric_results = [mock_metric_result]
+
+  mock_eval_result = mocker.MagicMock(spec=EvalCaseResult)
+  mock_eval_result.eval_id = "t1"
+  mock_eval_result.eval_metric_result_per_invocation = [mock_per_inv_result]
+
+  mocker.patch(
+      "google.adk.optimization.local_eval_sampler.extract_single_invocation_info",
+      side_effect=[{"info": "actual"}, {"info": "expected"}],
+  )
+
+  config = LocalEvalSamplerConfig(
+      eval_config=EvalConfig(),
+      app_name="test_app",
+      train_eval_set="train_set",
+      train_eval_case_ids=["t1"],
+  )
+  interface = LocalEvalSampler(config, mock_eval_sets_manager)
+
+  eval_data = interface._extract_eval_data("train_set", [mock_eval_result])
+
+  assert eval_data["t1"]["invocations"][0]["eval_metric_results"] == [{
+      "metric_name": "test_metric",
+      "score": None,
+      "eval_status": "NOT_EVALUATED",
+  }]
+
+
 @pytest.mark.asyncio
 async def test_sample_and_score(mocker):
   # Mock results