Skip to content

Commit e9215a2

Browse files
committed
fix: align eval user_id default and warn on subdirectory evalset files
Fixes two server-side issues from #5423: 1. Align default user_id fallback from 'test_user_id' to 'user' to match the adk-web frontend default, preventing session 404 errors when eval sets don't specify a user_id. 2. Log a warning when .evalset.json files are found in subdirectories of the agent directory. These files are not discovered by the UI and should be moved to the root agent directory. This addresses the 'fragile and undocumented' evalset discovery reported in the issue. Note on port change (Issue 3): adk web now serves on port 8000 instead of the previous 8501. This is not a bug but should be called out in migration/upgrade notes.
1 parent 7ae83b2 commit e9215a2

4 files changed

Lines changed: 62 additions & 24 deletions

File tree

src/google/adk/evaluation/evaluation_generator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ async def _generate_inferences_from_root_agent(
209209
app_name = (
210210
initial_session.app_name if initial_session else "EvaluationGenerator"
211211
)
212-
user_id = initial_session.user_id if initial_session else "test_user_id"
212+
user_id = initial_session.user_id if initial_session else "user"
213213
session_id = session_id if session_id else str(uuid.uuid4())
214214

215215
_ = await session_service.create_session(

src/google/adk/evaluation/local_eval_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ async def _evaluate_single_inference_result(
265265
user_id = (
266266
eval_case.session_input.user_id
267267
if eval_case.session_input and eval_case.session_input.user_id
268-
else 'test_user_id'
268+
else 'user'
269269
)
270270

271271
if eval_case.conversation_scenario is None and len(

src/google/adk/evaluation/local_eval_sets_manager.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -247,19 +247,37 @@ def list_eval_sets(self, app_name: str) -> list[str]:
247247
Raises:
248248
NotFoundError: If the eval directory for the app is not found.
249249
"""
250-
eval_set_file_path = os.path.join(self._agents_dir, app_name)
250+
app_dir = os.path.join(self._agents_dir, app_name)
251+
if not os.path.isdir(app_dir):
252+
raise NotFoundError(
253+
f"Eval directory for app `{app_name}` not found."
254+
)
251255
eval_sets = []
252-
try:
253-
for file in os.listdir(eval_set_file_path):
256+
for file in os.listdir(app_dir):
257+
if file.endswith(_EVAL_SET_FILE_EXTENSION):
258+
eval_sets.append(
259+
file.removesuffix(_EVAL_SET_FILE_EXTENSION)
260+
)
261+
262+
# Warn about .evalset.json files in subdirectories that won't be
263+
# discovered. This helps users who organize eval sets into folders
264+
# understand why they don't appear in the UI.
265+
for dirpath, _, filenames in os.walk(app_dir):
266+
if dirpath == app_dir:
267+
continue
268+
for file in filenames:
254269
if file.endswith(_EVAL_SET_FILE_EXTENSION):
255-
eval_sets.append(
256-
os.path.basename(file).removesuffix(_EVAL_SET_FILE_EXTENSION)
270+
rel_path = os.path.relpath(
271+
os.path.join(dirpath, file), app_dir
257272
)
258-
return sorted(eval_sets)
259-
except FileNotFoundError as e:
260-
raise NotFoundError(
261-
f"Eval directory for app `{app_name}` not found."
262-
) from e
273+
logger.warning(
274+
"Eval set file '%s' found in subdirectory and will be"
275+
" ignored. Move it to '%s' to make it discoverable.",
276+
rel_path,
277+
app_dir,
278+
)
279+
280+
return sorted(eval_sets)
263281

264282
@override
265283
def get_eval_case(

tests/unittests/evaluation/test_local_eval_sets_manager.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import json
18+
import logging
1819
import os
1920
import uuid
2021

@@ -409,27 +410,46 @@ def test_local_eval_sets_manager_create_eval_set_already_exists(
409410
local_eval_sets_manager.create_eval_set(app_name, eval_set_id)
410411

411412
def test_local_eval_sets_manager_list_eval_sets_success(
412-
self, local_eval_sets_manager, mocker
413+
self, tmp_path
413414
):
414415
app_name = "test_app"
415-
mock_listdir_return = [
416-
"eval_set_1.evalset.json",
417-
"eval_set_2.evalset.json",
418-
"not_an_eval_set.txt",
419-
]
420-
mocker.patch("os.listdir", return_value=mock_listdir_return)
421-
mocker.patch("os.path.join", return_value="dummy_path")
422-
mocker.patch("os.path.basename", side_effect=lambda x: x)
416+
app_dir = tmp_path / app_name
417+
app_dir.mkdir()
418+
(app_dir / "eval_set_1.evalset.json").write_text("{}")
419+
(app_dir / "eval_set_2.evalset.json").write_text("{}")
420+
(app_dir / "not_an_eval_set.txt").write_text("")
423421

422+
local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path))
424423
eval_sets = local_eval_sets_manager.list_eval_sets(app_name)
425424

426425
assert eval_sets == ["eval_set_1", "eval_set_2"]
427426

428-
def test_local_eval_sets_manager_list_eval_sets_not_found(
429-
self, local_eval_sets_manager, mocker
427+
def test_local_eval_sets_manager_list_eval_sets_subdirectories(
428+
self, tmp_path, caplog
430429
):
430+
"""Eval sets in subdirectories should be ignored with a warning."""
431431
app_name = "test_app"
432-
mocker.patch("os.listdir", side_effect=FileNotFoundError)
432+
app_dir = tmp_path / app_name
433+
app_dir.mkdir()
434+
(app_dir / "top_level.evalset.json").write_text("{}")
435+
sub_dir = app_dir / "eval_sets"
436+
sub_dir.mkdir()
437+
(sub_dir / "nested.evalset.json").write_text("{}")
438+
439+
local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path))
440+
with caplog.at_level(logging.WARNING):
441+
eval_sets = local_eval_sets_manager.list_eval_sets(app_name)
442+
443+
# Only root-level eval sets are returned.
444+
assert eval_sets == ["top_level"]
445+
# A warning is logged for the subdirectory file.
446+
assert "nested.evalset.json" in caplog.text
447+
assert "ignored" in caplog.text
448+
449+
def test_local_eval_sets_manager_list_eval_sets_not_found(
450+
self, local_eval_sets_manager
451+
):
452+
app_name = "nonexistent_app"
433453

434454
with pytest.raises(NotFoundError):
435455
local_eval_sets_manager.list_eval_sets(app_name)

0 commit comments

Comments
 (0)