Skip to content

Commit c2c3911

Browse files
authored
Merge branch 'main' into feat/add-metadata-parameter
2 parents b633bab + 81eaeb5 commit c2c3911

File tree

15 files changed

+312
-66
lines changed

15 files changed

+312
-66
lines changed

.github/workflows/check-file-contents.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ jobs:
9696
echo ""
9797
9898
set +e
99-
FILES_WITH_FORBIDDEN_IMPORT=$(grep -lE '^from.*cli.*import.*$' $CHANGED_FILES)
99+
FILES_WITH_FORBIDDEN_IMPORT=$(grep -lE '^from.*\bcli\b.*import.*$' $CHANGED_FILES)
100100
GREP_EXIT_CODE=$?
101101
set -e
102102

contributing/samples/toolbox_agent/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ Install SQLite from [https://sqlite.org/](https://sqlite.org/)
2626

2727
### 3. Install Required Python Dependencies
2828

29-
**Important**: The ADK's `ToolboxToolset` class requires the `toolbox-core` package, which is not automatically installed with the ADK. Install it using:
29+
**Important**: The ADK's `ToolboxToolset` class requires the `toolbox-adk` package, which is not automatically installed with the ADK. Install it using:
3030

3131
```bash
32-
pip install toolbox-core
32+
pip install google-adk[toolbox]
3333
```
3434

3535
### 4. Create Database (Optional)

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,12 @@ extensions = [
157157
"llama-index-readers-file>=0.4.0", # For retrieval using LlamaIndex.
158158
"llama-index-embeddings-google-genai>=0.3.0", # For files retrieval using LlamaIndex.
159159
"lxml>=5.3.0", # For load_web_page tool.
160-
"toolbox-adk>=0.1.0", # For tools.toolbox_toolset.ToolboxToolset
160+
"toolbox-adk>=0.5.7, <0.6.0", # For tools.toolbox_toolset.ToolboxToolset
161161
]
162162

163163
otel-gcp = ["opentelemetry-instrumentation-google-genai>=0.3b0, <1.0.0"]
164164

165+
toolbox = ["toolbox-adk>=0.5.7, <0.6.0"]
165166

166167
[tool.pyink]
167168
# Format py files following Google style-guide

src/google/adk/agents/remote_a2a_agent.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,8 @@ async def _handle_a2a_response(
443443
and event.content is not None
444444
and event.content.parts
445445
):
446-
event.content.parts[0].thought = True
446+
for part in event.content.parts:
447+
part.thought = True
447448
elif (
448449
isinstance(update, A2ATaskStatusUpdateEvent)
449450
and update.status
@@ -503,8 +504,6 @@ async def _handle_a2a_response(
503504
invocation_id=ctx.invocation_id,
504505
branch=ctx.branch,
505506
)
506-
event.custom_metadata = event.custom_metadata or {}
507-
event.custom_metadata[A2A_METADATA_PREFIX + "response"] = True
508507
return event
509508
except A2AClientError as e:
510509
logger.error("Failed to handle A2A response: %s", e)

src/google/adk/cli/adk_web_server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ class AppInfo(common.BaseModel):
330330
root_agent_name: str
331331
description: str
332332
language: Literal["yaml", "python"]
333+
is_computer_use: bool = False
333334

334335

335336
class ListAppsResponse(common.BaseModel):

src/google/adk/cli/cli_eval.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
from ..evaluation.eval_case import get_all_tool_calls
3535
from ..evaluation.eval_case import IntermediateDataType
3636
from ..evaluation.eval_metrics import EvalMetric
37+
from ..evaluation.eval_metrics import Interval
38+
from ..evaluation.eval_metrics import MetricInfo
39+
from ..evaluation.eval_metrics import MetricValueInfo
3740
from ..evaluation.eval_result import EvalCaseResult
3841
from ..evaluation.eval_sets_manager import EvalSetsManager
3942
from ..utils.context_utils import Aclosing
@@ -70,6 +73,19 @@ def _get_agent_module(agent_module_file_path: str):
7073
return _import_from_path(module_name, file_path)
7174

7275

76+
def get_default_metric_info(
    metric_name: str, description: str = ""
) -> MetricInfo:
  """Builds a MetricInfo with a default unit-interval value range.

  Args:
    metric_name: Name of the metric being described.
    description: Optional human-readable description of the metric.

  Returns:
    A MetricInfo whose value range defaults to [0.0, 1.0].
  """
  # Custom metrics without explicit metadata are assumed to score in [0, 1].
  default_interval = Interval(min_value=0.0, max_value=1.0)
  return MetricInfo(
      metric_name=metric_name,
      description=description,
      metric_value_info=MetricValueInfo(interval=default_interval),
  )
87+
88+
7389
def get_root_agent(agent_module_file_path: str) -> Agent:
7490
"""Returns root agent given the agent module."""
7591
agent_module = _get_agent_module(agent_module_file_path)

src/google/adk/cli/cli_tools_click.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,8 +712,11 @@ def cli_eval(
712712
logs.setup_adk_logger(getattr(logging, log_level.upper()))
713713

714714
try:
715+
import importlib
716+
715717
from ..evaluation.base_eval_service import InferenceConfig
716718
from ..evaluation.base_eval_service import InferenceRequest
719+
from ..evaluation.custom_metric_evaluator import _CustomMetricEvaluator
717720
from ..evaluation.eval_config import get_eval_metrics_from_config
718721
from ..evaluation.eval_config import get_evaluation_criteria_or_default
719722
from ..evaluation.eval_result import EvalCaseResult
@@ -723,9 +726,11 @@ def cli_eval(
723726
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
724727
from ..evaluation.local_eval_sets_manager import load_eval_set_from_file
725728
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
729+
from ..evaluation.metric_evaluator_registry import DEFAULT_METRIC_EVALUATOR_REGISTRY
726730
from ..evaluation.simulation.user_simulator_provider import UserSimulatorProvider
727731
from .cli_eval import _collect_eval_results
728732
from .cli_eval import _collect_inferences
733+
from .cli_eval import get_default_metric_info
729734
from .cli_eval import get_root_agent
730735
from .cli_eval import parse_and_get_evals_to_run
731736
from .cli_eval import pretty_print_eval_result
@@ -818,11 +823,30 @@ def cli_eval(
818823
)
819824

820825
try:
826+
metric_evaluator_registry = DEFAULT_METRIC_EVALUATOR_REGISTRY
827+
if eval_config.custom_metrics:
828+
for (
829+
metric_name,
830+
config,
831+
) in eval_config.custom_metrics.items():
832+
if config.metric_info:
833+
metric_info = config.metric_info.model_copy()
834+
metric_info.metric_name = metric_name
835+
else:
836+
metric_info = get_default_metric_info(
837+
metric_name=metric_name, description=config.description
838+
)
839+
840+
metric_evaluator_registry.register_evaluator(
841+
metric_info, _CustomMetricEvaluator
842+
)
843+
821844
eval_service = LocalEvalService(
822845
root_agent=root_agent,
823846
eval_sets_manager=eval_sets_manager,
824847
eval_set_results_manager=eval_set_results_manager,
825848
user_simulator_provider=user_simulator_provider,
849+
metric_evaluator_registry=metric_evaluator_registry,
826850
)
827851

828852
inference_results = asyncio.run(

src/google/adk/cli/utils/agent_loader.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from ...agents import config_agent_utils
3333
from ...agents.base_agent import BaseAgent
3434
from ...apps.app import App
35+
from ...tools.computer_use.computer_use_toolset import ComputerUseToolset
3536
from ...utils.feature_decorator import experimental
3637
from .base_agent_loader import BaseAgentLoader
3738

@@ -358,12 +359,17 @@ def list_agents_detailed(self) -> list[dict[str, Any]]:
358359
agent = loaded
359360

360361
language = self._determine_agent_language(agent_name)
362+
is_computer_use = any(
363+
isinstance(t, ComputerUseToolset)
364+
for t in getattr(agent, "tools", [])
365+
)
361366

362367
app_info = {
363368
"name": agent_name,
364369
"root_agent_name": agent.name,
365370
"description": agent.description,
366371
"language": language,
372+
"is_computer_use": is_computer_use,
367373
}
368374
apps_info.append(app_info)
369375

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import importlib
18+
import inspect
19+
from typing import Callable
20+
from typing import Optional
21+
22+
from typing_extensions import override
23+
24+
from .eval_case import ConversationScenario
25+
from .eval_case import Invocation
26+
from .eval_metrics import EvalMetric
27+
from .eval_metrics import EvalStatus
28+
from .evaluator import EvaluationResult
29+
from .evaluator import Evaluator
30+
31+
32+
def _get_metric_function(
33+
custom_function_path: str,
34+
) -> Callable[..., EvaluationResult]:
35+
"""Returns the custom metric function from the given path."""
36+
try:
37+
module_name, function_name = custom_function_path.rsplit(".", 1)
38+
module = importlib.import_module(module_name)
39+
metric_function = getattr(module, function_name)
40+
return metric_function
41+
except (ImportError, AttributeError, ValueError) as e:
42+
raise ImportError(
43+
f"Could not import custom metric function from {custom_function_path}"
44+
) from e
45+
46+
47+
def _get_eval_status(score: Optional[float], threshold: float) -> EvalStatus:
  """Maps a metric score to an EvalStatus relative to the threshold."""
  # A missing score means the metric was never computed.
  if score is None:
    return EvalStatus.NOT_EVALUATED
  if score >= threshold:
    return EvalStatus.PASSED
  return EvalStatus.FAILED
51+
52+
53+
class _CustomMetricEvaluator(Evaluator):
  """Runs a user-supplied metric function over eval invocations."""

  def __init__(self, eval_metric: EvalMetric, custom_function_path: str):
    self._eval_metric = eval_metric
    # Resolve the dotted path eagerly so misconfiguration surfaces at
    # construction time rather than mid-evaluation.
    self._metric_function = _get_metric_function(custom_function_path)

  @override
  async def evaluate_invocations(
      self,
      actual_invocations: list[Invocation],
      expected_invocations: Optional[list[Invocation]],
      conversation_scenario: Optional[ConversationScenario] = None,
  ) -> EvaluationResult:
    call_args = (
        actual_invocations,
        expected_invocations,
        conversation_scenario,
    )
    # Support both async and sync metric functions transparently.
    if inspect.iscoroutinefunction(self._metric_function):
      eval_result = await self._metric_function(*call_args)
    else:
      eval_result = self._metric_function(*call_args)

    # Derive the pass/fail status from the overall score against the
    # configured threshold.
    eval_result.overall_eval_status = _get_eval_status(
        eval_result.overall_score, self._eval_metric.threshold
    )
    return eval_result

src/google/adk/evaluation/eval_config.py

Lines changed: 64 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,46 @@
2828
from ..agents.common_configs import CodeConfig
2929
from ..evaluation.eval_metrics import EvalMetric
3030
from .eval_metrics import BaseCriterion
31+
from .eval_metrics import MetricInfo
3132
from .eval_metrics import Threshold
3233
from .simulation.user_simulator import BaseUserSimulatorConfig
3334

3435
logger = logging.getLogger("google_adk." + __name__)
3536

3637

38+
class CustomMetricConfig(BaseModel):
  """Configuration for a custom metric.

  Pairs the code location of a user-defined metric function with optional
  metadata describing the metric.
  """

  model_config = ConfigDict(
      alias_generator=alias_generators.to_camel,
      populate_by_name=True,
  )

  # Locates the user-defined metric function via its fully qualified path.
  code_config: CodeConfig = Field(
      description=(
          "Code config for the custom metric, used to locate the custom metric"
          " function."
      )
  )
  # Optional metadata (value range, description) for the metric; a default
  # is synthesized when absent.
  metric_info: Optional[MetricInfo] = Field(
      default=None,
      description="Metric info for the custom metric.",
  )
  description: str = Field(
      default="",
      description="Description for the custom metric info.",
  )

  @model_validator(mode="after")
  def check_code_config_args(self) -> "CustomMetricConfig":
    """Rejects configs whose CodeConfig carries args.

    The eval framework supplies a fixed argument list to custom metric
    functions, so user-provided args cannot be forwarded.
    """
    if not self.code_config.args:
      return self
    raise ValueError(
        "args field in CodeConfig for custom metric is not supported."
    )
69+
70+
3771
class EvalConfig(BaseModel):
3872
"""Configurations needed to run an Eval.
3973
@@ -74,24 +108,43 @@ class EvalConfig(BaseModel):
74108
""",
75109
)
76110

77-
custom_metrics: Optional[dict[str, CodeConfig]] = Field(
111+
custom_metrics: Optional[dict[str, CustomMetricConfig]] = Field(
78112
default=None,
79-
description="""A dictionary mapping custom metric names to CodeConfig
80-
objects, which specify the path to the function for each custom metric.
113+
description="""A dictionary mapping custom metric names to
114+
a CustomMetricConfig object.
81115
82116
If a metric name in `criteria` is also present in `custom_metrics`, the
83-
corresponding `CodeConfig`'s `name` field will be used to locate the custom
84-
metric implementation. The `name` field should contain the fully qualified
85-
path to the custom metric function, e.g., `my.custom.metrics.metric_function`.
117+
`code_config` in `CustomMetricConfig` will be used to locate the custom metric
118+
implementation.
119+
120+
The `metric_info` field in `CustomMetricConfig` can be used to provide metric
121+
information like `min_value`, `max_value`, and `description`. If `metric_info`
122+
is not provided, a default `MetricInfo` will be created, using
123+
`description` from `CustomMetricConfig` if provided, and default values
124+
for `min_value` (0.0) and `max_value` (1.0).
86125
87126
Example:
88127
{
89128
"criteria": {
90-
"my_custom_metric": 0.5
129+
"my_custom_metric": 0.5,
130+
"my_simple_metric": 0.8
91131
},
92132
"custom_metrics": {
133+
"my_simple_metric": {
134+
"code_config": {
135+
"name": "path.to.my.simple.metric.function"
136+
}
137+
},
93138
"my_custom_metric": {
94-
"name": "path.to.my.custom.metric.function"
139+
"code_config": {
140+
"name": "path.to.my.custom.metric.function"
141+
},
142+
"metric": {
143+
"metric_name": "my_custom_metric",
144+
"min_value": -10.0,
145+
"max_value": 10.0,
146+
"description": "My custom metric."
147+
}
95148
}
96149
}
97150
}
@@ -103,17 +156,6 @@ class EvalConfig(BaseModel):
103156
description="Config to be used by the user simulator.",
104157
)
105158

106-
@model_validator(mode="after")
107-
def check_custom_metrics_code_config_args(self) -> "EvalConfig":
108-
if self.custom_metrics:
109-
for metric_name, metric_config in self.custom_metrics.items():
110-
if metric_config.args:
111-
raise ValueError(
112-
f"args field in CodeConfig for custom metric '{metric_name}' is"
113-
" not supported."
114-
)
115-
return self
116-
117159

118160
_DEFAULT_EVAL_CONFIG = EvalConfig(
119161
criteria={"tool_trajectory_avg_score": 1.0, "response_match_score": 0.8}
@@ -144,11 +186,10 @@ def get_eval_metrics_from_config(eval_config: EvalConfig) -> list[EvalMetric]:
144186
if eval_config.criteria:
145187
for metric_name, criterion in eval_config.criteria.items():
146188
custom_function_path = None
147-
if (
148-
eval_config.custom_metrics
149-
and metric_name in eval_config.custom_metrics
189+
if eval_config.custom_metrics and (
190+
config := eval_config.custom_metrics.get(metric_name)
150191
):
151-
custom_function_path = eval_config.custom_metrics[metric_name].name
192+
custom_function_path = config.code_config.name
152193

153194
if isinstance(criterion, float):
154195
eval_metric_list.append(

0 commit comments

Comments
 (0)