Skip to content

Commit 6ac28a5

Browse files
jsondaicopybara-github
authored andcommitted
feat: GenAI Client(evals) - update SDK type definitions for Agent Data
PiperOrigin-RevId: 873093421
1 parent 89d5723 commit 6ac28a5

File tree

5 files changed

+382
-218
lines changed

5 files changed

+382
-218
lines changed

tests/unit/vertexai/genai/replays/test_evaluate_instances.py

Lines changed: 47 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,19 @@ def test_rouge_metric(client):
8181

8282
def test_pointwise_metric(client):
8383
"""Tests the _evaluate_instances method with PointwiseMetricInput."""
84-
instance_dict = {"prompt": "What is the capital of France?", "response": "Paris"}
84+
instance_dict = {
85+
"prompt": "What is the capital of France?",
86+
"response": "Paris",
87+
}
8588
json_instance = json.dumps(instance_dict)
8689

8790
test_input = types.PointwiseMetricInput(
8891
instance=types.PointwiseMetricInstance(json_instance=json_instance),
8992
metric_spec=genai_types.PointwiseMetricSpec(
90-
metric_prompt_template="Evaluate if the response '{response}' correctly answers the prompt '{prompt}'."
93+
metric_prompt_template=(
94+
"Evaluate if the response '{response}' correctly answers the"
95+
" prompt '{prompt}'."
96+
)
9197
),
9298
)
9399
response = client.evals.evaluate_instances(
@@ -99,82 +105,37 @@ def test_pointwise_metric(client):
99105
assert response.pointwise_metric_result.score is not None
100106

101107

102-
def test_pointwise_metric_with_agent_data(client):
103-
"""Tests the _evaluate_instances method with PointwiseMetricInput and agent_data."""
104-
instance_dict = {"prompt": "What is the capital of France?", "response": "Paris"}
105-
json_instance = json.dumps(instance_dict)
106-
agent_data = types.evals.AgentData(
107-
agent_config=types.evals.AgentConfig(
108-
tools=types.evals.Tools(
109-
tool=[
110-
genai_types.Tool(
111-
function_declarations=[
112-
genai_types.FunctionDeclaration(name="search")
113-
]
114-
)
115-
]
116-
),
117-
developer_instruction=types.evals.InstanceData(text="instruction"),
118-
),
119-
events=types.evals.Events(
120-
event=[genai_types.Content(parts=[genai_types.Part(text="hello")])]
121-
),
122-
)
123-
instance = types.EvaluationInstance(
124-
prompt=types.evals.InstanceData(text="What is the capital of France?"),
125-
response=types.evals.InstanceData(text="Paris"),
126-
agent_data=agent_data,
127-
)
128-
129-
test_input = types.PointwiseMetricInput(
130-
instance=types.PointwiseMetricInstance(json_instance=json_instance),
131-
metric_spec=genai_types.PointwiseMetricSpec(
132-
metric_prompt_template="Evaluate if the response '{response}' correctly answers the prompt '{prompt}'."
133-
),
134-
)
135-
response = client.evals.evaluate_instances(
136-
metric_config=types._EvaluateInstancesRequestParameters(
137-
pointwise_metric_input=test_input,
138-
instance=instance,
139-
)
140-
)
141-
assert response.pointwise_metric_result is not None
142-
assert response.pointwise_metric_result.score is not None
143-
144-
145-
def test_predefined_metric_with_agent_data(client):
146-
"""Tests the _evaluate_instances method with predefined metric and agent_data."""
147-
agent_data = types.evals.AgentData(
148-
agent_config=types.evals.AgentConfig(
149-
tools=types.evals.Tools(
150-
tool=[
151-
genai_types.Tool(
152-
function_declarations=[
153-
genai_types.FunctionDeclaration(name="search")
154-
]
155-
)
156-
]
157-
),
158-
developer_instruction=types.evals.InstanceData(text="instruction"),
159-
),
160-
events=types.evals.Events(
161-
event=[genai_types.Content(parts=[genai_types.Part(text="hello")])]
162-
),
163-
)
164-
instance = types.EvaluationInstance(
165-
prompt=types.evals.InstanceData(text="What is the capital of France?"),
166-
response=types.evals.InstanceData(text="Paris"),
167-
reference=types.evals.InstanceData(text="Paris"),
168-
agent_data=agent_data,
169-
)
170-
171-
response = client.evals.evaluate_instances(
172-
metric_config=types._EvaluateInstancesRequestParameters(
173-
metrics=[types.Metric(name="general_quality_v1")],
174-
instance=instance,
175-
)
176-
)
177-
assert response.metric_results[0].score is not None
108+
# def test_predefined_metric_with_agent_data(client):
109+
# """Tests the _evaluate_instances method with predefined metric and agent_data."""
110+
# agent_data = types.evals.AgentData(
111+
# agent_config=types.evals.AgentConfig(
112+
# tools=[
113+
# genai_types.Tool(
114+
# function_declarations=[
115+
# genai_types.FunctionDeclaration(name="search")
116+
# ]
117+
# )
118+
# ],
119+
# developer_instruction=types.evals.InstanceData(text="instruction"),
120+
# ),
121+
# events=types.evals.Events(
122+
# event=[genai_types.Content(parts=[genai_types.Part(text="hello")])]
123+
# ),
124+
# )
125+
# instance = types.EvaluationInstance(
126+
# prompt=types.evals.InstanceData(text="What is the capital of France?"),
127+
# response=types.evals.InstanceData(text="Paris"),
128+
# reference=types.evals.InstanceData(text="Paris"),
129+
# agent_data=agent_data,
130+
# )
131+
132+
# response = client.evals.evaluate_instances(
133+
# metric_config=types._EvaluateInstancesRequestParameters(
134+
# metrics=[types.Metric(name="general_quality_v1")],
135+
# instance=instance,
136+
# )
137+
# )
138+
# assert response.metric_results[0].score is not None
178139

179140

180141
def test_pairwise_metric_with_autorater(client):
@@ -189,7 +150,10 @@ def test_pairwise_metric_with_autorater(client):
189150
test_input = types.PairwiseMetricInput(
190151
instance=types.PairwiseMetricInstance(json_instance=json_instance),
191152
metric_spec=genai_types.PairwiseMetricSpec(
192-
metric_prompt_template="Which response is a better summary? Baseline: '{baseline_response}' or Candidate: '{candidate_response}'"
153+
metric_prompt_template=(
154+
"Which response is a better summary? Baseline:"
155+
" '{baseline_response}' or Candidate: '{candidate_response}'"
156+
)
193157
),
194158
)
195159
autorater_config = genai_types.AutoraterConfig(sampling_count=2)
@@ -240,7 +204,10 @@ def test_inference_with_prompt_template(client):
240204

241205
def test_run_inference_with_agent(client):
242206
test_df = pd.DataFrame(
243-
{"prompt": ["agent prompt"], "session_inputs": ['{"user_id": "user_123"}']}
207+
{
208+
"prompt": ["agent prompt"],
209+
"session_inputs": ['{"user_id": "user_123"}'],
210+
}
244211
)
245212
inference_result = client.evals.run_inference(
246213
agent="projects/977012026409/locations/us-central1/reasoningEngines/7188347537655332864",

tests/unit/vertexai/genai/test_evals.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4056,7 +4056,7 @@ def test_eval_case_to_agent_data(self):
40564056
)
40574057

40584058
assert agent_data.agent_config.developer_instruction.text == "instruction1"
4059-
assert agent_data.agent_config.tools.tool == [tool]
4059+
assert agent_data.agent_config.legacy_tools.tool == [tool]
40604060
assert agent_data.events.event[0].parts[0].text == "intermediate event"
40614061

40624062
def test_eval_case_to_agent_data_events_only(self):
@@ -4164,7 +4164,7 @@ def test_eval_case_to_agent_data_agent_info_empty_tools(self):
41644164
)
41654165

41664166
assert agent_data.agent_config.developer_instruction.text == "instruction1"
4167-
assert not agent_data.agent_config.tools.tool
4167+
assert not agent_data.agent_config.legacy_tools.tool
41684168

41694169
def test_eval_case_to_agent_data_agent_info_empty(self):
41704170
intermediate_events = [

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ def _eval_case_to_agent_data(
899899

900900
if tools or developer_instruction:
901901
agent_config = types.evals.AgentConfig(
902-
tools=tools,
902+
legacy_tools=tools,
903903
developer_instruction=developer_instruction,
904904
)
905905

vertexai/_genai/types/common.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,6 +1492,10 @@ class EvalCase(_common.BaseModel):
14921492
default=None,
14931493
description="""This field is experimental and may change in future versions. The agent info of the agent under evaluation. This can be extended for multi-agent evaluation.""",
14941494
)
1495+
agent_data: Optional[evals_types.AgentData] = Field(
1496+
default=None,
1497+
description="""This field is experimental and may change in future versions. The agent data of the agent under evaluation.""",
1498+
)
14951499
# Allow extra fields to support custom metric prompts and stay backward compatible.
14961500
model_config = ConfigDict(frozen=True, extra="allow")
14971501

@@ -1526,6 +1530,9 @@ class EvalCaseDict(TypedDict, total=False):
15261530
agent_info: Optional[evals_types.AgentInfo]
15271531
"""This field is experimental and may change in future versions. The agent info of the agent under evaluation. This can be extended for multi-agent evaluation."""
15281532

1533+
agent_data: Optional[evals_types.AgentData]
1534+
"""This field is experimental and may change in future versions. The agent data of the agent under evaluation."""
1535+
15291536

15301537
EvalCaseOrDict = Union[EvalCase, EvalCaseDict]
15311538

0 commit comments

Comments
 (0)