@@ -81,13 +81,19 @@ def test_rouge_metric(client):
8181
8282def test_pointwise_metric (client ):
8383 """Tests the _evaluate_instances method with PointwiseMetricInput."""
84- instance_dict = {"prompt" : "What is the capital of France?" , "response" : "Paris" }
84+ instance_dict = {
85+ "prompt" : "What is the capital of France?" ,
86+ "response" : "Paris" ,
87+ }
8588 json_instance = json .dumps (instance_dict )
8689
8790 test_input = types .PointwiseMetricInput (
8891 instance = types .PointwiseMetricInstance (json_instance = json_instance ),
8992 metric_spec = genai_types .PointwiseMetricSpec (
90- metric_prompt_template = "Evaluate if the response '{response}' correctly answers the prompt '{prompt}'."
93+ metric_prompt_template = (
94+ "Evaluate if the response '{response}' correctly answers the"
95+ " prompt '{prompt}'."
96+ )
9197 ),
9298 )
9399 response = client .evals .evaluate_instances (
@@ -99,82 +105,37 @@ def test_pointwise_metric(client):
99105 assert response .pointwise_metric_result .score is not None
100106
101107
102- def test_pointwise_metric_with_agent_data (client ):
103- """Tests the _evaluate_instances method with PointwiseMetricInput and agent_data."""
104- instance_dict = {"prompt" : "What is the capital of France?" , "response" : "Paris" }
105- json_instance = json .dumps (instance_dict )
106- agent_data = types .evals .AgentData (
107- agent_config = types .evals .AgentConfig (
108- tools = types .evals .Tools (
109- tool = [
110- genai_types .Tool (
111- function_declarations = [
112- genai_types .FunctionDeclaration (name = "search" )
113- ]
114- )
115- ]
116- ),
117- developer_instruction = types .evals .InstanceData (text = "instruction" ),
118- ),
119- events = types .evals .Events (
120- event = [genai_types .Content (parts = [genai_types .Part (text = "hello" )])]
121- ),
122- )
123- instance = types .EvaluationInstance (
124- prompt = types .evals .InstanceData (text = "What is the capital of France?" ),
125- response = types .evals .InstanceData (text = "Paris" ),
126- agent_data = agent_data ,
127- )
128-
129- test_input = types .PointwiseMetricInput (
130- instance = types .PointwiseMetricInstance (json_instance = json_instance ),
131- metric_spec = genai_types .PointwiseMetricSpec (
132- metric_prompt_template = "Evaluate if the response '{response}' correctly answers the prompt '{prompt}'."
133- ),
134- )
135- response = client .evals .evaluate_instances (
136- metric_config = types ._EvaluateInstancesRequestParameters (
137- pointwise_metric_input = test_input ,
138- instance = instance ,
139- )
140- )
141- assert response .pointwise_metric_result is not None
142- assert response .pointwise_metric_result .score is not None
143-
144-
145- def test_predefined_metric_with_agent_data (client ):
146- """Tests the _evaluate_instances method with predefined metric and agent_data."""
147- agent_data = types .evals .AgentData (
148- agent_config = types .evals .AgentConfig (
149- tools = types .evals .Tools (
150- tool = [
151- genai_types .Tool (
152- function_declarations = [
153- genai_types .FunctionDeclaration (name = "search" )
154- ]
155- )
156- ]
157- ),
158- developer_instruction = types .evals .InstanceData (text = "instruction" ),
159- ),
160- events = types .evals .Events (
161- event = [genai_types .Content (parts = [genai_types .Part (text = "hello" )])]
162- ),
163- )
164- instance = types .EvaluationInstance (
165- prompt = types .evals .InstanceData (text = "What is the capital of France?" ),
166- response = types .evals .InstanceData (text = "Paris" ),
167- reference = types .evals .InstanceData (text = "Paris" ),
168- agent_data = agent_data ,
169- )
170-
171- response = client .evals .evaluate_instances (
172- metric_config = types ._EvaluateInstancesRequestParameters (
173- metrics = [types .Metric (name = "general_quality_v1" )],
174- instance = instance ,
175- )
176- )
177- assert response .metric_results [0 ].score is not None
108+ # def test_predefined_metric_with_agent_data(client):
109+ # """Tests the _evaluate_instances method with predefined metric and agent_data."""
110+ # agent_data = types.evals.AgentData(
111+ # agent_config=types.evals.AgentConfig(
112+ # tools=[
113+ # genai_types.Tool(
114+ # function_declarations=[
115+ # genai_types.FunctionDeclaration(name="search")
116+ # ]
117+ # )
118+ # ],
119+ # developer_instruction=types.evals.InstanceData(text="instruction"),
120+ # ),
121+ # events=types.evals.Events(
122+ # event=[genai_types.Content(parts=[genai_types.Part(text="hello")])]
123+ # ),
124+ # )
125+ # instance = types.EvaluationInstance(
126+ # prompt=types.evals.InstanceData(text="What is the capital of France?"),
127+ # response=types.evals.InstanceData(text="Paris"),
128+ # reference=types.evals.InstanceData(text="Paris"),
129+ # agent_data=agent_data,
130+ # )
131+
132+ # response = client.evals.evaluate_instances(
133+ # metric_config=types._EvaluateInstancesRequestParameters(
134+ # metrics=[types.Metric(name="general_quality_v1")],
135+ # instance=instance,
136+ # )
137+ # )
138+ # assert response.metric_results[0].score is not None
178139
179140
180141def test_pairwise_metric_with_autorater (client ):
@@ -189,7 +150,10 @@ def test_pairwise_metric_with_autorater(client):
189150 test_input = types .PairwiseMetricInput (
190151 instance = types .PairwiseMetricInstance (json_instance = json_instance ),
191152 metric_spec = genai_types .PairwiseMetricSpec (
192- metric_prompt_template = "Which response is a better summary? Baseline: '{baseline_response}' or Candidate: '{candidate_response}'"
153+ metric_prompt_template = (
154+ "Which response is a better summary? Baseline:"
155+ " '{baseline_response}' or Candidate: '{candidate_response}'"
156+ )
193157 ),
194158 )
195159 autorater_config = genai_types .AutoraterConfig (sampling_count = 2 )
@@ -240,7 +204,10 @@ def test_inference_with_prompt_template(client):
240204
241205def test_run_inference_with_agent (client ):
242206 test_df = pd .DataFrame (
243- {"prompt" : ["agent prompt" ], "session_inputs" : ['{"user_id": "user_123"}' ]}
207+ {
208+ "prompt" : ["agent prompt" ],
209+ "session_inputs" : ['{"user_id": "user_123"}' ],
210+ }
244211 )
245212 inference_result = client .evals .run_inference (
246213 agent = "projects/977012026409/locations/us-central1/reasoningEngines/7188347537655332864" ,
0 commit comments