Skip to content

Commit 10efc13

Browse files
vertex-sdk-bot authored and copybara-github committed
fix: Only include CandidateResponse if a response is present
PiperOrigin-RevId: 878162123
1 parent 134b989 commit 10efc13

File tree

2 files changed

+54
-21
lines changed

2 files changed

+54
-21
lines changed

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363
)
6464
),
6565
)
66+
INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
67+
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
68+
)
6669

6770

6871
def test_create_eval_run_data_source_evaluation_set(client):
@@ -189,9 +192,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
189192
def test_create_eval_run_with_inference_configs(client):
190193
"""Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
191194
client._api_client._http_options.api_version = "v1beta1"
192-
inference_config = types.EvaluationRunInferenceConfig(
193-
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
194-
)
195195
evaluation_run = client.evals.create_evaluation_run(
196196
name="test_inference_config",
197197
display_name="test_inference_config",
@@ -200,7 +200,7 @@ def test_create_eval_run_with_inference_configs(client):
200200
),
201201
dest=GCS_DEST,
202202
metrics=[GENERAL_QUALITY_METRIC],
203-
inference_configs={"model_1": inference_config},
203+
inference_configs={"model_1": INFERENCE_CONFIG},
204204
labels={"label1": "value1"},
205205
)
206206
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -216,7 +216,7 @@ def test_create_eval_run_with_inference_configs(client):
216216
),
217217
metrics=[GENERAL_QUALITY_METRIC],
218218
)
219-
assert evaluation_run.inference_configs["model_1"] == inference_config
219+
assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
220220
assert evaluation_run.labels == {
221221
"label1": "value1",
222222
}
@@ -318,6 +318,43 @@ def test_create_eval_run_with_inference_configs(client):
318318
# )
319319
# assert evaluation_run.error is None
320320

321+
# def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
322+
# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
323+
# input_df = pd.DataFrame(
324+
# {
325+
# "prompt": ["prompt1", "prompt2"],
326+
# "reference": ["reference1", "reference2"],
327+
# }
328+
# )
329+
# evaluation_run = client.evals.create_evaluation_run(
330+
# name="test9",
331+
# display_name="test9",
332+
# dataset=types.EvaluationDataset(
333+
# candidate_name="candidate_1",
334+
# eval_dataset_df=input_df,
335+
# ),
336+
# dest=GCS_DEST,
337+
# metrics=[GENERAL_QUALITY_METRIC],
338+
# inference_configs={"candidate_1": INFERENCE_CONFIG},
339+
# )
340+
# assert isinstance(evaluation_run, types.EvaluationRun)
341+
# assert evaluation_run.display_name == "test9"
342+
# assert evaluation_run.state == types.EvaluationRunState.PENDING
343+
# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
344+
# # Check evaluation set
345+
# assert evaluation_run.data_source.evaluation_set
346+
# eval_set = client.evals.get_evaluation_set(
347+
# name=evaluation_run.data_source.evaluation_set
348+
# )
349+
# assert len(eval_set.evaluation_items) == 2
350+
# assert evaluation_run.inference_configs["candidate_1"] == INFERENCE_CONFIG
351+
# # Check evaluation items
352+
# for i, eval_item_name in enumerate(eval_set.evaluation_items):
353+
# eval_item = client.evals.get_evaluation_item(name=eval_item_name)
354+
# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
355+
# assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
356+
# assert eval_item.evaluation_request.candidate_responses == []
357+
# assert evaluation_run.error is None
321358

322359
pytest_plugins = ("pytest_asyncio",)
323360

@@ -370,9 +407,6 @@ async def test_create_eval_run_async(client):
370407
async def test_create_eval_run_async_with_inference_configs(client):
371408
"""Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
372409
client._api_client._http_options.api_version = "v1beta1"
373-
inference_config = types.EvaluationRunInferenceConfig(
374-
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
375-
)
376410
evaluation_run = await client.aio.evals.create_evaluation_run(
377411
name="test_inference_config_async",
378412
display_name="test_inference_config_async",
@@ -381,7 +415,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
381415
),
382416
dest=GCS_DEST,
383417
metrics=[GENERAL_QUALITY_METRIC],
384-
inference_configs={"model_1": inference_config},
418+
inference_configs={"model_1": INFERENCE_CONFIG},
385419
labels={"label1": "value1"},
386420
)
387421
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -397,7 +431,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
397431
),
398432
metrics=[GENERAL_QUALITY_METRIC],
399433
)
400-
assert evaluation_run.inference_configs["model_1"] == inference_config
434+
assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
401435
assert evaluation_run.labels == {
402436
"label1": "value1",
403437
}

vertexai/_genai/_evals_common.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2167,6 +2167,15 @@ def _create_evaluation_set_from_dataframe(
21672167
for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
21682168
if CONTENT in event:
21692169
intermediate_events.append(event[CONTENT])
2170+
candidate_responses = []
2171+
if _evals_constant.RESPONSE in row:
2172+
candidate_responses.append(
2173+
types.CandidateResponse(
2174+
candidate=candidate_name or "Candidate 1",
2175+
text=row[_evals_constant.RESPONSE],
2176+
events=intermediate_events or None,
2177+
)
2178+
)
21702179
eval_item_requests.append(
21712180
types.EvaluationItemRequest(
21722181
prompt=(
@@ -2179,17 +2188,7 @@ def _create_evaluation_set_from_dataframe(
21792188
if _evals_constant.REFERENCE in row
21802189
else None
21812190
),
2182-
candidate_responses=[
2183-
types.CandidateResponse(
2184-
candidate=candidate_name or "Candidate 1",
2185-
text=row.get(_evals_constant.RESPONSE, None),
2186-
events=(
2187-
intermediate_events
2188-
if len(intermediate_events) > 0
2189-
else None
2190-
),
2191-
)
2192-
],
2191+
candidate_responses=candidate_responses,
21932192
)
21942193
)
21952194
logger.info("Writing evaluation item requests to GCS.")

0 commit comments

Comments
 (0)