Skip to content

Commit d1e4257

Browse files
vertex-sdk-bot authored and copybara-github committed
fix: Only include CandidateResponse if a response is present
PiperOrigin-RevId: 876784701
1 parent 6b5cc8f commit d1e4257

2 files changed

Lines changed: 54 additions & 21 deletions

File tree

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363
)
6464
),
6565
)
66+
INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
67+
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
68+
)
6669

6770

6871
def test_create_eval_run_data_source_evaluation_set(client):
@@ -189,9 +192,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
189192
def test_create_eval_run_with_inference_configs(client):
190193
"""Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
191194
client._api_client._http_options.api_version = "v1beta1"
192-
inference_config = types.EvaluationRunInferenceConfig(
193-
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
194-
)
195195
evaluation_run = client.evals.create_evaluation_run(
196196
name="test_inference_config",
197197
display_name="test_inference_config",
@@ -200,7 +200,7 @@ def test_create_eval_run_with_inference_configs(client):
200200
),
201201
dest=GCS_DEST,
202202
metrics=[GENERAL_QUALITY_METRIC],
203-
inference_configs={"model_1": inference_config},
203+
inference_configs={"model_1": INFERENCE_CONFIG},
204204
labels={"label1": "value1"},
205205
)
206206
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -216,7 +216,7 @@ def test_create_eval_run_with_inference_configs(client):
216216
),
217217
metrics=[GENERAL_QUALITY_METRIC],
218218
)
219-
assert evaluation_run.inference_configs["model_1"] == inference_config
219+
assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
220220
assert evaluation_run.labels == {
221221
"label1": "value1",
222222
}
@@ -318,6 +318,43 @@ def test_create_eval_run_with_inference_configs(client):
318318
# )
319319
# assert evaluation_run.error is None
320320

321+
# def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
322+
# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
323+
# input_df = pd.DataFrame(
324+
# {
325+
# "prompt": ["prompt1", "prompt2"],
326+
# "reference": ["reference1", "reference2"],
327+
# }
328+
# )
329+
# evaluation_run = client.evals.create_evaluation_run(
330+
# name="test9",
331+
# display_name="test9",
332+
# dataset=types.EvaluationDataset(
333+
# candidate_name="candidate_1",
334+
# eval_dataset_df=input_df,
335+
# ),
336+
# dest=GCS_DEST,
337+
# metrics=[GENERAL_QUALITY_METRIC],
338+
# inference_configs={"candidate_1": INFERENCE_CONFIG},
339+
# )
340+
# assert isinstance(evaluation_run, types.EvaluationRun)
341+
# assert evaluation_run.display_name == "test9"
342+
# assert evaluation_run.state == types.EvaluationRunState.PENDING
343+
# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
344+
# # Check evaluation set
345+
# assert evaluation_run.data_source.evaluation_set
346+
# eval_set = client.evals.get_evaluation_set(
347+
# name=evaluation_run.data_source.evaluation_set
348+
# )
349+
# assert len(eval_set.evaluation_items) == 2
350+
# assert evaluation_run.inference_configs["candidate_1"] == INFERENCE_CONFIG
351+
# # Check evaluation items
352+
# for i, eval_item_name in enumerate(eval_set.evaluation_items):
353+
# eval_item = client.evals.get_evaluation_item(name=eval_item_name)
354+
# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
355+
# assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
356+
# assert eval_item.evaluation_request.candidate_responses == []
357+
# assert evaluation_run.error is None
321358

322359
pytest_plugins = ("pytest_asyncio",)
323360

@@ -370,9 +407,6 @@ async def test_create_eval_run_async(client):
370407
async def test_create_eval_run_async_with_inference_configs(client):
371408
"""Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
372409
client._api_client._http_options.api_version = "v1beta1"
373-
inference_config = types.EvaluationRunInferenceConfig(
374-
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
375-
)
376410
evaluation_run = await client.aio.evals.create_evaluation_run(
377411
name="test_inference_config_async",
378412
display_name="test_inference_config_async",
@@ -381,7 +415,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
381415
),
382416
dest=GCS_DEST,
383417
metrics=[GENERAL_QUALITY_METRIC],
384-
inference_configs={"model_1": inference_config},
418+
inference_configs={"model_1": INFERENCE_CONFIG},
385419
labels={"label1": "value1"},
386420
)
387421
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -397,7 +431,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
397431
),
398432
metrics=[GENERAL_QUALITY_METRIC],
399433
)
400-
assert evaluation_run.inference_configs["model_1"] == inference_config
434+
assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
401435
assert evaluation_run.labels == {
402436
"label1": "value1",
403437
}

vertexai/_genai/_evals_common.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,6 +1962,15 @@ def _create_evaluation_set_from_dataframe(
19621962
for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
19631963
if CONTENT in event:
19641964
intermediate_events.append(event[CONTENT])
1965+
candidate_responses = []
1966+
if _evals_constant.RESPONSE in row:
1967+
candidate_responses.append(
1968+
types.CandidateResponse(
1969+
candidate=candidate_name or "Candidate 1",
1970+
text=row[_evals_constant.RESPONSE],
1971+
events=intermediate_events or None,
1972+
)
1973+
)
19651974
eval_item_requests.append(
19661975
types.EvaluationItemRequest(
19671976
prompt=(
@@ -1974,17 +1983,7 @@ def _create_evaluation_set_from_dataframe(
19741983
if _evals_constant.REFERENCE in row
19751984
else None
19761985
),
1977-
candidate_responses=[
1978-
types.CandidateResponse(
1979-
candidate=candidate_name or "Candidate 1",
1980-
text=row.get(_evals_constant.RESPONSE, None),
1981-
events=(
1982-
intermediate_events
1983-
if len(intermediate_events) > 0
1984-
else None
1985-
),
1986-
)
1987-
],
1986+
candidate_responses=candidate_responses,
19881987
)
19891988
)
19901989
logger.info("Writing evaluation item requests to GCS.")

0 commit comments

Comments (0)