6363 )
6464 ),
6565)
66+ INFERENCE_CONFIG = types .EvaluationRunInferenceConfig (
67+ model = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
68+ )
6669
6770
6871def test_create_eval_run_data_source_evaluation_set (client ):
@@ -189,9 +192,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
189192def test_create_eval_run_with_inference_configs (client ):
190193 """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
191194 client ._api_client ._http_options .api_version = "v1beta1"
192- inference_config = types .EvaluationRunInferenceConfig (
193- model = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
194- )
195195 evaluation_run = client .evals .create_evaluation_run (
196196 name = "test_inference_config" ,
197197 display_name = "test_inference_config" ,
@@ -200,7 +200,7 @@ def test_create_eval_run_with_inference_configs(client):
200200 ),
201201 dest = GCS_DEST ,
202202 metrics = [GENERAL_QUALITY_METRIC ],
203- inference_configs = {"model_1" : inference_config },
203+ inference_configs = {"model_1" : INFERENCE_CONFIG },
204204 labels = {"label1" : "value1" },
205205 )
206206 assert isinstance (evaluation_run , types .EvaluationRun )
@@ -216,7 +216,7 @@ def test_create_eval_run_with_inference_configs(client):
216216 ),
217217 metrics = [GENERAL_QUALITY_METRIC ],
218218 )
219- assert evaluation_run .inference_configs ["model_1" ] == inference_config
219+ assert evaluation_run .inference_configs ["model_1" ] == INFERENCE_CONFIG
220220 assert evaluation_run .labels == {
221221 "label1" : "value1" ,
222222 }
@@ -318,6 +318,43 @@ def test_create_eval_run_with_inference_configs(client):
318318# )
319319# assert evaluation_run.error is None
320320
321+ # def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
322+ # """Tests that create_evaluation_run() creates a correctly structured EvaluationRun from an EvaluationDataset with inference_configs."""
323+ # input_df = pd.DataFrame(
324+ # {
325+ # "prompt": ["prompt1", "prompt2"],
326+ # "reference": ["reference1", "reference2"],
327+ # }
328+ # )
329+ # evaluation_run = client.evals.create_evaluation_run(
330+ # name="test9",
331+ # display_name="test9",
332+ # dataset=types.EvaluationDataset(
333+ # candidate_name="candidate_1",
334+ # eval_dataset_df=input_df,
335+ # ),
336+ # dest=GCS_DEST,
337+ # metrics=[GENERAL_QUALITY_METRIC],
338+ # inference_configs={"candidate_1": INFERENCE_CONFIG},
339+ # )
340+ # assert isinstance(evaluation_run, types.EvaluationRun)
341+ # assert evaluation_run.display_name == "test9"
342+ # assert evaluation_run.state == types.EvaluationRunState.PENDING
343+ # assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
344+ # # Check evaluation set
345+ # assert evaluation_run.data_source.evaluation_set
346+ # eval_set = client.evals.get_evaluation_set(
347+ # name=evaluation_run.data_source.evaluation_set
348+ # )
349+ # assert len(eval_set.evaluation_items) == 2
350+ # assert evaluation_run.inference_configs["candidate_1"] == INFERENCE_CONFIG
351+ # # Check evaluation items
352+ # for i, eval_item_name in enumerate(eval_set.evaluation_items):
353+ # eval_item = client.evals.get_evaluation_item(name=eval_item_name)
354+ # assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
355+ # assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
356+ # assert eval_item.evaluation_request.candidate_responses == []
357+ # assert evaluation_run.error is None
321358
322359pytest_plugins = ("pytest_asyncio" ,)
323360
@@ -370,9 +407,6 @@ async def test_create_eval_run_async(client):
370407async def test_create_eval_run_async_with_inference_configs (client ):
371408 """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
372409 client ._api_client ._http_options .api_version = "v1beta1"
373- inference_config = types .EvaluationRunInferenceConfig (
374- model = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
375- )
376410 evaluation_run = await client .aio .evals .create_evaluation_run (
377411 name = "test_inference_config_async" ,
378412 display_name = "test_inference_config_async" ,
@@ -381,7 +415,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
381415 ),
382416 dest = GCS_DEST ,
383417 metrics = [GENERAL_QUALITY_METRIC ],
384- inference_configs = {"model_1" : inference_config },
418+ inference_configs = {"model_1" : INFERENCE_CONFIG },
385419 labels = {"label1" : "value1" },
386420 )
387421 assert isinstance (evaluation_run , types .EvaluationRun )
@@ -397,7 +431,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
397431 ),
398432 metrics = [GENERAL_QUALITY_METRIC ],
399433 )
400- assert evaluation_run .inference_configs ["model_1" ] == inference_config
434+ assert evaluation_run .inference_configs ["model_1" ] == INFERENCE_CONFIG
401435 assert evaluation_run .labels == {
402436 "label1" : "value1" ,
403437 }
0 commit comments