Skip to content

Commit a99f340

Browse files
jsondai authored and copybara-github committed
chore: GenAI Client(evals) - Pin all RubricMetric properties to v1
PiperOrigin-RevId: 919914632
1 parent 9770c31 commit a99f340

3 files changed

Lines changed: 65 additions & 21 deletions

File tree

agentplatform/_genai/_evals_metric_loaders.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -277,27 +277,27 @@ def __getattr__(
277277

278278
@property
279279
def GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
280-
return self.__getattr__("GENERAL_QUALITY")
280+
return self.__getattr__("GENERAL_QUALITY", version="v1")
281281

282282
@property
283283
def TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
284-
return self.__getattr__("TEXT_QUALITY")
284+
return self.__getattr__("TEXT_QUALITY", version="v1")
285285

286286
@property
287287
def INSTRUCTION_FOLLOWING(self) -> LazyLoadedPrebuiltMetric:
288-
return self.__getattr__("INSTRUCTION_FOLLOWING")
288+
return self.__getattr__("INSTRUCTION_FOLLOWING", version="v1")
289289

290290
@property
291291
def SAFETY(self) -> LazyLoadedPrebuiltMetric:
292-
return self.__getattr__("SAFETY")
292+
return self.__getattr__("SAFETY", version="v1")
293293

294294
@property
295295
def MULTI_TURN_GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
296-
return self.__getattr__("MULTI_TURN_GENERAL_QUALITY")
296+
return self.__getattr__("MULTI_TURN_GENERAL_QUALITY", version="v1")
297297

298298
@property
299299
def MULTI_TURN_TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
300-
return self.__getattr__("MULTI_TURN_TEXT_QUALITY")
300+
return self.__getattr__("MULTI_TURN_TEXT_QUALITY", version="v1")
301301

302302
@property
303303
def MULTI_TURN_TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
@@ -317,43 +317,43 @@ def FINAL_RESPONSE_MATCH(self) -> LazyLoadedPrebuiltMetric:
317317

318318
@property
319319
def FINAL_RESPONSE_REFERENCE_FREE(self) -> LazyLoadedPrebuiltMetric:
320-
return self.__getattr__("FINAL_RESPONSE_REFERENCE_FREE")
320+
return self.__getattr__("FINAL_RESPONSE_REFERENCE_FREE", version="v1")
321321

322322
@property
323323
def COHERENCE(self) -> LazyLoadedPrebuiltMetric:
324-
return self.__getattr__("COHERENCE")
324+
return self.__getattr__("COHERENCE", version="v1")
325325

326326
@property
327327
def FLUENCY(self) -> LazyLoadedPrebuiltMetric:
328-
return self.__getattr__("FLUENCY")
328+
return self.__getattr__("FLUENCY", version="v1")
329329

330330
@property
331331
def VERBOSITY(self) -> LazyLoadedPrebuiltMetric:
332-
return self.__getattr__("VERBOSITY")
332+
return self.__getattr__("VERBOSITY", version="v1")
333333

334334
@property
335335
def SUMMARIZATION_QUALITY(self) -> LazyLoadedPrebuiltMetric:
336-
return self.__getattr__("SUMMARIZATION_QUALITY")
336+
return self.__getattr__("SUMMARIZATION_QUALITY", version="v1")
337337

338338
@property
339339
def QUESTION_ANSWERING_QUALITY(self) -> LazyLoadedPrebuiltMetric:
340-
return self.__getattr__("QUESTION_ANSWERING_QUALITY")
340+
return self.__getattr__("QUESTION_ANSWERING_QUALITY", version="v1")
341341

342342
@property
343343
def MULTI_TURN_CHAT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
344-
return self.__getattr__("MULTI_TURN_CHAT_QUALITY")
344+
return self.__getattr__("MULTI_TURN_CHAT_QUALITY", version="v1")
345345

346346
@property
347347
def MULTI_TURN_SAFETY(self) -> LazyLoadedPrebuiltMetric:
348-
return self.__getattr__("MULTI_TURN_SAFETY")
348+
return self.__getattr__("MULTI_TURN_SAFETY", version="v1")
349349

350350
@property
351351
def FINAL_RESPONSE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
352-
return self.__getattr__("FINAL_RESPONSE_QUALITY")
352+
return self.__getattr__("FINAL_RESPONSE_QUALITY", version="v1")
353353

354354
@property
355355
def HALLUCINATION(self) -> LazyLoadedPrebuiltMetric:
356-
return self.__getattr__("HALLUCINATION")
356+
return self.__getattr__("HALLUCINATION", version="v1")
357357

358358
@property
359359
def GROUNDING(self) -> LazyLoadedPrebuiltMetric: # pylint: disable=invalid-name
@@ -374,15 +374,15 @@ def GROUNDEDNESS(self) -> LazyLoadedPrebuiltMetric: # pylint: disable=invalid-n
374374

375375
@property
376376
def TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
377-
return self.__getattr__("TOOL_USE_QUALITY")
377+
return self.__getattr__("TOOL_USE_QUALITY", version="v1")
378378

379379
@property
380380
def GECKO_TEXT2IMAGE(self) -> LazyLoadedPrebuiltMetric:
381-
return self.__getattr__("GECKO_TEXT2IMAGE")
381+
return self.__getattr__("GECKO_TEXT2IMAGE", version="v1")
382382

383383
@property
384384
def GECKO_TEXT2VIDEO(self) -> LazyLoadedPrebuiltMetric:
385-
return self.__getattr__("GECKO_TEXT2VIDEO")
385+
return self.__getattr__("GECKO_TEXT2VIDEO", version="v1")
386386

387387

388388
PrebuiltMetric = PrebuiltMetricLoader()

tests/unit/agentplatform/genai/test_evals.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def mock_api_client_fixture():
123123

124124
@pytest.fixture
125125
def mock_eval_dependencies(mock_api_client_fixture):
126+
_evals_metric_loaders.LazyLoadedPrebuiltMetric._cache.clear()
126127
# fmt: off
127128
with (
128129
mock.patch("google.cloud.storage.Client") as mock_storage_client,
@@ -6386,6 +6387,49 @@ def test_groundedness_resolve_returns_grounding_v1_metric(self):
63866387
assert resolved.name == "grounding_v1"
63876388

63886389

6390+
class TestPrebuiltMetricLoaderVersionPinning:
6391+
"""Verifies explicit version pinning for all RubricMetric properties."""
6392+
6393+
@pytest.mark.parametrize(
6394+
"prop_name,expected_spec",
6395+
[
6396+
("GENERAL_QUALITY", "general_quality_v1"),
6397+
("TEXT_QUALITY", "text_quality_v1"),
6398+
("INSTRUCTION_FOLLOWING", "instruction_following_v1"),
6399+
("SAFETY", "safety_v1"),
6400+
("MULTI_TURN_GENERAL_QUALITY", "multi_turn_general_quality_v1"),
6401+
("MULTI_TURN_TEXT_QUALITY", "multi_turn_text_quality_v1"),
6402+
("FINAL_RESPONSE_REFERENCE_FREE", "final_response_reference_free_v1"),
6403+
("FINAL_RESPONSE_QUALITY", "final_response_quality_v1"),
6404+
("HALLUCINATION", "hallucination_v1"),
6405+
("TOOL_USE_QUALITY", "tool_use_quality_v1"),
6406+
("GECKO_TEXT2IMAGE", "gecko_text2image_v1"),
6407+
("GECKO_TEXT2VIDEO", "gecko_text2video_v1"),
6408+
],
6409+
)
6410+
def test_predefined_property_pins_to_v1(self, prop_name, expected_spec):
6411+
lazy_metric = getattr(agentplatform_genai_types.RubricMetric, prop_name)
6412+
assert lazy_metric.version == "v1"
6413+
assert lazy_metric._get_api_metric_spec_name() == expected_spec
6414+
6415+
@pytest.mark.parametrize(
6416+
"prop_name",
6417+
[
6418+
"COHERENCE",
6419+
"FLUENCY",
6420+
"VERBOSITY",
6421+
"SUMMARIZATION_QUALITY",
6422+
"QUESTION_ANSWERING_QUALITY",
6423+
"MULTI_TURN_CHAT_QUALITY",
6424+
"MULTI_TURN_SAFETY",
6425+
],
6426+
)
6427+
def test_gcs_backed_property_pins_to_v1(self, prop_name):
6428+
lazy_metric = getattr(agentplatform_genai_types.RubricMetric, prop_name)
6429+
assert lazy_metric.version == "v1"
6430+
assert lazy_metric._get_api_metric_spec_name() is None
6431+
6432+
63896433
class TestMergeResponseDatasets:
63906434
"""Unit tests for the merge_response_datasets_into_canonical_format function."""
63916435

tests/unit/vertexai/test_evaluation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,9 +1882,9 @@ def test_runnable_trajectory_eval_with_runnable_inference(self, api_transport):
18821882
"trajectory_exact_match/score",
18831883
]
18841884
)
1885-
assert list(
1885+
assert sorted(
18861886
test_result.metrics_table["trajectory_exact_match/score"].to_list()
1887-
) == [1.0, 0.0]
1887+
) == [0.0, 1.0]
18881888

18891889
@pytest.mark.parametrize("api_transport", ["grpc", "rest"])
18901890
def test_pointwise_autorater_request_config_enabled(self, api_transport):

0 commit comments

Comments
 (0)