From 8f0968faa725f7fd25255bf64ea9ed85c51f00ee Mon Sep 17 00:00:00 2001 From: william Date: Mon, 2 Jun 2025 07:59:03 -0500 Subject: [PATCH 1/4] see about fixing the smoke test. --- plugins/google/modelgauge/suts/google_genai.py | 10 +++++++++- plugins/validation_tests/test_object_creation.py | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/plugins/google/modelgauge/suts/google_genai.py b/plugins/google/modelgauge/suts/google_genai.py index 6bf1f3614..7af91a6d5 100644 --- a/plugins/google/modelgauge/suts/google_genai.py +++ b/plugins/google/modelgauge/suts/google_genai.py @@ -2,6 +2,10 @@ This file defines google SUTs that use Google's genai python SDK. """ +import logging + +logger = logging.getLogger(__name__) + from typing import Optional from google import genai @@ -44,7 +48,11 @@ def __init__(self, uid: str, model_name: str, reasoning: bool, api_key: GoogleAi self.api_key = api_key.value def _load_client(self) -> genai.Client: - return genai.Client(api_key=self.api_key) + try: + return genai.Client(api_key=self.api_key) + except: + logger.exception(f"Failed to load genai.Client with '{self.api_key}'") + raise def translate_text_prompt(self, prompt: TextPrompt, options: SUTOptions) -> GenAiRequest: optional = {} diff --git a/plugins/validation_tests/test_object_creation.py b/plugins/validation_tests/test_object_creation.py index f3b049ae4..47a05dd6c 100644 --- a/plugins/validation_tests/test_object_creation.py +++ b/plugins/validation_tests/test_object_creation.py @@ -133,7 +133,8 @@ def suts_to_test(): # get a sense of a real user's experience. 
@expensive_tests @pytest.mark.timeout(TIMEOUT) -@pytest.mark.parametrize("sut_name", suts_to_test()) +# @pytest.mark.parametrize("sut_name", suts_to_test()) +@pytest.mark.parametrize("sut_name", ["google-genai-gemini-2.5-flash-preview-05-20-no-reasoning"]) def test_all_suts_can_evaluate(sut_name): sut = SUTS.make_instance(sut_name, secrets=load_secrets_from_config()) assert isinstance(sut, PromptResponseSUT), "Update this test to handle other types." From 0f78ade40bc65736f615f8f6d11d45373231c478 Mon Sep 17 00:00:00 2001 From: william Date: Mon, 2 Jun 2025 08:29:35 -0500 Subject: [PATCH 2/4] fiddling --- plugins/google/modelgauge/suts/google_genai.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/google/modelgauge/suts/google_genai.py b/plugins/google/modelgauge/suts/google_genai.py index 7af91a6d5..1715f35da 100644 --- a/plugins/google/modelgauge/suts/google_genai.py +++ b/plugins/google/modelgauge/suts/google_genai.py @@ -3,6 +3,7 @@ """ import logging +import sys logger = logging.getLogger(__name__) @@ -51,6 +52,7 @@ def _load_client(self) -> genai.Client: try: return genai.Client(api_key=self.api_key) except: + print(f"Failed to load genai.Client with '{self.api_key}'", file=sys.stderr) logger.exception(f"Failed to load genai.Client with '{self.api_key}'") raise From 3f1428ded8ad4b09c7e6083e229d39d39a09be82 Mon Sep 17 00:00:00 2001 From: william Date: Mon, 2 Jun 2025 09:07:36 -0500 Subject: [PATCH 3/4] tidy up --- plugins/google/modelgauge/suts/google_genai.py | 3 +-- plugins/validation_tests/test_object_creation.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/plugins/google/modelgauge/suts/google_genai.py b/plugins/google/modelgauge/suts/google_genai.py index 1715f35da..2dae4d9d5 100644 --- a/plugins/google/modelgauge/suts/google_genai.py +++ b/plugins/google/modelgauge/suts/google_genai.py @@ -52,8 +52,7 @@ def _load_client(self) -> genai.Client: try: return genai.Client(api_key=self.api_key) except: - print(f"Failed to 
load genai.Client with '{self.api_key}'", file=sys.stderr) - logger.exception(f"Failed to load genai.Client with '{self.api_key}'") + logger.exception(f"Failed to load genai.Client with api_key='{self.api_key}'") raise def translate_text_prompt(self, prompt: TextPrompt, options: SUTOptions) -> GenAiRequest: diff --git a/plugins/validation_tests/test_object_creation.py b/plugins/validation_tests/test_object_creation.py index 47a05dd6c..f3b049ae4 100644 --- a/plugins/validation_tests/test_object_creation.py +++ b/plugins/validation_tests/test_object_creation.py @@ -133,8 +133,7 @@ def suts_to_test(): # get a sense of a real user's experience. @expensive_tests @pytest.mark.timeout(TIMEOUT) -# @pytest.mark.parametrize("sut_name", suts_to_test()) -@pytest.mark.parametrize("sut_name", ["google-genai-gemini-2.5-flash-preview-05-20-no-reasoning"]) +@pytest.mark.parametrize("sut_name", suts_to_test()) def test_all_suts_can_evaluate(sut_name): sut = SUTS.make_instance(sut_name, secrets=load_secrets_from_config()) assert isinstance(sut, PromptResponseSUT), "Update this test to handle other types." 
From 1b1440522648e38ff6060785eb939a1058766ae4 Mon Sep 17 00:00:00 2001 From: william Date: Mon, 2 Jun 2025 10:46:43 -0500 Subject: [PATCH 4/4] Redact secret --- plugins/google/modelgauge/suts/google_genai.py | 5 ++--- src/modelgauge/secret_values.py | 12 ++++++++++++ tests/modelgauge_tests/test_secret_values.py | 13 +++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/plugins/google/modelgauge/suts/google_genai.py b/plugins/google/modelgauge/suts/google_genai.py index 2dae4d9d5..3b361e4ab 100644 --- a/plugins/google/modelgauge/suts/google_genai.py +++ b/plugins/google/modelgauge/suts/google_genai.py @@ -3,7 +3,6 @@ """ import logging -import sys logger = logging.getLogger(__name__) @@ -22,7 +21,7 @@ from modelgauge.general import APIException from modelgauge.prompt import TextPrompt from modelgauge.retry_decorator import retry -from modelgauge.secret_values import InjectSecret +from modelgauge.secret_values import InjectSecret, loggable_secret from modelgauge.sut import REFUSAL_RESPONSE, PromptResponseSUT, SUTOptions, SUTResponse # usort: skip from modelgauge.sut_capabilities import AcceptsTextPrompt from modelgauge.sut_decorator import modelgauge_sut @@ -52,7 +51,7 @@ def _load_client(self) -> genai.Client: try: return genai.Client(api_key=self.api_key) except: - logger.exception(f"Failed to load genai.Client with api_key='{self.api_key}'") + logger.exception(f"Failed to load genai.Client with api_key='{loggable_secret(self.api_key)}'") raise def translate_text_prompt(self, prompt: TextPrompt, options: SUTOptions) -> GenAiRequest: diff --git a/src/modelgauge/secret_values.py b/src/modelgauge/secret_values.py index c21569de1..8db1bd815 100644 --- a/src/modelgauge/secret_values.py +++ b/src/modelgauge/secret_values.py @@ -7,6 +7,18 @@ from modelgauge.general import get_concrete_subclasses +def loggable_secret(secret: str) -> str: + if secret is None or not isinstance(secret, str): + return secret + if len(secret) == 0: + val = "''" + elif
len(secret) < 10: + val = "'…'" + else: + val = f"'{secret[0:2]}…{secret[-2:]}'" + return f"[REDACTED, len={len(secret)}, val={val}]" + + class SecretDescription(BaseModel): """How to look up a secret and how to get the value if you don't have it.""" diff --git a/tests/modelgauge_tests/test_secret_values.py b/tests/modelgauge_tests/test_secret_values.py index a872f33fc..d7da20a96 100644 --- a/tests/modelgauge_tests/test_secret_values.py +++ b/tests/modelgauge_tests/test_secret_values.py @@ -1,4 +1,5 @@ import pytest + from modelgauge.general import get_class from modelgauge.secret_values import ( InjectSecret, @@ -8,6 +9,7 @@ SecretDescription, SerializedSecret, get_all_secrets, + loggable_secret, ) @@ -109,3 +111,14 @@ def test_inject_required_missing(): injector = InjectSecret(SomeRequiredSecret) with pytest.raises(MissingSecretValues): injector.inject({"some-scope": {"different-key": "some-value"}}) + + +def test_loggable_secret_string(): + assert loggable_secret("abcdefghijklmnopqrstuvwxyz") == "[REDACTED, len=26, val='ab…yz']" + + assert loggable_secret(None) is None + assert loggable_secret(1234) == 1234 + assert loggable_secret("") == "[REDACTED, len=0, val='']" + assert loggable_secret("az") == "[REDACTED, len=2, val='…']" + assert loggable_secret("abcd1wxyz") == "[REDACTED, len=9, val='…']" + assert loggable_secret("abcdevwxyz") == "[REDACTED, len=10, val='ab…yz']"