Skip to content

Commit 7fe01f3

Browse files
committed
Handle some weird Google responses.
1 parent 3bc3cdb commit 7fe01f3

3 files changed

Lines changed: 39 additions & 12 deletions

File tree

plugins/google/modelgauge/suts/google_genai.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@
1919
from modelgauge.retry_decorator import retry
2020
from modelgauge.secret_values import InjectSecret
2121
from modelgauge.sut import REFUSAL_RESPONSE, PromptResponseSUT, SUTOptions, SUTResponse # usort: skip
22+
from modelgauge.sut_capabilities import AcceptsTextPrompt
23+
from modelgauge.sut_decorator import modelgauge_sut
24+
from modelgauge.sut_registry import SUTS
2225
from modelgauge.suts.google_generativeai import (
2326
GOOGLE_REFUSAL_FINISH_REASONS,
2427
GoogleAiApiKey,
2528
) # Both SDKs use the same API key.
26-
from modelgauge.sut_capabilities import AcceptsTextPrompt
27-
from modelgauge.sut_decorator import modelgauge_sut
28-
from modelgauge.sut_registry import SUTS
2929

3030

3131
class GenAiRequest(BaseModel):
@@ -76,16 +76,13 @@ def evaluate(self, request: GenAiRequest) -> GenerateContentResponse:
7676
return self.client.models.generate_content(**request.model_dump(exclude_none=True))
7777

7878
def translate_response(self, request: GenAiRequest, response: GenerateContentResponse) -> SUTResponse:
79-
assert (
80-
len(response.candidates) <= 1
81-
), f"Expected a single candidate in the response, got {len(response.candidates)}."
82-
if len(response.candidates) == 0:
79+
if response.candidates is None or len(response.candidates) == 0:
8380
# This is apparently a refusal. At least, it's what happens consistently with a set of
8481
# prompts in the CSE, SRC, and SXC hazards
8582
response_text = REFUSAL_RESPONSE
86-
else:
83+
elif len(response.candidates) == 1:
8784
candidate = response.candidates[0]
88-
if candidate.finish_reason in GOOGLE_REFUSAL_FINISH_REASONS:
85+
if candidate.finish_reason in GOOGLE_REFUSAL_FINISH_REASONS + ["OTHER"]:
8986
response_text = REFUSAL_RESPONSE
9087
elif candidate.content is not None:
9188
response_text = candidate.content.parts[0].text
@@ -95,7 +92,8 @@ def translate_response(self, request: GenAiRequest, response: GenerateContentRes
9592
f"The candidate does not have any content,"
9693
f" but it's finish reason {candidate.finish_reason} does not qualify as a refusal."
9794
)
98-
95+
else:
96+
raise AssertionError(f"Expected a single candidate in the response, got {response.candidates}.")
9997
return SUTResponse(text=response_text)
10098

10199

plugins/google/modelgauge/suts/google_generativeai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
TooManyRequests,
1616
)
1717
from google.generativeai.types import HarmBlockThreshold, HarmCategory # type: ignore
18+
from pydantic import BaseModel
1819

1920
from modelgauge.general import APIException
2021
from modelgauge.prompt import TextPrompt
@@ -24,8 +25,8 @@
2425
from modelgauge.sut_capabilities import AcceptsTextPrompt
2526
from modelgauge.sut_decorator import modelgauge_sut
2627
from modelgauge.sut_registry import SUTS
27-
from pydantic import BaseModel
2828

29+
# TODO: Do we really need this type alias or can we just import the real thing?
2930
FinishReason = genai.protos.Candidate.FinishReason
3031
GEMINI_HARM_CATEGORIES = [
3132
HarmCategory.HARM_CATEGORY_HATE_SPEECH,

plugins/google/tests/test_google_genai.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from unittest.mock import patch
33

44
import pytest
5-
from google.genai.types import Candidate, GenerateContentConfig, GenerateContentResponse, ThinkingConfig
5+
from google.genai.types import GenerateContentConfig, GenerateContentResponse, ThinkingConfig, FinishReason
66

77
from modelgauge.prompt import TextPrompt
88
from modelgauge.sut import REFUSAL_RESPONSE, SUTOptions, SUTResponse
@@ -156,6 +156,14 @@ def test_google_genai_translate_response(google_default_sut, fake_raw_response,
156156
assert response == SUTResponse(text="some response")
157157

158158

159+
def test_google_genai_translate_response_finish_reason_other(google_default_sut, fake_raw_response, some_request):
160+
"""A candidate whose finish reason is OTHER is translated as a refusal. (Author's note: I think this is for a typing error, but we're in a rush so I'm not fixing it.)"""
161+
fake_raw_response.candidates[0].finish_reason = FinishReason.OTHER
162+
response = google_default_sut.translate_response(some_request, fake_raw_response)
163+
164+
assert response == SUTResponse(text="") # indicates refusal
165+
166+
159167
def test_google_genai_translate_response_no_completions(google_default_sut, some_request):
160168
no_completions = GenerateContentResponse(
161169
**json.loads(
@@ -174,3 +182,23 @@ def test_google_genai_translate_response_no_completions(google_default_sut, some
174182
response = google_default_sut.translate_response(some_request, no_completions)
175183

176184
assert response == SUTResponse(text=REFUSAL_RESPONSE)
185+
186+
187+
def test_google_genai_translate_response_none_completions(google_default_sut, some_request):
188+
no_completions = GenerateContentResponse(
189+
**json.loads(
190+
"""{
191+
"candidates": null,
192+
"usage_metadata": {
193+
"prompt_token_count": 19,
194+
"total_token_count": 19,
195+
"cached_content_token_count": 0,
196+
"candidates_token_count": 0
197+
}
198+
}
199+
"""
200+
)
201+
)
202+
response = google_default_sut.translate_response(some_request, no_completions)
203+
204+
assert response == SUTResponse(text=REFUSAL_RESPONSE)

0 commit comments

Comments
 (0)