Skip to content

Commit 052e97a

Browse files
authored
fix(eval): bump Gemini model from gemini-2.5-pro to gemini-3.1-pro-preview (#276)
gemini-2.5-pro is only available on the v1 API; deepeval uses v1beta, which causes a 404. Switch to gemini-3.1-pro-preview and use the GoogleGeminiLangChain wrapper. Signed-off-by: Jack Luar <jluar@precisioninno.com>
1 parent 5202098 commit 052e97a

3 files changed

Lines changed: 5 additions & 8 deletions

File tree

evaluation/auto_evaluation/eval_main.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@
1111
from dotenv import load_dotenv
1212
from deepeval.test_case import LLMTestCase
1313
from deepeval import evaluate
14-
from deepeval.models import GeminiModel
1514

15+
from auto_evaluation.src.models.gemini import GoogleGeminiLangChain
1616
from auto_evaluation.src.metrics.retrieval import (
1717
make_contextual_precision_metric,
1818
make_contextual_recall_metric,
@@ -41,10 +41,7 @@ def __init__(self, base_url: str, dataset: str, reranker_base_url: str = ""):
4141
self.dataset = dataset
4242
self.reranker_base_url = reranker_base_url
4343
self.qns = preprocess.read_data(self.dataset)
44-
self.eval_model = GeminiModel(
45-
model_name="gemini-2.5-pro",
46-
api_key=os.getenv("GOOGLE_API_KEY"),
47-
)
44+
self.eval_model = GoogleGeminiLangChain(model_name="gemini-3.1-pro-preview")
4845
self.log_dir = "logs"
4946
os.makedirs(self.log_dir, exist_ok=True)
5047
self.sanity_check()

evaluation/auto_evaluation/src/models/gemini.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,15 +68,15 @@ def get_model_name(self):
6868

6969

7070
def main():
71-
model = GoogleGeminiLangChain(model_name="gemini-2.5-pro")
71+
model = GoogleGeminiLangChain(model_name="gemini-3.1-pro-preview")
7272
prompt = "Write me a joke"
7373
print(f"Prompt: {prompt}")
7474
response = model.generate(prompt, schema=Response)
7575
print(f"Response: {response}")
7676

7777

7878
async def main_async():
79-
model = GoogleGeminiLangChain(model_name="gemini-2.5-pro")
79+
model = GoogleGeminiLangChain(model_name="gemini-3.1-pro-preview")
8080
prompt = "Write me a joke"
8181
print(f"Prompt: {prompt}")
8282
response = await model.a_generate(prompt, schema=Response)

evaluation/script_based_evaluation/models/gemini_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def base_gemini_pro(query: str) -> tuple[str, float]:
5858
try:
5959
start_time = time.time()
6060
response = _client.models.generate_content(
61-
model="gemini-2.5-pro",
61+
model="gemini-3.1-pro-preview",
6262
contents=" " + query,
6363
config=types.GenerateContentConfig(
6464
safety_settings=_safety_config,

0 commit comments

Comments
 (0)