|
24 | 24 | from google.genai import types |
25 | 25 | import json5 |
26 | 26 |
|
| 27 | +from server.lib.feature_flags import ENABLE_GEMINI_3_FLASH |
27 | 28 | from server.lib.feature_flags import is_feature_enabled |
28 | 29 | from server.lib.nl.common import counters |
29 | 30 |
|
| 31 | +_GEMINI_3_0_FLASH = 'gemini-3-flash-preview' |
30 | 32 | _GEMINI_2_5_FLASH = 'gemini-2.5-flash' |
31 | | -_API_VERSION = 'v1' |
| 33 | +_API_VERSION_3 = 'v1beta' |
| 34 | +_API_VERSION_2 = 'v1' |
32 | 35 |
|
33 | 36 | # TODO: Consider tweaking this. And maybe consider passing as url param. |
34 | 37 | _TEMPERATURE = 0.1 |
@@ -80,14 +83,18 @@ def detect_with_gemini(query: str, history: List[List[str]], |
80 | 83 | # NOTE: llm_detector.detect() caller checks this. |
81 | 84 | api_key = current_app.config['LLM_API_KEY'] |
82 | 85 |
|
| 86 | + if is_feature_enabled(ENABLE_GEMINI_3_FLASH): |
| 87 | + api_version = _API_VERSION_3 |
| 88 | + else: |
| 89 | + api_version = _API_VERSION_2 |
83 | 90 | gemini_client = genai.Client( |
84 | 91 | api_key=api_key, |
85 | | - http_options=genai.types.HttpOptions(api_version=_API_VERSION)) |
| 92 | + http_options=genai.types.HttpOptions(api_version=api_version)) |
86 | 93 | model_name = detect_model_name() |
87 | 94 | logging.info(f'Gemini model used for LLM API: {model_name}') |
88 | 95 | ctr.info( |
89 | 96 | 'gemini_model', |
90 | | - f'{_API_VERSION}/{model_name}', |
| 97 | + f'{api_version}/{model_name}', |
91 | 98 | ) |
92 | 99 | gemini_response = gemini_client.models.generate_content(model=model_name, |
93 | 100 | contents=text, |
@@ -186,4 +193,6 @@ def _extract_answer(resp: str) -> str: |
186 | 193 |
|
187 | 194 |
|
def detect_model_name() -> str:
  """Return the Gemini model identifier to use for LLM detection.

  The ENABLE_GEMINI_3_FLASH feature flag selects the model:
  _GEMINI_3_0_FLASH when the flag is enabled, _GEMINI_2_5_FLASH otherwise.
  """
  use_gemini_3 = is_feature_enabled(ENABLE_GEMINI_3_FLASH)
  return _GEMINI_3_0_FLASH if use_gemini_3 else _GEMINI_2_5_FLASH
0 commit comments