Update Gemini From 2.5 Flash to 3.0 Flash preview for website (#6145)

shixiao-coder · web-flow · commit ec62ed101a7c · 2026-04-02T12:57:29.000-04:00
This PR updates the Gemini model used by the website, because 2.5 Flash will be deprecated on June 17th. We update it to 3.0 Flash and ran the related NL eval tests to validate the model upgrade. The NL eval results increase slightly: a few samples are now able to discover SVs where previously no SVs were found. No quality decrease was observed: https://docs.google.com/spreadsheets/d/1vQPAQ423MEKHpr3LCoMa_CIs7QTKEeeDjadZ3PagUHk/edit?gid=37321998#gid=37321998
diff --git a/server/__init__.py b/server/__init__.py
@@ -33,6 +33,7 @@
 from server.lib.disaster_dashboard import get_disaster_dashboard_data
 from server.lib.feature_flags import BIOMED_NL_FEATURE_FLAG
 from server.lib.feature_flags import DATA_OVERVIEW_FEATURE_FLAG
+from server.lib.feature_flags import ENABLE_GEMINI_3_FLASH
 from server.lib.feature_flags import ENABLE_NL_AGENT_DETECTOR
 from server.lib.feature_flags import is_feature_enabled
 import server.lib.i18n as i18n
@@ -432,8 +433,12 @@ def create_app(nl_root=DEFAULT_NL_ROOT):
                                                'palm-api-key')
       if is_feature_enabled(ENABLE_NL_AGENT_DETECTOR, app):
         os.environ['GEMINI_API_KEY'] = app.config['LLM_API_KEY']
+        if is_feature_enabled(ENABLE_GEMINI_3_FLASH, app):
+          default_model = "gemini-3-flash-preview"
+        else:
+          default_model = "gemini-2.5-flash"
         app.config['NL_DETECTION_AGENT'] = create_detection_agent(
-            os.environ.get("AGENT_MODEL", "gemini-2.5-flash"),
+            os.environ.get("AGENT_MODEL", default_model),
             os.environ.get("DC_MCP_URL"))
 
     app.config[
diff --git a/server/lib/feature_flags.py b/server/lib/feature_flags.py
@@ -34,6 +34,7 @@
 ENABLE_STAT_VAR_AUTOCOMPLETE = 'enable_stat_var_autocomplete'
 ENABLE_NL_AGENT_DETECTOR = 'enable_nl_agent_detector'
 NEW_RANKING_PAGE = 'new_ranking_page'
+ENABLE_GEMINI_3_FLASH = 'enable_gemini_3_flash'
 USE_V2_API = 'use_v2_api'
 
 
diff --git a/server/lib/nl/detection/llm_api.py b/server/lib/nl/detection/llm_api.py
@@ -24,11 +24,14 @@
 from google.genai import types
 import json5
 
+from server.lib.feature_flags import ENABLE_GEMINI_3_FLASH
 from server.lib.feature_flags import is_feature_enabled
 from server.lib.nl.common import counters
 
+_GEMINI_3_0_FLASH = 'gemini-3-flash-preview'
 _GEMINI_2_5_FLASH = 'gemini-2.5-flash'
-_API_VERSION = 'v1'
+_API_VERSION_3 = 'v1beta'
+_API_VERSION_2 = 'v1'
 
 # TODO: Consider tweaking this. And maybe consider passing as url param.
 _TEMPERATURE = 0.1
@@ -80,14 +83,18 @@ def detect_with_gemini(query: str, history: List[List[str]],
   # NOTE: llm_detector.detect() caller checks this.
   api_key = current_app.config['LLM_API_KEY']
 
+  if is_feature_enabled(ENABLE_GEMINI_3_FLASH):
+    api_version = _API_VERSION_3
+  else:
+    api_version = _API_VERSION_2
   gemini_client = genai.Client(
       api_key=api_key,
-      http_options=genai.types.HttpOptions(api_version=_API_VERSION))
+      http_options=genai.types.HttpOptions(api_version=api_version))
   model_name = detect_model_name()
   logging.info(f'Gemini model used for LLM API: {model_name}')
   ctr.info(
       'gemini_model',
-      f'{_API_VERSION}/{model_name}',
+      f'{api_version}/{model_name}',
   )
   gemini_response = gemini_client.models.generate_content(model=model_name,
                                                           contents=text,
@@ -186,4 +193,6 @@ def _extract_answer(resp: str) -> str:
 
 
 def detect_model_name() -> str:
+  if is_feature_enabled(ENABLE_GEMINI_3_FLASH):
+    return _GEMINI_3_0_FLASH
   return _GEMINI_2_5_FLASH
diff --git a/server/lib/nl/explore/overview.py b/server/lib/nl/explore/overview.py
@@ -20,6 +20,8 @@
 from pydantic import ConfigDict
 from pydantic.alias_generators import to_camel
 
+from server.lib.feature_flags import ENABLE_GEMINI_3_FLASH
+from server.lib.feature_flags import is_feature_enabled
 from server.lib.nl.explore.gemini_prompts import PAGE_OVERVIEW_PROMPT
 from server.lib.utils.gemini_utils import call_gemini
 
@@ -51,7 +53,8 @@ class PageOverview(BaseModel):
 
 _OVERVIEW_GEMINI_CALL_RETRIES = 3
 
-_OVERVIEW_GEMINI_MODEL = "gemini-2.5-flash-lite"
+_OVERVIEW_GEMINI_3_1_LITE = "gemini-3.1-flash-lite-preview"
+_OVERVIEW_GEMINI_2_5_LITE = "gemini-2.5-flash-lite"
 
 
 def generate_page_overview(
@@ -78,10 +81,15 @@ def generate_page_overview(
   formatted_page_overview_prompt = PAGE_OVERVIEW_PROMPT.format(
       initial_query=query, stat_var_titles=stat_var_titles)
 
+  if is_feature_enabled(ENABLE_GEMINI_3_FLASH):
+    overview_gemini_model = _OVERVIEW_GEMINI_3_1_LITE
+  else:
+    overview_gemini_model = _OVERVIEW_GEMINI_2_5_LITE
+
   page_overview = call_gemini(api_key=gemini_api_key,
                               formatted_prompt=formatted_page_overview_prompt,
                               schema=PageOverview,
-                              gemini_model=_OVERVIEW_GEMINI_MODEL)
+                              gemini_model=overview_gemini_model)
   if not page_overview:
     return None, None
 
diff --git a/server/lib/nl/explore/related.py b/server/lib/nl/explore/related.py
@@ -21,6 +21,8 @@
 from flask import current_app
 from pydantic import BaseModel
 
+from server.lib.feature_flags import ENABLE_GEMINI_3_FLASH
+from server.lib.feature_flags import is_feature_enabled
 import server.lib.nl.common.topic as topic
 import server.lib.nl.common.utils as utils
 import server.lib.nl.detection.types as dtypes
@@ -55,7 +57,9 @@ class FollowUpQuestions(BaseModel):
 
 
 _QUESTIONS_GEMINI_CALL_RETRIES = 3
-_QUESTIONS_GEMINI_MODEL = "gemini-2.5-flash"
+
+_QUESTIONS_GEMINI_3 = "gemini-3-flash-preview"
+_QUESTIONS_GEMINI_2 = "gemini-2.5-flash"
 
 
 def compute_related_things(
@@ -312,11 +316,16 @@ def generate_follow_up_questions(query: str,
   formatted_follow_up_questions_prompt = FOLLOW_UP_QUESTIONS_PROMPT.format(
       initial_query=query, related_topics=related_topics)
 
+  if is_feature_enabled(ENABLE_GEMINI_3_FLASH):
+    gemini_model = _QUESTIONS_GEMINI_3
+  else:
+    gemini_model = _QUESTIONS_GEMINI_2
+
   follow_up_questions = call_gemini(
       api_key=gemini_api_key,
       formatted_prompt=formatted_follow_up_questions_prompt,
       schema=FollowUpQuestions,
-      gemini_model=_QUESTIONS_GEMINI_MODEL)
+      gemini_model=gemini_model)
   if not follow_up_questions:
     return []
 
diff --git a/server/lib/utils/gemini_utils.py b/server/lib/utils/gemini_utils.py
@@ -22,8 +22,8 @@
 def call_gemini(
     api_key: str,
     formatted_prompt: str,
-    schema: Optional[BaseModel] = None,
-    gemini_model: str = "gemini-2.5-flash") -> Optional[Union[BaseModel, str]]:
+    gemini_model: str,
+    schema: Optional[BaseModel] = None) -> Optional[Union[BaseModel, str]]:
   """A helper for all Gemini generations through the Python Gen AI client.
     Args:
         api_key: A string representing the API key required for authentication with the Gemini service.
diff --git a/tools/nl/nl_metadata/config.py b/tools/nl/nl_metadata/config.py
@@ -32,7 +32,7 @@
 BIGQUERY_QUERY_BASE = "SELECT * FROM `datcom-store.dc_kg_latest.StatisticalVariable` WHERE name IS NOT NULL AND prov_id != \"dc/base/ExperimentalStatVars\""
 
 # --- Gemini API Config ---
-GEMINI_MODEL = "gemini-2.5-flash"
+GEMINI_MODEL = "gemini-3-flash-preview"
 GEMINI_TEMPERATURE = 1
 GEMINI_TOP_P = 1
 GEMINI_SEED = 0