
Commit 8f95ca9

fix: filter Gemini thinking parts from user-facing message chain (#7196)
Gemini 3 models return thinking parts (part.thought=True) alongside the actual response text. _process_content_parts was including these thinking parts in the message chain sent to the user, effectively leaking internal reasoning into the output. On platforms that split long messages (e.g. aiocqhttp with realtime segmenting), this caused duplicate or triple replies, since the thinking text often mirrors the actual response.

The streaming path already handled this correctly via chunk.text, which skips thinking parts, but the non-streaming path and the final-chunk processing in streaming both went through _process_content_parts.

Also switch the Gemini 3 model-name matching from an exhaustive list to prefix matching (gemini-3- / gemini-3.) so new variants like gemini-3.1 get the proper thinkingLevel config without code changes.

Fixes #7183
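The new prefix check can be sketched as a standalone helper. The helper name `is_gemini_3_model` is hypothetical; in the actual change the `any(...)` expression is written inline in `_prepare_query_config`:

```python
def is_gemini_3_model(model_name: str) -> bool:
    # Matches both dashed (gemini-3-pro) and dotted (gemini-3.1-flash)
    # naming, so future variants are covered without an exhaustive list.
    return any(model_name.startswith(p) for p in ("gemini-3-", "gemini-3."))

print(is_gemini_3_model("gemini-3-flash-lite-preview"))  # True
print(is_gemini_3_model("gemini-3.1-pro"))               # True
print(is_gemini_3_model("gemini-2.5-pro"))               # False
```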
1 parent 5e78a24 commit 8f95ca9

File tree

1 file changed: +9 −10 lines


astrbot/core/provider/sources/gemini_source.py

Lines changed: 9 additions & 10 deletions
@@ -241,15 +241,10 @@ async def _prepare_query_config(
             thinking_config = types.ThinkingConfig(
                 thinking_budget=thinking_budget,
             )
-        elif model_name in [
-            "gemini-3-pro",
-            "gemini-3-pro-preview",
-            "gemini-3-flash",
-            "gemini-3-flash-preview",
-            "gemini-3-flash-lite",
-            "gemini-3-flash-lite-preview",
-        ]:
-            # The thinkingLevel parameter, recommended for Gemini 3 models and onwards
+        elif any(model_name.startswith(p) for p in ("gemini-3-", "gemini-3.")):
+            # The thinkingLevel parameter, recommended for Gemini 3 models and onwards.
+            # Use prefix match so new variants (3.1, 3-flash-lite-preview, etc.) are
+            # covered without needing to keep an exhaustive list up to date.
             # Gemini 2.5 series models don't support thinkingLevel; use thinkingBudget instead.
             thinking_level = self.provider_config.get("gm_thinking_config", {}).get(
                 "level", "HIGH"
@@ -517,7 +512,11 @@ def _process_content_parts(
         ):
             chain.append(Comp.Plain("这是图片"))
         for part in result_parts:
-            if part.text:
+            # Skip thinking parts — their text is already captured via
+            # _extract_reasoning_content above. Including them here would
+            # leak the model's internal reasoning into the user-facing message,
+            # which also causes duplicate/triple replies on some platforms.
+            if part.text and not part.thought:
                 chain.append(Comp.Plain(part.text))

         if (
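The effect of the `not part.thought` filter can be illustrated with a minimal stand-in for the SDK's part objects. The `SimpleNamespace` parts below are illustrative placeholders, not the real google-genai types:

```python
from types import SimpleNamespace

parts = [
    SimpleNamespace(text="Let me work through this...", thought=True),  # internal reasoning
    SimpleNamespace(text="Here is the answer.", thought=False),         # actual response
]

# Mirrors the fixed condition: only non-thought parts with text
# reach the user-facing message chain.
chain = [p.text for p in parts if p.text and not p.thought]
print(chain)  # ['Here is the answer.']
```

Before the fix, both entries would have been appended, and message-splitting platforms could then send the reasoning text as a separate, near-duplicate reply.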
