feat: add thinkingLevel and thinkingBudget config for Gemini models

brendan-kellam · claude · brendan-kellam · commit ffdbbed672bd · 2026-04-13T15:45:44.000-07:00
Add per-model thinking configuration for Google Generative AI and Google
Vertex providers. `thinkingLevel` controls reasoning depth for Gemini 3
models, and `thinkingBudget` sets the thinking token budget for Gemini
2.5 models. Deprecates the GOOGLE_VERTEX_THINKING_BUDGET_TOKENS env var
in favor of the new per-model config, keeping the env var as a fallback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx
@@ -2300,6 +2300,20 @@
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
@@ -2548,6 +2562,20 @@
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
@@ -3838,6 +3866,20 @@
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
@@ -4086,6 +4128,20 @@
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
diff --git a/docs/snippets/schemas/v3/languageModel.schema.mdx b/docs/snippets/schemas/v3/languageModel.schema.mdx
@@ -614,6 +614,20 @@
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
@@ -862,6 +876,20 @@
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
@@ -2152,6 +2180,20 @@
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
@@ -2400,6 +2442,20 @@
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts
@@ -2299,6 +2299,20 @@ const schema = {
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
@@ -2547,6 +2561,20 @@ const schema = {
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
@@ -3837,6 +3865,20 @@ const schema = {
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
@@ -4085,6 +4127,20 @@ const schema = {
                 "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
                 "description": "Optional base URL."
               },
+              "thinkingLevel": {
+                "type": "string",
+                "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                "enum": [
+                  "minimal",
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "thinkingBudget": {
+                "type": "integer",
+                "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+              },
               "temperature": {
                 "type": "number",
                 "description": "Optional temperature setting to use with the model."
diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts
@@ -962,6 +962,14 @@ export interface GoogleGenerativeAILanguageModel {
    * Optional base URL.
    */
   baseUrl?: string;
+  /**
+   * Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
+   */
+  thinkingLevel?: "minimal" | "low" | "medium" | "high";
+  /**
+   * Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget
+   */
+  thinkingBudget?: number;
   /**
    * Optional temperature setting to use with the model.
    */
@@ -1056,6 +1064,14 @@ export interface GoogleVertexLanguageModel {
    * Optional base URL.
    */
   baseUrl?: string;
+  /**
+   * Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
+   */
+  thinkingLevel?: "minimal" | "low" | "medium" | "high";
+  /**
+   * Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget
+   */
+  thinkingBudget?: number;
   /**
    * Optional temperature setting to use with the model.
    */
diff --git a/packages/schemas/src/v3/languageModel.schema.ts b/packages/schemas/src/v3/languageModel.schema.ts
@@ -613,6 +613,20 @@ const schema = {
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
@@ -861,6 +875,20 @@ const schema = {
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
@@ -2151,6 +2179,20 @@ const schema = {
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
@@ -2399,6 +2441,20 @@ const schema = {
           "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$",
           "description": "Optional base URL."
         },
+        "thinkingLevel": {
+          "type": "string",
+          "description": "Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+          "enum": [
+            "minimal",
+            "low",
+            "medium",
+            "high"
+          ]
+        },
+        "thinkingBudget": {
+          "type": "integer",
+          "description": "Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget"
+        },
         "temperature": {
           "type": "number",
           "description": "Optional temperature setting to use with the model."
diff --git a/packages/schemas/src/v3/languageModel.type.ts b/packages/schemas/src/v3/languageModel.type.ts
@@ -296,6 +296,14 @@ export interface GoogleGenerativeAILanguageModel {
    * Optional base URL.
    */
   baseUrl?: string;
+  /**
+   * Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
+   */
+  thinkingLevel?: "minimal" | "low" | "medium" | "high";
+  /**
+   * Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget
+   */
+  thinkingBudget?: number;
   /**
    * Optional temperature setting to use with the model.
    */
@@ -390,6 +398,14 @@ export interface GoogleVertexLanguageModel {
    * Optional base URL.
    */
   baseUrl?: string;
+  /**
+   * Optional thinking level for Gemini 3 models. Controls the depth of reasoning the model performs. See https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
+   */
+  thinkingLevel?: "minimal" | "low" | "medium" | "high";
+  /**
+   * Optional thinking budget for Gemini 2.5 models. Sets the number of thinking tokens the model can use when generating a response. Set to -1 for dynamic thinking. See https://ai.google.dev/gemini-api/docs/thinking#set-budget
+   */
+  thinkingBudget?: number;
   /**
    * Optional temperature setting to use with the model.
    */
diff --git a/packages/shared/src/env.server.ts b/packages/shared/src/env.server.ts
diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts
diff --git a/schemas/v3/languageModel.json b/schemas/v3/languageModel.json