Skip to content

Commit 4f517a4

Browse files
authored
UNS-480 [FEAT] Add Gemini LLM adapter for Google AI Studio (#1890)
* UNS-480 [FEAT] Add Gemini LLM adapter for Google AI Studio. Add a new LLM adapter for Google's Gemini models using LiteLLM's gemini/ provider prefix. The adapter follows the established SDK adapter pattern and is auto-discovered by register_adapters().
* UNS-480 [FEAT] Add defaults for max_tokens and max_retries in Gemini JSON schema
* UNS-480 [FIX] Address PR review: avoid dict mutation, validate blank model, update default model
* UNS-482 [FEAT] Add Gemini thinking mode support with tests. Extends the Gemini LLM adapter with optional thinking mode, mirroring the Anthropic/Bedrock pattern. enable_thinking and budget_tokens are consumed from adapter metadata (not Pydantic fields); when enabled, temperature is forced to 1. Schema gains an allOf/if-then conditional so budget_tokens (min 1024) is only required when thinking is on.
* UNS-482 [FIX] Use pytest.approx for temperature float comparison
* UNS-482 [FIX] Validate budget_tokens when Gemini thinking mode is enabled. Raise ValueError if budget_tokens is missing, not an integer, or below 1024 when enable_thinking=True. Previously these cases silently produced incomplete or invalid thinking configs that would fail at the API level.
* UNS-480 [FIX] Add gemini-2.5-flash to model description in JSON schema
* UNS-480 [FIX] Clean up Gemini JSON schema descriptions per PR review. Remove internal implementation detail (LiteLLM) from model description and remove experimental model from thinking mode supported models list.
1 parent 740834d commit 4f517a4

5 files changed

Lines changed: 359 additions & 0 deletions

File tree

8.04 KB
Loading

unstract/sdk1/src/unstract/sdk1/adapters/base1.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,72 @@ def validate_model(adapter_metadata: dict[str, "Any"]) -> str:
639639
return f"anthropic/{model}"
640640

641641

642+
class GeminiLLMParameters(BaseChatCompletionParameters):
    """See https://docs.litellm.ai/docs/providers/gemini."""

    # Google AI Studio API key; required for every request.
    api_key: str

    @staticmethod
    def _validate_budget_tokens(budget_tokens: "Any") -> int:
        """Validate a thinking-token budget and return it.

        Raises:
            ValueError: If the budget is missing, not an integer, or below
                the 1024-token minimum required by Gemini thinking mode.
        """
        if budget_tokens is None:
            raise ValueError(
                "budget_tokens is required when thinking mode is enabled"
            )
        if not isinstance(budget_tokens, int) or budget_tokens < 1024:
            raise ValueError(
                f"budget_tokens must be an integer >= 1024, got {budget_tokens}"
            )
        return budget_tokens

    @staticmethod
    def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]:
        """Validate adapter metadata and return LiteLLM-ready parameters.

        Consumes the control fields ``enable_thinking`` / ``budget_tokens``
        (which are intentionally not pydantic fields) and, when thinking is
        enabled, attaches a ``thinking`` config and forces ``temperature``
        to 1 as Gemini thinking mode requires.

        Raises:
            ValueError: If the model is blank or the thinking budget is
                missing/invalid while thinking mode is enabled.
        """
        result_metadata = adapter_metadata.copy()
        result_metadata["model"] = GeminiLLMParameters.validate_model(adapter_metadata)

        # Handle Gemini thinking configuration
        enable_thinking = adapter_metadata.get("enable_thinking", False)

        # If enable_thinking is not explicitly provided but thinking config is
        # present, assume thinking was enabled in a previous validation.
        has_thinking_config = adapter_metadata.get("thinking") is not None
        if not enable_thinking and has_thinking_config:
            enable_thinking = True

        if enable_thinking:
            if has_thinking_config:
                thinking = adapter_metadata["thinking"]
                # Re-check budgets carried over from a previous validation so
                # a malformed persisted config fails here rather than at the
                # API level (mirrors the freshly-built path below).
                if isinstance(thinking, dict) and "budget_tokens" in thinking:
                    GeminiLLMParameters._validate_budget_tokens(
                        thinking["budget_tokens"]
                    )
                result_metadata["thinking"] = thinking
            else:
                budget_tokens = GeminiLLMParameters._validate_budget_tokens(
                    adapter_metadata.get("budget_tokens")
                )
                result_metadata["thinking"] = {
                    "type": "enabled",
                    "budget_tokens": budget_tokens,
                }
            # Gemini thinking mode requires temperature=1
            result_metadata["temperature"] = 1

        # Exclude control fields from pydantic validation
        exclude_fields = ("enable_thinking", "budget_tokens", "thinking")
        validation_metadata = {
            k: v for k, v in result_metadata.items() if k not in exclude_fields
        }

        validated = GeminiLLMParameters(**validation_metadata).model_dump()

        # Re-attach the thinking config stripped before pydantic validation.
        if enable_thinking and "thinking" in result_metadata:
            validated["thinking"] = result_metadata["thinking"]

        return validated

    @staticmethod
    def validate_model(adapter_metadata: dict[str, "Any"]) -> str:
        """Return the model name with the ``gemini/`` provider prefix added.

        Raises:
            ValueError: If the model is missing or blank.
        """
        model = str(adapter_metadata.get("model", "")).strip()
        if not model:
            raise ValueError("model is required")
        if model.startswith("gemini/"):
            return model
        return f"gemini/{model}"
706+
707+
642708
class AnyscaleLLMParameters(BaseChatCompletionParameters):
643709
"""See https://docs.litellm.ai/docs/providers/anyscale."""
644710

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from typing import Any
2+
3+
from unstract.sdk1.adapters.base1 import BaseAdapter, GeminiLLMParameters
4+
from unstract.sdk1.adapters.enums import AdapterTypes
5+
6+
7+
class GeminiLLMAdapter(GeminiLLMParameters, BaseAdapter):
    """Gemini LLM adapter for Google AI Studio, auto-discovered by the SDK."""

    @staticmethod
    def get_id() -> str:
        """Stable unique adapter id in ``name|uuid`` form."""
        return "gemini|085f6c03-b57e-4594-85bb-40e2616c2736"

    @staticmethod
    def get_metadata() -> dict[str, Any]:
        """Registration metadata used by adapter discovery."""
        # Delegate to the accessors below so the name and description have a
        # single source of truth and cannot drift out of sync.
        return {
            "name": GeminiLLMAdapter.get_name(),
            "version": "1.0.0",
            "adapter": GeminiLLMAdapter,
            "description": GeminiLLMAdapter.get_description(),
            "is_active": True,
        }

    @staticmethod
    def get_name() -> str:
        return "Gemini"

    @staticmethod
    def get_description() -> str:
        return "Google Gemini LLM adapter via Google AI Studio"

    @staticmethod
    def get_provider() -> str:
        return "gemini"

    @staticmethod
    def get_icon() -> str:
        return "/icons/adapter-icons/Gemini.png"

    @staticmethod
    def get_adapter_type() -> AdapterTypes:
        return AdapterTypes.LLM
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
{
2+
"title": "Gemini LLM",
3+
"type": "object",
4+
"required": ["adapter_name", "api_key", "model"],
5+
"properties": {
6+
"adapter_name": {
7+
"type": "string",
8+
"title": "Name",
9+
"default": "",
10+
"description": "Provide a unique name for this adapter instance. Example: gemini-group-1"
11+
},
12+
"api_key": {
13+
"type": "string",
14+
"title": "API Key",
15+
"default": "",
16+
"description": "Google AI Studio API key",
17+
"format": "password"
18+
},
19+
"model": {
20+
"type": "string",
21+
"title": "Model",
22+
"default": "gemini-2.0-flash",
23+
"description": "Supported: gemini-2.0-flash, gemini-2.5-pro, gemini-2.5-flash, gemini-1.5-pro, gemini-1.5-flash. The gemini/ prefix will be added automatically if omitted."
24+
},
25+
"temperature": {
26+
"type": "number",
27+
"minimum": 0,
28+
"maximum": 2,
29+
"title": "Temperature",
30+
"default": 0.1,
31+
"description": "Sampling temperature between 0 and 2"
32+
},
33+
"max_tokens": {
34+
"type": "number",
35+
"minimum": 0,
36+
"multipleOf": 1,
37+
"default": 8192,
38+
"title": "Maximum Output Tokens",
39+
"description": "Maximum number of output tokens to limit LLM replies, the maximum possible differs from model to model."
40+
},
41+
"timeout": {
42+
"type": "number",
43+
"minimum": 0,
44+
"multipleOf": 1,
45+
"title": "Timeout",
46+
"default": 600,
47+
"description": "Timeout in seconds"
48+
},
49+
"max_retries": {
50+
"type": "number",
51+
"minimum": 0,
52+
"multipleOf": 1,
53+
"default": 3,
54+
"title": "Max Retries",
55+
"description": "Maximum number of retries"
56+
},
57+
"enable_thinking": {
58+
"type": "boolean",
59+
"title": "Enable Thinking Mode",
60+
"default": false,
61+
"description": "Enable extended thinking for supported models. Thinking mode is only supported on: gemini-2.5-pro, gemini-2.5-flash. When enabled, temperature is forced to 1."
62+
}
63+
},
64+
"allOf": [
65+
{
66+
"if": {
67+
"properties": {
68+
"enable_thinking": { "const": true }
69+
},
70+
"required": ["enable_thinking"]
71+
},
72+
"then": {
73+
"required": ["budget_tokens"],
74+
"properties": {
75+
"budget_tokens": {
76+
"type": "integer",
77+
"minimum": 1024,
78+
"default": 1024,
79+
"title": "Budget Tokens",
80+
"description": "Number of tokens allocated for the thinking process. Minimum: 1024."
81+
}
82+
}
83+
}
84+
}
85+
]
86+
}
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
"""Unit tests for the Gemini LLM adapter (UNS-480 / UNS-482)."""
2+
3+
import json
4+
from pathlib import Path
5+
6+
import pytest
7+
from unstract.sdk1.adapters.base1 import GeminiLLMParameters
8+
from unstract.sdk1.adapters.llm1.gemini import GeminiLLMAdapter
9+
10+
BASE_METADATA = {"api_key": "test-key", "model": "gemini-2.5-flash"}
11+
12+
13+
# ── validate_model ───────────────────────────────────────────────────────────
14+
15+
16+
def test_validate_model_prefixes_when_missing() -> None:
    """A bare model name gains the gemini/ provider prefix."""
    model = GeminiLLMParameters.validate_model({"model": "gemini-2.5-flash"})
    assert model == "gemini/gemini-2.5-flash"


def test_validate_model_does_not_double_prefix() -> None:
    """An already-prefixed model name is returned untouched."""
    model = GeminiLLMParameters.validate_model({"model": "gemini/gemini-2.5-pro"})
    assert model == "gemini/gemini-2.5-pro"


def test_validate_model_blank_raises() -> None:
    """A whitespace-only model name is rejected."""
    with pytest.raises(ValueError, match="model is required"):
        GeminiLLMParameters.validate_model({"model": " "})
33+
34+
35+
# ── validate: thinking disabled ──────────────────────────────────────────────
36+
37+
38+
def test_validate_thinking_disabled_by_default() -> None:
    """Without enable_thinking, no thinking config is attached."""
    result = GeminiLLMParameters.validate({**BASE_METADATA, "temperature": 0.3})
    assert result["temperature"] == pytest.approx(0.3)
    assert result["model"] == "gemini/gemini-2.5-flash"
    assert "thinking" not in result


def test_validate_excludes_control_fields_from_model() -> None:
    """Control fields never leak into the validated output."""
    result = GeminiLLMParameters.validate(dict(BASE_METADATA))
    for control_field in ("enable_thinking", "budget_tokens"):
        assert control_field not in result
49+
50+
51+
# ── validate: thinking enabled ───────────────────────────────────────────────
52+
53+
54+
def test_validate_thinking_enabled_with_budget() -> None:
    """Enabling thinking builds the config and pins temperature to 1."""
    metadata = {**BASE_METADATA, "enable_thinking": True, "budget_tokens": 2048}
    result = GeminiLLMParameters.validate(metadata)
    assert result["temperature"] == 1
    assert result["thinking"] == {"type": "enabled", "budget_tokens": 2048}


def test_validate_thinking_overrides_user_temperature() -> None:
    """User-supplied temperature is overridden while thinking is on."""
    metadata = {
        **BASE_METADATA,
        "temperature": 0.7,
        "enable_thinking": True,
        "budget_tokens": 1024,
    }
    assert GeminiLLMParameters.validate(metadata)["temperature"] == 1


def test_validate_thinking_enabled_without_budget_raises() -> None:
    """A missing budget is an error when thinking is enabled."""
    metadata = {**BASE_METADATA, "enable_thinking": True}
    with pytest.raises(ValueError, match="budget_tokens is required"):
        GeminiLLMParameters.validate(metadata)


def test_validate_thinking_budget_tokens_invalid_type_raises() -> None:
    """A non-integer budget is rejected."""
    metadata = {**BASE_METADATA, "enable_thinking": True, "budget_tokens": "hello"}
    with pytest.raises(ValueError, match="budget_tokens must be an integer >= 1024"):
        GeminiLLMParameters.validate(metadata)


def test_validate_thinking_budget_tokens_too_small_raises() -> None:
    """A budget below the 1024 minimum is rejected."""
    metadata = {**BASE_METADATA, "enable_thinking": True, "budget_tokens": 512}
    with pytest.raises(ValueError, match="budget_tokens must be an integer >= 1024"):
        GeminiLLMParameters.validate(metadata)


def test_validate_preserves_existing_thinking_config() -> None:
    """A pre-built thinking config survives re-validation unchanged."""
    thinking = {"type": "enabled", "budget_tokens": 4096}
    result = GeminiLLMParameters.validate({**BASE_METADATA, "thinking": thinking})
    assert result["temperature"] == 1
    assert result["thinking"] == thinking


def test_validate_does_not_mutate_input() -> None:
    """validate() works on a copy and leaves its argument intact."""
    metadata = {**BASE_METADATA, "enable_thinking": True, "budget_tokens": 2048}
    before = dict(metadata)
    GeminiLLMParameters.validate(metadata)
    assert metadata == before
105+
106+
107+
# ── Pydantic field surface ───────────────────────────────────────────────────
108+
109+
110+
def test_thinking_controls_not_pydantic_fields() -> None:
    """Thinking controls live in metadata only, not on the model."""
    fields = GeminiLLMParameters.model_fields
    assert "api_key" in fields
    for control_field in ("enable_thinking", "budget_tokens", "thinking"):
        assert control_field not in fields


def test_api_key_is_required() -> None:
    """Constructing the model without api_key fails pydantic validation."""
    from pydantic import ValidationError

    with pytest.raises(ValidationError):
        GeminiLLMParameters(model="gemini/gemini-2.5-flash")
123+
124+
125+
# ── Adapter identity ─────────────────────────────────────────────────────────
126+
127+
128+
def test_adapter_identity() -> None:
    """The adapter exposes consistent name/provider/id/metadata."""
    metadata = GeminiLLMAdapter.get_metadata()
    assert GeminiLLMAdapter.get_name() == "Gemini"
    assert GeminiLLMAdapter.get_provider() == "gemini"
    assert GeminiLLMAdapter.get_id().startswith("gemini|")
    assert metadata["name"] == "Gemini"
    assert metadata["is_active"] is True
135+
136+
137+
# ── JSON schema ──────────────────────────────────────────────────────────────
138+
139+
140+
@pytest.fixture
def gemini_schema() -> dict:
    """Load the packaged Gemini JSON schema from the source tree."""
    schema_file = (
        Path(__file__).parent.parent
        / "src/unstract/sdk1/adapters/llm1/static/gemini.json"
    )
    with schema_file.open() as fh:
        return json.load(fh)


def test_schema_required_fields(gemini_schema: dict) -> None:
    """The schema requires at least adapter_name, api_key and model."""
    required = set(gemini_schema["required"])
    assert required >= {"adapter_name", "api_key", "model"}


def test_schema_enable_thinking_default_false(gemini_schema: dict) -> None:
    """Thinking mode is off unless the user opts in."""
    enable_thinking = gemini_schema["properties"]["enable_thinking"]
    assert enable_thinking["default"] is False


def test_schema_budget_tokens_conditional(gemini_schema: dict) -> None:
    """budget_tokens is required, bounded and defaulted only when thinking is on."""
    conditionals = gemini_schema["allOf"]
    assert len(conditionals) == 1
    rule = conditionals[0]
    assert rule["if"]["properties"]["enable_thinking"]["const"] is True
    assert "budget_tokens" in rule["then"]["required"]
    budget = rule["then"]["properties"]["budget_tokens"]
    assert budget["default"] == 1024
    assert budget["minimum"] == 1024
    assert "maximum" not in budget

0 commit comments

Comments
 (0)