refactor: replace set_gemini_safety_settings() with LLMProvider.with_config() (#474)

nicoloboschi · web-flow · commit 861295dd7cdb · 2026-03-03T15:00:32.000+01:00
* refactor: replace set_gemini_safety_settings() with LLMProvider.with_config()

Removes the fragile ContextVar-setter pattern where callers had to remember
to call set_gemini_safety_settings() at every operation entry point.

Instead, LLMProvider.with_config(resolved_config) returns a
ConfiguredLLMProvider wrapper that:
- injects per-bank settings (Gemini safety settings) on every call via
  token-based ContextVar set/reset — properly scoped, no leakage
- proxies every other attribute (anything except call() / call_with_tools())
  to the underlying provider via __getattr__
- requires zero changes to LLMInterface or any provider implementations

Call sites (retain, reflect, consolidation) now pass
llm_config.with_config(resolved_config) to sub-components instead of
setting a global context var and hoping nothing else runs in between.
This pattern also composes naturally with a future per-bank provider
factory: callers always receive something with a .call() method.

* fix: pass messages/tools as kwargs in ConfiguredLLMProvider to preserve class-level patch compatibility
diff --git a/hindsight-api/hindsight_api/engine/consolidation/consolidator.py b/hindsight-api/hindsight_api/engine/consolidation/consolidator.py
@@ -127,10 +127,9 @@ async def run_consolidation_job(
     # Resolve bank-specific config with hierarchical overrides
     config = await memory_engine._config_resolver.resolve_full_config(bank_id, request_context)
 
-    # Apply bank-specific Gemini safety settings for this request context
-    from ..providers.gemini_llm import set_gemini_safety_settings
-
-    set_gemini_safety_settings(config.llm_gemini_safety_settings)
+    # Build a configured LLM wrapper that applies per-bank settings (e.g. safety settings)
+    # to every call without leaking across operations.
+    llm_config = memory_engine._consolidation_llm_config.with_config(config)
 
     perf = ConsolidationPerfLog(bank_id)
     max_memories_per_batch = config.consolidation_batch_size
@@ -281,6 +280,7 @@ async def run_consolidation_job(
                         pass_results = await _process_memory_batch(
                             conn=conn,
                             memory_engine=memory_engine,
+                            llm_config=llm_config,
                             bank_id=bank_id,
                             memories=llm_batch,
                             request_context=request_context,
@@ -318,6 +318,7 @@ async def run_consolidation_job(
                     results = await _process_memory_batch(
                         conn=conn,
                         memory_engine=memory_engine,
+                        llm_config=llm_config,
                         bank_id=bank_id,
                         memories=llm_batch,
                         request_context=request_context,
@@ -513,6 +514,7 @@ async def _trigger_mental_model_refreshes(
 async def _process_memory_batch(
     conn: "Connection",
     memory_engine: "MemoryEngine",
+    llm_config: Any,
     bank_id: str,
     memories: list[dict[str, Any]],
     request_context: "RequestContext",
@@ -581,7 +583,7 @@ async def _process_memory_batch(
     # 3. Single LLM call
     t0 = time.time()
     llm_result = await _consolidate_batch_with_llm(
-        memory_engine=memory_engine,
+        llm_config=llm_config,
         memories=memories,
         union_observations=union_observations,
         union_source_facts=union_source_facts,
@@ -945,7 +947,7 @@ def _build_observations_for_llm(
 
 
 async def _consolidate_batch_with_llm(
-    memory_engine: "MemoryEngine",
+    llm_config: Any,
     memories: list[dict[str, Any]],
     union_observations: "list[MemoryFact]",
     union_source_facts: "dict[str, MemoryFact]",
@@ -981,7 +983,7 @@ def _fact_line(m: dict[str, Any]) -> str:
     last_exc: Exception | None = None
     for attempt in range(1, max_attempts + 1):
         try:
-            response: _ConsolidationBatchResponse = await memory_engine._consolidation_llm_config.call(
+            response: _ConsolidationBatchResponse = await llm_config.call(
                 messages=[{"role": "user", "content": prompt}],
                 response_format=_ConsolidationBatchResponse,
                 scope="consolidation",
diff --git a/hindsight-api/hindsight_api/engine/llm_wrapper.py b/hindsight-api/hindsight_api/engine/llm_wrapper.py
@@ -622,6 +622,23 @@ def _verify_claude_code_available(self) -> None:
         # SDK will automatically check for authentication when first used
         # No need to verify here - let it fail gracefully on first call with helpful error
 
+    def with_config(self, config: Any) -> "ConfiguredLLMProvider":
+        """
+        Return a per-operation wrapper that applies bank-specific overrides.
+
+        Only ``config.llm_gemini_safety_settings`` is read today; the wrapper
+        re-applies it around every ``call()`` / ``call_with_tools()`` and
+        leaves this provider instance itself unmodified.
+
+        Args:
+            config: Resolved ``HindsightConfig``; only ``llm_gemini_safety_settings`` is read.
+
+        Returns:
+            A ``ConfiguredLLMProvider`` that delegates to this provider with
+            the captured safety settings applied to each call.
+        """
+        return ConfiguredLLMProvider(self, config.llm_gemini_safety_settings)
+
     async def cleanup(self) -> None:
         """Clean up resources."""
         pass
@@ -683,5 +700,58 @@ def for_judge(cls) -> "LLMProvider":
         return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="high")
 
 
+class ConfiguredLLMProvider:
+    """
+    Thin wrapper around LLMProvider that applies bank-specific config to every call.
+
+    Obtained via ``LLMProvider.with_config(resolved_config)``.  The wrapper
+    sets provider-specific overrides (currently only Gemini safety settings)
+    on a ContextVar immediately before each ``call()`` / ``call_with_tools()``
+    and resets it from the saved token afterwards, so the override is scoped
+    to that single call and the previous value is restored even on error.
+
+    Any attribute not defined here falls through to the underlying provider,
+    so ``llm.provider``, ``llm.model``, etc. keep working; methods reached
+    that way run without this wrapper applying the override.
+    """
+
+    def __init__(self, provider: "LLMProvider", gemini_safety_settings: list | None) -> None:
+        # NOTE: assignment never triggers __getattr__; object.__setattr__ just mirrors the explicit reads below.
+        object.__setattr__(self, "_provider", provider)
+        object.__setattr__(self, "_gemini_safety_settings", gemini_safety_settings)
+
+    # ── attribute passthrough ──────────────────────────────────────────────────
+    # Runs only when normal lookup fails; object.__getattribute__ avoids re-entering __getattr__.
+    def __getattr__(self, name: str) -> Any:
+        return getattr(object.__getattribute__(self, "_provider"), name)
+
+    # ── overridden call methods ────────────────────────────────────────────────
+
+    async def call(self, messages: list[dict[str, Any]], **kwargs: Any) -> Any:
+        from .providers.gemini_llm import _safety_settings_ctx
+        # reset(token) in ``finally`` restores the prior value even if the provider call raises.
+        token = _safety_settings_ctx.set(object.__getattribute__(self, "_gemini_safety_settings"))
+        try:
+            return await object.__getattribute__(self, "_provider").call(messages=messages, **kwargs)
+        finally:
+            _safety_settings_ctx.reset(token)
+
+    async def call_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        **kwargs: Any,
+    ) -> "LLMToolCallResult":
+        from .providers.gemini_llm import _safety_settings_ctx
+        # reset(token) in ``finally`` restores the prior value even if the provider call raises.
+        token = _safety_settings_ctx.set(object.__getattribute__(self, "_gemini_safety_settings"))
+        try:
+            return await object.__getattribute__(self, "_provider").call_with_tools(
+                messages=messages, tools=tools, **kwargs
+            )
+        finally:
+            _safety_settings_ctx.reset(token)
+
+
 # Backwards compatibility alias
 LLMConfig = LLMProvider
diff --git a/hindsight-api/hindsight_api/engine/memory_engine.py b/hindsight-api/hindsight_api/engine/memory_engine.py
@@ -1831,17 +1831,12 @@ async def _retain_batch_async_internal(
             # Resolve bank-specific config for this operation
             resolved_config = await self._config_resolver.resolve_full_config(bank_id, request_context)
 
-            # Apply bank-specific Gemini safety settings for this request context
-            from .providers.gemini_llm import set_gemini_safety_settings
-
-            set_gemini_safety_settings(resolved_config.llm_gemini_safety_settings)
-
             # Create parent span for retain operation
             with create_operation_span("retain", bank_id):
                 return await orchestrator.retain_batch(
                     pool=pool,
                     embeddings_model=self.embeddings,
-                    llm_config=self._retain_llm_config,
+                    llm_config=self._retain_llm_config.with_config(resolved_config),
                     entity_resolver=self.entity_resolver,
                     format_date_fn=self._format_readable_date,
                     bank_id=bank_id,
@@ -4468,11 +4463,7 @@ async def reflect_async(
         # The agent can call lookup() to list available models if needed.
         # This is critical for banks with many mental models to avoid huge prompts.
 
-        # Apply bank-specific Gemini safety settings for this request context
         resolved_reflect_config = await self._config_resolver.resolve_full_config(bank_id, request_context)
-        from .providers.gemini_llm import set_gemini_safety_settings
-
-        set_gemini_safety_settings(resolved_reflect_config.llm_gemini_safety_settings)
 
         # Compute max iterations based on budget
         config = get_config()
@@ -4576,7 +4567,7 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
 
         try:
             agent_result = await run_reflect_agent(
-                llm_config=self._reflect_llm_config,
+                llm_config=self._reflect_llm_config.with_config(resolved_reflect_config),
                 bank_id=bank_id,
                 query=query,
                 bank_profile=profile,
diff --git a/hindsight-api/hindsight_api/engine/providers/gemini_llm.py b/hindsight-api/hindsight_api/engine/providers/gemini_llm.py
@@ -25,25 +25,12 @@
 
 logger = logging.getLogger(__name__)
 
-# Context variable for per-request Gemini safety settings override (supports per-bank configuration)
+# Per-request Gemini safety settings override.
+# Set exclusively by ConfiguredLLMProvider.call() / call_with_tools() via token-based
+# set/reset, so it is properly scoped to each individual LLM call and never leaks.
 _safety_settings_ctx: ContextVar[list | None] = ContextVar("gemini_safety_settings", default=None)
 
 
-def set_gemini_safety_settings(settings: list | None) -> None:
-    """
-    Set Gemini safety settings for the current async context.
-
-    This allows per-bank safety settings to be applied without changing
-    the LLM provider interface. Call this before making LLM calls within
-    an operation that has resolved bank-specific configuration.
-
-    Args:
-        settings: List of safety setting dicts with 'category' and 'threshold' keys,
-                  or None to use the instance default (from env var).
-    """
-    _safety_settings_ctx.set(settings)
-
-
 # Vertex AI imports (optional)
 try:
     import google.auth
diff --git a/hindsight-api/tests/test_gemini_safety_settings.py b/hindsight-api/tests/test_gemini_safety_settings.py
@@ -216,81 +216,96 @@ async def test_call_with_tools_applies_safety_settings():
     assert "HARM_CATEGORY_HARASSMENT" in categories
 
 
-# ─── Context variable override ────────────────────────────────────────────────
+# ─── with_config() override ───────────────────────────────────────────────────
 
 
-@pytest.mark.asyncio
-async def test_context_var_overrides_instance_settings():
-    """The context var safety settings take precedence over instance defaults."""
-    from hindsight_api.engine.providers.gemini_llm import set_gemini_safety_settings
+def _make_llm_provider(safety_settings=None):
+    """Return an LLMProvider (wrapping GeminiLLM) with a mocked genai.Client."""
+    with patch("google.genai.Client") as mock_client_cls:
+        mock_client_cls.return_value = MagicMock()
+        from hindsight_api.engine.llm_wrapper import LLMProvider
 
-    # Instance has settings, but we'll override via context var with different settings
-    instance_settings = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"}]
-    ctx_settings = [{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}]
+        provider = LLMProvider(
+            provider="gemini",
+            api_key="fake-api-key",
+            base_url="",
+            model="gemini-2.5-flash",
+            gemini_safety_settings=safety_settings,
+        )
+        # Replace the underlying Gemini client with a fresh mock
+        provider._provider_impl._client = MagicMock()
+        return provider
 
-    provider = _make_gemini_provider(safety_settings=instance_settings)
 
-    fake_response = MagicMock()
-    fake_response.text = "hello"
-    fake_response.candidates = [MagicMock(finish_reason="STOP")]
-    fake_response.usage_metadata = MagicMock(prompt_token_count=5, candidates_token_count=2)
+def _fake_response():
+    r = MagicMock()
+    r.text = "hello"
+    r.candidates = [MagicMock(finish_reason="STOP")]
+    r.usage_metadata = MagicMock(prompt_token_count=5, candidates_token_count=2)
+    return r
 
-    provider._client.aio.models.generate_content = AsyncMock(return_value=fake_response)
 
-    # Set context var override
-    set_gemini_safety_settings(ctx_settings)
-    try:
-        await provider.call(
-            messages=[{"role": "user", "content": "hi"}],
-            scope="test",
-        )
-    finally:
-        set_gemini_safety_settings(None)  # Reset context
+def _make_config(safety_settings):
+    """Return a MagicMock config stub whose llm_gemini_safety_settings is ``safety_settings``."""
+    cfg = MagicMock()
+    cfg.llm_gemini_safety_settings = safety_settings
+    return cfg
 
-    call_args = provider._client.aio.models.generate_content.call_args
-    config_arg = call_args.kwargs.get("config")
 
-    assert config_arg is not None
-    assert config_arg.safety_settings is not None
+@pytest.mark.asyncio
+async def test_with_config_overrides_instance_settings():
+    """with_config() settings take precedence over the provider instance defaults."""
+    instance_settings = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"}]
+    override_settings = [{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}]
+
+    provider = _make_llm_provider(safety_settings=instance_settings)
+    provider._provider_impl._client.aio.models.generate_content = AsyncMock(return_value=_fake_response())
 
-    # Should use ctx_settings (HATE_SPEECH/BLOCK_NONE), not instance_settings (HARASSMENT/BLOCK_ONLY_HIGH)
+    configured = provider.with_config(_make_config(override_settings))
+    await configured.call(messages=[{"role": "user", "content": "hi"}], scope="test")
+
+    config_arg = provider._provider_impl._client.aio.models.generate_content.call_args.kwargs.get("config")
+    assert config_arg is not None
     categories = [s.category.value if hasattr(s.category, "value") else str(s.category) for s in config_arg.safety_settings]
+    # Should use override_settings (HATE_SPEECH), not instance_settings (HARASSMENT)
     assert "HARM_CATEGORY_HATE_SPEECH" in categories
     assert "HARM_CATEGORY_HARASSMENT" not in categories
 
 
 @pytest.mark.asyncio
-async def test_context_var_none_falls_back_to_instance():
-    """When context var is None (not set), instance settings are used."""
-    from hindsight_api.engine.providers.gemini_llm import set_gemini_safety_settings
-
+async def test_with_config_none_falls_back_to_instance():
+    """When with_config() supplies None, the instance default is used."""
     instance_settings = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}]
-    provider = _make_gemini_provider(safety_settings=instance_settings)
-
-    fake_response = MagicMock()
-    fake_response.text = "hello"
-    fake_response.candidates = [MagicMock(finish_reason="STOP")]
-    fake_response.usage_metadata = MagicMock(prompt_token_count=5, candidates_token_count=2)
-
-    provider._client.aio.models.generate_content = AsyncMock(return_value=fake_response)
-
-    # Explicitly set context var to None (fallback)
-    set_gemini_safety_settings(None)
 
-    await provider.call(
-        messages=[{"role": "user", "content": "hi"}],
-        scope="test",
-    )
+    provider = _make_llm_provider(safety_settings=instance_settings)
+    provider._provider_impl._client.aio.models.generate_content = AsyncMock(return_value=_fake_response())
 
-    call_args = provider._client.aio.models.generate_content.call_args
-    config_arg = call_args.kwargs.get("config")
+    configured = provider.with_config(_make_config(None))
+    await configured.call(messages=[{"role": "user", "content": "hi"}], scope="test")
 
+    config_arg = provider._provider_impl._client.aio.models.generate_content.call_args.kwargs.get("config")
     assert config_arg is not None
-    assert config_arg.safety_settings is not None
     categories = [s.category.value if hasattr(s.category, "value") else str(s.category) for s in config_arg.safety_settings]
     assert "HARM_CATEGORY_HARASSMENT" in categories
 
 
+@pytest.mark.asyncio
+async def test_with_config_resets_after_call():
+    """After a with_config() call returns, the ContextVar holds its pre-call value again."""
+    from hindsight_api.engine.providers.gemini_llm import _safety_settings_ctx
+
+    settings = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}]
+    provider = _make_llm_provider(safety_settings=None)
+    provider._provider_impl._client.aio.models.generate_content = AsyncMock(return_value=_fake_response())
+
+    before = _safety_settings_ctx.get()
+    configured = provider.with_config(_make_config(settings))
+    await configured.call(messages=[{"role": "user", "content": "hi"}], scope="test")
+    after = _safety_settings_ctx.get()
+    # NOTE(review): only the post-call value is compared; that the override was active during the call is not asserted.
+    assert after == before  # ContextVar restored to its original value
+
+
 # ─── LLMProvider reads safety settings from config ────────────────────────────