Skip to content

Commit 2e444f8

Browse files
feat(sync): re-apply deployment-critical adjacencies on top of upstream merge
These edits should have been folded into the merge commit (a901f34) but were left uncommitted — pushing now to actually deliver CF Gateway support and clean up leftovers from the -X theirs auto-resolution. src/config.py - Add LLMSettings.CF_GATEWAY_AUTH_TOKEN (single global needed for the cf-aig-authorization header on any provider override client whose base_url targets a CF gateway URL). src/llm/registry.py - Inject cf-aig-authorization header in get_openai_override_client, get_anthropic_override_client, and get_gemini_override_client when base_url contains 'gateway.ai.cloudflare.com' AND LLM.CF_GATEWAY_AUTH_TOKEN is set. Rides on the existing openai/anthropic/gemini transports — no parallel CF backend. src/embedding_client.py - Mirror the same header injection on the openai/gemini branches so embeddings through CF Gateway authenticate correctly. Helper is duplicated locally so the embedding client doesn't depend on the LLM runtime registry module. src/dreamer/specialists.py - Drop get_provider() / get_thinking_budget() override methods on BaseSpecialist + the per-specialist references to settings.DREAM.DEDUCTION_PROVIDER / INDUCTION_PROVIDER / *_THINKING_BUDGET_TOKENS. Those settings fields no longer exist upstream — same functionality is reachable via DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT etc. - Drop the orphan thinking_budget_tokens=llm_settings.THINKING_BUDGET_TOKENS arg on the honcho_llm_call site that survived the auto-merge — the value now lives on model_config which is already passed. src/main.py - ruff isort fix (autofixed) — uuid/time import order. Verification: ruff check src/ ✓, basedpyright src/ ✓ (0 errors).
1 parent a901f34 commit 2e444f8

5 files changed

Lines changed: 70 additions & 33 deletions

File tree

src/config.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -648,6 +648,10 @@ class LLMSettings(HonchoSettings):
648648
OPENAI_API_KEY: str | None = None
649649
GEMINI_API_KEY: str | None = None
650650

651+
# Cloudflare AI Gateway: when set, injected as cf-aig-authorization header
652+
# on any provider override client whose base_url targets a CF gateway.
653+
CF_GATEWAY_AUTH_TOKEN: str | None = None
654+
651655
# General LLM settings
652656
DEFAULT_MAX_TOKENS: Annotated[int, Field(default=1000, gt=0, le=100_000)] = 2500
653657

src/dreamer/specialists.py

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -87,14 +87,6 @@ def get_model_config(self) -> ConfiguredModelSettings:
8787
"""Get the configured model to use for this specialist."""
8888
...
8989

90-
def get_provider(self) -> str | None:
91-
"""Get the provider override for this specialist, or None to inherit from DREAM."""
92-
return None
93-
94-
def get_thinking_budget(self) -> int | None:
95-
"""Get the thinking budget override, or None to inherit from DREAM."""
96-
return None
97-
9890
def get_max_tokens(self) -> int:
9991
"""Get max output tokens for this specialist."""
10092
return 16384
@@ -249,7 +241,6 @@ def iteration_callback(data: Any) -> None:
249241
messages=messages,
250242
track_name=f"Dreamer/{self.name}",
251243
iteration_callback=iteration_callback,
252-
thinking_budget_tokens=llm_settings.THINKING_BUDGET_TOKENS,
253244
)
254245

255246
# Log metrics
@@ -342,12 +333,6 @@ def get_model_config(self) -> ConfiguredModelSettings:
342333
specialist_name="DREAM DEDUCTION",
343334
)
344335

345-
def get_provider(self) -> str | None:
346-
return settings.DREAM.DEDUCTION_PROVIDER
347-
348-
def get_thinking_budget(self) -> int | None:
349-
return settings.DREAM.DEDUCTION_THINKING_BUDGET_TOKENS
350-
351336
def get_max_tokens(self) -> int:
352337
return 8192
353338

@@ -496,12 +481,6 @@ def get_model_config(self) -> ConfiguredModelSettings:
496481
specialist_name="DREAM INDUCTION",
497482
)
498483

499-
def get_provider(self) -> str | None:
500-
return settings.DREAM.INDUCTION_PROVIDER
501-
502-
def get_thinking_budget(self) -> int | None:
503-
return settings.DREAM.INDUCTION_THINKING_BUDGET_TOKENS
504-
505484
def get_max_tokens(self) -> int:
506485
return 8192
507486

src/embedding_client.py

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import logging
33
import threading
44
from collections import defaultdict
5-
from typing import Any, NamedTuple
5+
from typing import NamedTuple
66

77
import tiktoken
88
from google import genai
@@ -22,6 +22,19 @@ class BatchItem(NamedTuple):
2222
chunk_index: int
2323

2424

25+
def _cf_gateway_headers(base_url: str | None) -> dict[str, str] | None:
26+
"""Cloudflare AI Gateway requires a per-account auth token in the
27+
cf-aig-authorization header. Mirrors src/llm/registry._cf_gateway_headers
28+
so the embedding client doesn't depend on the LLM runtime registry.
29+
"""
30+
if not base_url or "gateway.ai.cloudflare.com" not in base_url:
31+
return None
32+
token = settings.LLM.CF_GATEWAY_AUTH_TOKEN
33+
if not token:
34+
return None
35+
return {"cf-aig-authorization": f"Bearer {token}"}
36+
37+
2538
class _EmbeddingClient:
2639
"""
2740
Embedding client supporting OpenAI and Gemini with chunking and batching support.
@@ -42,11 +55,13 @@ def __init__(
4255
if self.transport == "gemini":
4356
if not config.api_key:
4457
raise ValueError("Gemini API key is required")
45-
http_options = (
46-
genai_types.HttpOptions(base_url=config.base_url)
47-
if config.base_url
48-
else None
49-
)
58+
cf_headers = _cf_gateway_headers(config.base_url)
59+
if config.base_url or cf_headers:
60+
http_options = genai_types.HttpOptions(
61+
base_url=config.base_url, headers=cf_headers
62+
)
63+
else:
64+
http_options = None
5065
self.client: genai.Client | AsyncOpenAI = genai.Client(
5166
api_key=config.api_key,
5267
http_options=http_options,
@@ -58,10 +73,18 @@ def __init__(
5873
else: # openai
5974
if not config.api_key:
6075
raise ValueError("OpenAI API key is required")
61-
self.client = AsyncOpenAI(
62-
api_key=config.api_key,
63-
base_url=config.base_url,
64-
)
76+
cf_headers = _cf_gateway_headers(config.base_url)
77+
if cf_headers:
78+
self.client = AsyncOpenAI(
79+
api_key=config.api_key,
80+
base_url=config.base_url,
81+
default_headers=cf_headers,
82+
)
83+
else:
84+
self.client = AsyncOpenAI(
85+
api_key=config.api_key,
86+
base_url=config.base_url,
87+
)
6588
self.max_embedding_tokens = max_input_tokens
6689
self.max_batch_size = 2048 # OpenAI batch limit
6790

src/llm/registry.py

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,11 @@ def get_openai_override_client(
6363
base_url: str | None, api_key: str | None
6464
) -> AsyncOpenAI:
6565
"""OpenAI client for a specific (base_url, api_key) pair. Cached by key."""
66+
headers = _cf_gateway_headers(base_url)
67+
if headers:
68+
return AsyncOpenAI(
69+
api_key=api_key, base_url=base_url, default_headers=headers
70+
)
6671
return AsyncOpenAI(api_key=api_key, base_url=base_url)
6772

6873

@@ -72,6 +77,14 @@ def get_anthropic_override_client(
7277
api_key: str | None,
7378
) -> AsyncAnthropic:
7479
"""Anthropic client for a specific (base_url, api_key) pair. Cached by key."""
80+
headers = _cf_gateway_headers(base_url)
81+
if headers:
82+
return AsyncAnthropic(
83+
api_key=api_key,
84+
base_url=base_url,
85+
timeout=600.0,
86+
default_headers=headers,
87+
)
7588
return AsyncAnthropic(api_key=api_key, base_url=base_url, timeout=600.0)
7689

7790

@@ -80,10 +93,28 @@ def get_gemini_override_client(
8093
base_url: str | None, api_key: str | None
8194
) -> genai.Client:
8295
"""Gemini client for a specific (base_url, api_key) pair. Cached by key."""
83-
http_options = genai_types.HttpOptions(base_url=base_url) if base_url else None
96+
headers = _cf_gateway_headers(base_url)
97+
if base_url or headers:
98+
http_options = genai_types.HttpOptions(base_url=base_url, headers=headers)
99+
else:
100+
http_options = None
84101
return genai.Client(api_key=api_key, http_options=http_options)
85102

86103

104+
def _cf_gateway_headers(base_url: str | None) -> dict[str, str] | None:
105+
"""Cloudflare AI Gateway requires a per-account auth token in the
106+
cf-aig-authorization header when account-scoped auth is enabled. Inject it
107+
on any override client routed through a CF gateway URL when
108+
LLM.CF_GATEWAY_AUTH_TOKEN is configured.
109+
"""
110+
if not base_url or "gateway.ai.cloudflare.com" not in base_url:
111+
return None
112+
token = settings.LLM.CF_GATEWAY_AUTH_TOKEN
113+
if not token:
114+
return None
115+
return {"cf-aig-authorization": f"Bearer {token}"}
116+
117+
87118
# Module-level default-client registry, populated at import time. Tests patch
88119
# this dict via `patch.dict(CLIENTS, {...})` to inject mock provider clients.
89120
CLIENTS: dict[ModelTransport, ProviderClient] = {}

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import logging
22
import os
33
import re
4-
import uuid
54
import time
5+
import uuid
66
from collections.abc import Awaitable, Callable
77
from contextlib import asynccontextmanager
88
from typing import TYPE_CHECKING

0 commit comments

Comments
 (0)