Skip to content

Commit 2e444f8

Browse files
feat(sync): re-apply deployment-critical adjacencies on top of upstream merge
These edits should have been folded into the merge commit (a901f34) but were left uncommitted — pushing now to actually deliver CF Gateway support and clean up leftovers from the -X theirs auto-resolution. src/config.py - Add LLMSettings.CF_GATEWAY_AUTH_TOKEN (single global needed for the cf-aig-authorization header on any provider override client whose base_url targets a CF gateway URL). src/llm/registry.py - Inject cf-aig-authorization header in get_openai_override_client, get_anthropic_override_client, and get_gemini_override_client when base_url contains 'gateway.ai.cloudflare.com' AND LLM.CF_GATEWAY_AUTH_TOKEN is set. Rides on the existing openai/anthropic/gemini transports — no parallel CF backend. src/embedding_client.py - Mirror the same header injection on the openai/gemini branches so embeddings through CF Gateway authenticate correctly. Helper is duplicated locally so the embedding client doesn't depend on the LLM runtime registry module. src/dreamer/specialists.py - Drop get_provider() / get_thinking_budget() override methods on BaseSpecialist + the per-specialist references to settings.DREAM.DEDUCTION_PROVIDER / INDUCTION_PROVIDER / *_THINKING_BUDGET_TOKENS. Those settings fields no longer exist upstream — same functionality is reachable via DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT etc. - Drop the orphan thinking_budget_tokens=llm_settings.THINKING_BUDGET_TOKENS arg on the honcho_llm_call site that survived the auto-merge — the value now lives on model_config which is already passed. src/main.py - ruff isort fix (autofixed) — uuid/time import order. Verification: ruff check src/ ✓, basedpyright src/ ✓ (0 errors).
1 parent a901f34 commit 2e444f8

5 files changed

Lines changed: 70 additions & 33 deletions

File tree

src/config.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -648,6 +648,10 @@ class LLMSettings(HonchoSettings):
648648
OPENAI_API_KEY: str | None = None
649649
GEMINI_API_KEY: str | None = None
650650

651+
# Cloudflare AI Gateway: when set, injected as cf-aig-authorization header
652+
# on any provider override client whose base_url targets a CF gateway.
653+
CF_GATEWAY_AUTH_TOKEN: str | None = None
654+
651655
# General LLM settings
652656
DEFAULT_MAX_TOKENS: Annotated[int, Field(default=1000, gt=0, le=100_000)] = 2500
653657

src/dreamer/specialists.py

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -87,14 +87,6 @@ def get_model_config(self) -> ConfiguredModelSettings:
8787
"""Get the configured model to use for this specialist."""
8888
...
8989

90-
def get_provider(self) -> str | None:
91-
"""Get the provider override for this specialist, or None to inherit from DREAM."""
92-
return None
93-
94-
def get_thinking_budget(self) -> int | None:
95-
"""Get the thinking budget override, or None to inherit from DREAM."""
96-
return None
97-
9890
def get_max_tokens(self) -> int:
9991
"""Get max output tokens for this specialist."""
10092
return 16384
@@ -249,7 +241,6 @@ def iteration_callback(data: Any) -> None:
249241
messages=messages,
250242
track_name=f"Dreamer/{self.name}",
251243
iteration_callback=iteration_callback,
252-
thinking_budget_tokens=llm_settings.THINKING_BUDGET_TOKENS,
253244
)
254245

255246
# Log metrics
@@ -342,12 +333,6 @@ def get_model_config(self) -> ConfiguredModelSettings:
342333
specialist_name="DREAM DEDUCTION",
343334
)
344335

345-
def get_provider(self) -> str | None:
346-
return settings.DREAM.DEDUCTION_PROVIDER
347-
348-
def get_thinking_budget(self) -> int | None:
349-
return settings.DREAM.DEDUCTION_THINKING_BUDGET_TOKENS
350-
351336
def get_max_tokens(self) -> int:
352337
return 8192
353338

@@ -496,12 +481,6 @@ def get_model_config(self) -> ConfiguredModelSettings:
496481
specialist_name="DREAM INDUCTION",
497482
)
498483

499-
def get_provider(self) -> str | None:
500-
return settings.DREAM.INDUCTION_PROVIDER
501-
502-
def get_thinking_budget(self) -> int | None:
503-
return settings.DREAM.INDUCTION_THINKING_BUDGET_TOKENS
504-
505484
def get_max_tokens(self) -> int:
506485
return 8192
507486

src/embedding_client.py

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import logging
33
import threading
44
from collections import defaultdict
5-
from typing import Any, NamedTuple
5+
from typing import NamedTuple
66

77
import tiktoken
88
from google import genai
@@ -22,6 +22,19 @@ class BatchItem(NamedTuple):
2222
chunk_index: int
2323

2424

25+
def _cf_gateway_headers(base_url: str | None) -> dict[str, str] | None:
26+
"""Cloudflare AI Gateway requires a per-account auth token in the
27+
cf-aig-authorization header. Mirrors src/llm/registry._cf_gateway_headers
28+
so the embedding client doesn't depend on the LLM runtime registry.
29+
"""
30+
if not base_url or "gateway.ai.cloudflare.com" not in base_url:
31+
return None
32+
token = settings.LLM.CF_GATEWAY_AUTH_TOKEN
33+
if not token:
34+
return None
35+
return {"cf-aig-authorization": f"Bearer {token}"}
36+
37+
2538
class _EmbeddingClient:
2639
"""
2740
Embedding client supporting OpenAI and Gemini with chunking and batching support.
@@ -42,11 +55,13 @@ def __init__(
4255
if self.transport == "gemini":
4356
if not config.api_key:
4457
raise ValueError("Gemini API key is required")
45-
http_options = (
46-
genai_types.HttpOptions(base_url=config.base_url)
47-
if config.base_url
48-
else None
49-
)
58+
cf_headers = _cf_gateway_headers(config.base_url)
59+
if config.base_url or cf_headers:
60+
http_options = genai_types.HttpOptions(
61+
base_url=config.base_url, headers=cf_headers
62+
)
63+
else:
64+
http_options = None
5065
self.client: genai.Client | AsyncOpenAI = genai.Client(
5166
api_key=config.api_key,
5267
http_options=http_options,
@@ -58,10 +73,18 @@ def __init__(
5873
else: # openai
5974
if not config.api_key:
6075
raise ValueError("OpenAI API key is required")
61-
self.client = AsyncOpenAI(
62-
api_key=config.api_key,
63-
base_url=config.base_url,
64-
)
76+
cf_headers = _cf_gateway_headers(config.base_url)
77+
if cf_headers:
78+
self.client = AsyncOpenAI(
79+
api_key=config.api_key,
80+
base_url=config.base_url,
81+
default_headers=cf_headers,
82+
)
83+
else:
84+
self.client = AsyncOpenAI(
85+
api_key=config.api_key,
86+
base_url=config.base_url,
87+
)
6588
self.max_embedding_tokens = max_input_tokens
6689
self.max_batch_size = 2048 # OpenAI batch limit
6790

src/llm/registry.py

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,11 @@ def get_openai_override_client(
6363
base_url: str | None, api_key: str | None
6464
) -> AsyncOpenAI:
6565
"""OpenAI client for a specific (base_url, api_key) pair. Cached by key."""
66+
headers = _cf_gateway_headers(base_url)
67+
if headers:
68+
return AsyncOpenAI(
69+
api_key=api_key, base_url=base_url, default_headers=headers
70+
)
6671
return AsyncOpenAI(api_key=api_key, base_url=base_url)
6772

6873

@@ -72,6 +77,14 @@ def get_anthropic_override_client(
7277
api_key: str | None,
7378
) -> AsyncAnthropic:
7479
"""Anthropic client for a specific (base_url, api_key) pair. Cached by key."""
80+
headers = _cf_gateway_headers(base_url)
81+
if headers:
82+
return AsyncAnthropic(
83+
api_key=api_key,
84+
base_url=base_url,
85+
timeout=600.0,
86+
default_headers=headers,
87+
)
7588
return AsyncAnthropic(api_key=api_key, base_url=base_url, timeout=600.0)
7689

7790

@@ -80,10 +93,28 @@ def get_gemini_override_client(
8093
base_url: str | None, api_key: str | None
8194
) -> genai.Client:
8295
"""Gemini client for a specific (base_url, api_key) pair. Cached by key."""
83-
http_options = genai_types.HttpOptions(base_url=base_url) if base_url else None
96+
headers = _cf_gateway_headers(base_url)
97+
if base_url or headers:
98+
http_options = genai_types.HttpOptions(base_url=base_url, headers=headers)
99+
else:
100+
http_options = None
84101
return genai.Client(api_key=api_key, http_options=http_options)
85102

86103

104+
def _cf_gateway_headers(base_url: str | None) -> dict[str, str] | None:
105+
"""Cloudflare AI Gateway requires a per-account auth token in the
106+
cf-aig-authorization header when account-scoped auth is enabled. Inject it
107+
on any override client routed through a CF gateway URL when
108+
LLM.CF_GATEWAY_AUTH_TOKEN is configured.
109+
"""
110+
if not base_url or "gateway.ai.cloudflare.com" not in base_url:
111+
return None
112+
token = settings.LLM.CF_GATEWAY_AUTH_TOKEN
113+
if not token:
114+
return None
115+
return {"cf-aig-authorization": f"Bearer {token}"}
116+
117+
87118
# Module-level default-client registry, populated at import time. Tests patch
88119
# this dict via `patch.dict(CLIENTS, {...})` to inject mock provider clients.
89120
CLIENTS: dict[ModelTransport, ProviderClient] = {}

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import logging
22
import os
33
import re
4-
import uuid
54
import time
5+
import uuid
66
from collections.abc import Awaitable, Callable
77
from contextlib import asynccontextmanager
88
from typing import TYPE_CHECKING

0 commit comments

Comments
 (0)