@@ -258,17 +258,36 @@ def _get_effective_temperature(temperature: float | None) -> float | None:
258258 CLIENTS ["anthropic" ] = anthropic
259259
if settings.LLM.OPENAI_API_KEY:
    # Gather constructor arguments up front so that optional ones are only
    # passed to AsyncOpenAI when they are actually configured.
    _openai_kwargs: dict[str, Any] = {"api_key": settings.LLM.OPENAI_API_KEY}
    if settings.LLM.OPENAI_BASE_URL:
        _openai_kwargs.update(base_url=settings.LLM.OPENAI_BASE_URL)
    if settings.LLM.CF_GATEWAY_AUTH_TOKEN:
        # Gateway token travels in its own header; the provider key stays in
        # the regular Authorization header set from api_key.
        # NOTE(review): this header is attached whenever the token is set,
        # even if OPENAI_BASE_URL is not overridden — confirm that sending it
        # to the default OpenAI endpoint is intended.
        _openai_kwargs.update(
            default_headers={
                "cf-aig-authorization": f"Bearer {settings.LLM.CF_GATEWAY_AUTH_TOKEN}"
            }
        )
    CLIENTS["openai"] = AsyncOpenAI(**_openai_kwargs)
265269
# Generic OpenAI-compatible endpoint: registered only when both the key and
# the base URL are configured.
if settings.LLM.OPENAI_COMPATIBLE_API_KEY and settings.LLM.OPENAI_COMPATIBLE_BASE_URL:
    CLIENTS["custom"] = AsyncOpenAI(
        base_url=settings.LLM.OPENAI_COMPATIBLE_BASE_URL,
        api_key=settings.LLM.OPENAI_COMPATIBLE_API_KEY,
    )
271275
# Cloudflare AI Gateway (OpenAI-compatible universal endpoint).
#   CF_GATEWAY_API_KEY    - provider key sent in Authorization
#                           (e.g. a Gemini key for google-ai-studio/).
#   CF_GATEWAY_AUTH_TOKEN - optional cfut_ gateway token sent in
#                           cf-aig-authorization for gateway auth.
# The client is registered only when both the key and the base URL are set.
if settings.LLM.CF_GATEWAY_API_KEY and settings.LLM.CF_GATEWAY_BASE_URL:
    # Empty mapping when no gateway token is configured, so no extra header
    # is added to requests.
    _cf_extra_headers: dict[str, str] = (
        {"cf-aig-authorization": f"Bearer {settings.LLM.CF_GATEWAY_AUTH_TOKEN}"}
        if settings.LLM.CF_GATEWAY_AUTH_TOKEN
        else {}
    )
    CLIENTS["cf"] = AsyncOpenAI(
        base_url=settings.LLM.CF_GATEWAY_BASE_URL,
        api_key=settings.LLM.CF_GATEWAY_API_KEY,
        default_headers=_cf_extra_headers,
    )
290+
272291# vLLM uses separate settings for local model serving
273292if settings .LLM .VLLM_API_KEY and settings .LLM .VLLM_BASE_URL :
274293 CLIENTS ["vllm" ] = AsyncOpenAI (
@@ -334,9 +353,9 @@ def convert_tools_for_provider(
334353 if provider == "anthropic" :
335354 # Anthropic format: input_schema
336355 return tools
337- elif provider in ("openai" , "custom" , "vllm" ):
356+ elif provider in ("openai" , "custom" , "vllm" , "cf" ):
338357 # OpenAI format: parameters instead of input_schema
339- # custom and vllm use AsyncOpenAI client so need OpenAI format
358+ # custom, vllm, and cf use AsyncOpenAI client so need OpenAI format
340359 return [
341360 {
342361 "type" : "function" ,
@@ -1103,16 +1122,20 @@ def _format_assistant_tool_message(
11031122 # OpenAI format - must include tool_calls in the assistant message
11041123 openai_tool_calls : list [Any ] = []
11051124 for tool_call in tool_calls :
1106- openai_tool_calls .append (
1107- {
1108- "id" : tool_call ["id" ],
1109- "type" : "function" ,
1110- "function" : {
1111- "name" : tool_call ["name" ],
1112- "arguments" : json .dumps (tool_call ["input" ]),
1113- },
1114- }
1115- )
1125+ oa_call : dict [str , Any ] = {
1126+ "id" : tool_call ["id" ],
1127+ "type" : "function" ,
1128+ "function" : {
1129+ "name" : tool_call ["name" ],
1130+ "arguments" : json .dumps (tool_call ["input" ]),
1131+ },
1132+ }
1133+ # Preserve thought_signature for Gemini thinking models via CF Gateway.
1134+ # Required for multi-turn tool use — Gemini rejects requests where a
1135+ # function call in the history is missing its thought_signature.
1136+ if "thought_signature" in tool_call :
1137+ oa_call ["thought_signature" ] = tool_call ["thought_signature" ]
1138+ openai_tool_calls .append (oa_call )
11161139 msg : dict [str , Any ] = {
11171140 "role" : "assistant" ,
11181141 "content" : content if isinstance (content , str ) else None ,
@@ -2046,15 +2069,23 @@ async def honcho_llm_call_inner(
20462069 tool_calls_list : list [dict [str , Any ]] = []
20472070 if response .choices [0 ].message .tool_calls : # pyright: ignore
20482071 for tool_call in response .choices [0 ].message .tool_calls : # pyright: ignore
2049- tool_calls_list .append (
2050- {
2051- "id" : tool_call .id , # pyright: ignore
2052- "name" : tool_call .function .name , # pyright: ignore
2053- "input" : json .loads (tool_call .function .arguments ) # pyright: ignore
2054- if tool_call .function .arguments # pyright: ignore
2055- else {},
2056- }
2072+ call_data : dict [str , Any ] = {
2073+ "id" : tool_call .id , # pyright: ignore
2074+ "name" : tool_call .function .name , # pyright: ignore
2075+ "input" : json .loads (tool_call .function .arguments ) # pyright: ignore
2076+ if tool_call .function .arguments # pyright: ignore
2077+ else {},
2078+ }
2079+ # Preserve thought_signature for Gemini thinking models via CF
2080+ # Gateway — required for multi-turn tool use replay.
2081+ thought_sig = getattr (tool_call , "thought_signature" , None ) or ( # pyright: ignore
2082+ tool_call .model_extra .get ("thought_signature" ) # pyright: ignore
2083+ if getattr (tool_call , "model_extra" , None ) # pyright: ignore
2084+ else None
20572085 )
2086+ if thought_sig :
2087+ call_data ["thought_signature" ] = thought_sig
2088+ tool_calls_list .append (call_data )
20582089
20592090 cache_creation , cache_read = extract_openai_cache_tokens (usage )
20602091 return HonchoLLMCallResponse (
0 commit comments