2121
2222from src .config import ConfiguredModelSettings , ModelConfig
2323from src .exceptions import ValidationException
24+ from src .telemetry .llm_call_metrics import (
25+ finalize_success ,
26+ mark_max_iterations ,
27+ observe_llm_call ,
28+ )
2429from src .telemetry .logging import conditional_observe
2530from src .telemetry .reasoning_traces import log_reasoning_trace
2631
@@ -193,6 +198,11 @@ async def honcho_llm_call(
193198 # tenacity uses 1-indexed attempts.
194199 current_attempt .set (1 )
195200
201+ # Captures the AttemptPlan that produced the most recent (and on success,
202+ # the winning) call so observability can label by the model that actually
203+ # answered — primary on early attempts, backup on the final retry.
204+ last_plan : dict [str , AttemptPlan | None ] = {"value" : None }
205+
196206 def _get_attempt_plan () -> AttemptPlan :
197207 plan = plan_attempt (
198208 runtime_model_config = runtime_model_config ,
@@ -201,6 +211,7 @@ def _get_attempt_plan() -> AttemptPlan:
201211 call_thinking_budget_tokens = thinking_budget_tokens ,
202212 call_reasoning_effort = reasoning_effort ,
203213 )
214+ last_plan ["value" ] = plan
204215 update_current_langfuse_observation (
205216 plan .provider ,
206217 plan .model ,
@@ -304,11 +315,92 @@ def _trace_stop_seqs() -> list[str] | None:
304315 stop_seqs if stop_seqs is not None else runtime_model_config .stop_sequences
305316 )
306317
307- # Tool-less path: call once and return.
308- if not tools or not tool_executor :
309- result : (
310- HonchoLLMCallResponse [Any ] | AsyncIterator [HonchoLLMCallStreamChunk ]
311- ) = await decorated ()
318+ with observe_llm_call (
319+ track_name = track_name ,
320+ trace_name = trace_name ,
321+ runtime_model_config = runtime_model_config ,
322+ ) as obs_state :
323+ # Tool-less path: call once and return.
324+ if not tools or not tool_executor :
325+ result : (
326+ HonchoLLMCallResponse [Any ] | AsyncIterator [HonchoLLMCallStreamChunk ]
327+ ) = await decorated ()
328+ response_for_metrics = (
329+ result if isinstance (result , HonchoLLMCallResponse ) else None
330+ )
331+ winning = last_plan ["value" ]
332+ finalize_success (
333+ obs_state ,
334+ response = response_for_metrics ,
335+ final_provider = str (winning .provider ) if winning else None ,
336+ final_model = winning .model if winning else None ,
337+ attempts = current_attempt .get (),
338+ iterations = None ,
339+ has_backup = runtime_model_config .fallback is not None ,
340+ )
341+ if trace_name and isinstance (result , HonchoLLMCallResponse ):
342+ log_reasoning_trace (
343+ task_type = trace_name ,
344+ model_config = runtime_model_config ,
345+ prompt = prompt ,
346+ response = result ,
347+ max_tokens = max_tokens ,
348+ thinking_budget_tokens = _trace_thinking_budget (),
349+ reasoning_effort = _trace_reasoning_effort (),
350+ json_mode = json_mode ,
351+ stop_seqs = _trace_stop_seqs (),
352+ messages = messages ,
353+ )
354+ return result
355+
356+ # execute_tool_loop raises ValidationException on out-of-range
357+ # max_tool_iterations; fail-fast is cheaper than silent clamping here.
358+ result = await execute_tool_loop (
359+ prompt = prompt ,
360+ max_tokens = max_tokens ,
361+ messages = messages ,
362+ tools = tools ,
363+ tool_choice = tool_choice ,
364+ tool_executor = tool_executor ,
365+ max_tool_iterations = max_tool_iterations ,
366+ response_model = response_model ,
367+ json_mode = json_mode ,
368+ temperature = temperature ,
369+ stop_seqs = stop_seqs ,
370+ verbosity = verbosity ,
371+ enable_retry = enable_retry ,
372+ retry_attempts = retry_attempts ,
373+ max_input_tokens = max_input_tokens ,
374+ get_attempt_plan = _get_attempt_plan ,
375+ before_retry_callback = before_retry_callback ,
376+ stream_final = stream_final_only ,
377+ iteration_callback = iteration_callback ,
378+ track_name = track_name ,
379+ trace_name = trace_name ,
380+ )
381+ response_for_metrics = (
382+ result if isinstance (result , HonchoLLMCallResponse ) else None
383+ )
384+ winning = last_plan ["value" ]
385+ iterations = (
386+ response_for_metrics .iterations
387+ if response_for_metrics
388+ else (getattr (result , "iterations" , None ))
389+ )
390+ finalize_success (
391+ obs_state ,
392+ response = response_for_metrics ,
393+ final_provider = str (winning .provider ) if winning else None ,
394+ final_model = winning .model if winning else None ,
395+ attempts = current_attempt .get (),
396+ iterations = iterations ,
397+ has_backup = runtime_model_config .fallback is not None ,
398+ )
399+ if response_for_metrics is not None and getattr (
400+ response_for_metrics , "hit_max_iterations" , False
401+ ):
402+ mark_max_iterations (obs_state , iterations or max_tool_iterations )
403+
312404 if trace_name and isinstance (result , HonchoLLMCallResponse ):
313405 log_reasoning_trace (
314406 task_type = trace_name ,
@@ -324,43 +416,5 @@ def _trace_stop_seqs() -> list[str] | None:
324416 )
325417 return result
326418
327- # execute_tool_loop raises ValidationException on out-of-range
328- # max_tool_iterations; fail-fast is cheaper than silent clamping here.
329- result = await execute_tool_loop (
330- prompt = prompt ,
331- max_tokens = max_tokens ,
332- messages = messages ,
333- tools = tools ,
334- tool_choice = tool_choice ,
335- tool_executor = tool_executor ,
336- max_tool_iterations = max_tool_iterations ,
337- response_model = response_model ,
338- json_mode = json_mode ,
339- temperature = temperature ,
340- stop_seqs = stop_seqs ,
341- verbosity = verbosity ,
342- enable_retry = enable_retry ,
343- retry_attempts = retry_attempts ,
344- max_input_tokens = max_input_tokens ,
345- get_attempt_plan = _get_attempt_plan ,
346- before_retry_callback = before_retry_callback ,
347- stream_final = stream_final_only ,
348- iteration_callback = iteration_callback ,
349- )
350- if trace_name and isinstance (result , HonchoLLMCallResponse ):
351- log_reasoning_trace (
352- task_type = trace_name ,
353- model_config = runtime_model_config ,
354- prompt = prompt ,
355- response = result ,
356- max_tokens = max_tokens ,
357- thinking_budget_tokens = _trace_thinking_budget (),
358- reasoning_effort = _trace_reasoning_effort (),
359- json_mode = json_mode ,
360- stop_seqs = _trace_stop_seqs (),
361- messages = messages ,
362- )
363- return result
364-
365419
366420__all__ = ["honcho_llm_call" ]
0 commit comments