@@ -495,6 +495,69 @@ def test_records_error_on_span(self, _reset_otel_globals):
495495 assert s .attributes [ATTR_ERROR_TYPE ] == "ValueError"
496496 assert s .status .status_code == trace .StatusCode .ERROR
497497
def test_execute_tool_error_from_define_tool_handler(self, _reset_otel_globals):
    """Verify that errors from @define_tool handlers propagate and get recorded on spans.

    This validates the fix where @define_tool no longer catches exceptions internally,
    allowing _execute_tool_call to record error.type and ERROR status on the
    execute_tool span — consistent with Node.js, .NET, and Go SDKs.

    The test drives the handler on a private event loop, records the raised
    exception on the span, and then checks both the span and the operation
    duration metric for the ``RuntimeError`` error type.
    """
    import asyncio

    from copilot import ToolInvocation, define_tool

    exporter, reader, tp, mp = _get_exporter_and_reader(_reset_otel_globals)
    telemetry = _make_telemetry(tracer_provider=tp, meter_provider=mp)

    # Use zero-param handler signature to avoid Pydantic + from __future__ import annotations issue
    @define_tool(description="A tool that always fails")
    def failing_tool() -> str:
        raise RuntimeError("deliberate failure")

    # Start an execute_tool span (as _execute_tool_call would)
    span = telemetry.start_execute_tool_span(
        tool_name="failing_tool",
        tool_call_id="tc-fail",
        description="A tool that always fails",
        arguments={},
    )

    # Simulate _execute_tool_call: invoke the handler, catch the error, record it
    invocation: ToolInvocation = {
        "session_id": "s1",
        "tool_call_id": "tc-fail",
        "tool_name": "failing_tool",
        "arguments": {},
    }
    operation_error = None
    # Create the loop outside the try so that only failures of the handler
    # itself are captured and recorded as the tool error.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(failing_tool.handler(invocation))
    except Exception as exc:
        operation_error = exc
        telemetry.record_error(span, exc)
    finally:
        # The handler is expected to raise, so the loop must be closed here;
        # a close() placed after run_until_complete() would never execute.
        loop.close()

    span.end()

    # The exception MUST have propagated (not swallowed by @define_tool)
    assert operation_error is not None, "@define_tool must not catch handler exceptions"
    assert isinstance(operation_error, RuntimeError)

    # The span MUST have ERROR status and error.type
    s = exporter.get_finished_spans()[0]
    assert s.status.status_code == trace.StatusCode.ERROR
    assert s.attributes[ATTR_ERROR_TYPE] == "RuntimeError"

    # Operation duration metric should include error.type
    telemetry.record_operation_duration(
        0.1, None, None, "github", None, None, operation_error, OP_EXECUTE_TOOL
    )
    dps = _get_metric_data_points(reader, METRIC_OPERATION_DURATION)
    assert len(dps) > 0
    error_dp = [dp for dp in dps if dp.attributes.get(ATTR_ERROR_TYPE) == "RuntimeError"]
    assert len(error_dp) > 0, "duration metric includes error.type for failed tool"
560+
498561
499562# ---------------------------------------------------------------------------
500563# Tests: Tool result recording
0 commit comments