@@ -495,6 +495,68 @@ def test_records_error_on_span(self, _reset_otel_globals):
495495 assert s .attributes [ATTR_ERROR_TYPE ] == "ValueError"
496496 assert s .status .status_code == trace .StatusCode .ERROR
497497
def test_execute_tool_error_from_define_tool_handler(self, _reset_otel_globals):
    """Verify that errors from @define_tool handlers propagate and get recorded on spans.

    This validates the fix where @define_tool no longer catches exceptions internally,
    allowing _execute_tool_call to record error.type and ERROR status on the
    execute_tool span — consistent with Node.js, .NET, and Go SDKs.
    """
    import asyncio

    from copilot import ToolInvocation, define_tool

    exporter, reader, tp, mp = _get_exporter_and_reader(_reset_otel_globals)
    telemetry = _make_telemetry(tracer_provider=tp, meter_provider=mp)

    # Use zero-param handler signature to avoid Pydantic + from __future__ import annotations issue
    @define_tool(description="A tool that always fails")
    def failing_tool() -> str:
        raise RuntimeError("deliberate failure")

    # Start an execute_tool span (as _execute_tool_call would)
    span = telemetry.start_execute_tool_span(
        tool_name="failing_tool",
        tool_call_id="tc-fail",
        description="A tool that always fails",
        arguments={},
    )

    # Simulate _execute_tool_call: invoke the handler, catch the error, record it
    invocation: ToolInvocation = {
        "session_id": "s1",
        "tool_call_id": "tc-fail",
        "tool_name": "failing_tool",
        "arguments": {},
    }
    operation_error = None
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(failing_tool.handler(invocation))
    except Exception as exc:
        operation_error = exc
        telemetry.record_error(span, exc)
    finally:
        # The handler is expected to raise, so a close() placed after
        # run_until_complete() inside the try body would never run and the
        # loop would leak. Closing in finally covers both outcomes.
        loop.close()

    span.end()

    # The exception MUST have propagated (not swallowed by @define_tool)
    assert operation_error is not None, "@define_tool must not catch handler exceptions"
    assert isinstance(operation_error, RuntimeError)

    # The span MUST have ERROR status and error.type
    s = exporter.get_finished_spans()[0]
    assert s.status.status_code == trace.StatusCode.ERROR
    assert s.attributes[ATTR_ERROR_TYPE] == "RuntimeError"

    # Operation duration metric should include error.type
    telemetry.record_operation_duration(
        0.1, None, None, "github", None, None, operation_error, OP_EXECUTE_TOOL
    )
    dps = _get_metric_data_points(reader, METRIC_OPERATION_DURATION)
    assert len(dps) > 0
    error_dp = [dp for dp in dps if dp.attributes.get(ATTR_ERROR_TYPE) == "RuntimeError"]
    assert len(error_dp) > 0, "duration metric includes error.type for failed tool"
559+
498560
499561# ---------------------------------------------------------------------------
500562# Tests: Tool result recording
0 commit comments