@@ -495,6 +495,70 @@ def test_records_error_on_span(self, _reset_otel_globals):
495495 assert s .attributes [ATTR_ERROR_TYPE ] == "ValueError"
496496 assert s .status .status_code == trace .StatusCode .ERROR
497497
def test_execute_tool_error_from_define_tool_handler(self, _reset_otel_globals):
    """Verify that errors from @define_tool handlers propagate and get recorded on spans.

    This validates the fix where @define_tool no longer catches exceptions internally,
    allowing _execute_tool_call to record error.type and ERROR status on the
    execute_tool span — consistent with Node.js, .NET, and Go SDKs.

    The test checks three things in order:
      1. the handler's RuntimeError reaches the caller,
      2. the finished span carries ERROR status and ``error.type``,
      3. the operation-duration metric carries ``error.type`` as well.
    """
    import asyncio

    from copilot import ToolInvocation, define_tool

    exporter, reader, tp, mp = _get_exporter_and_reader(_reset_otel_globals)
    telemetry = _make_telemetry(tracer_provider=tp, meter_provider=mp)

    # Use zero-param handler signature to avoid Pydantic
    # + from __future__ import annotations issue
    @define_tool(description="A tool that always fails")
    def failing_tool() -> str:
        raise RuntimeError("deliberate failure")

    # Start an execute_tool span (as _execute_tool_call would)
    span = telemetry.start_execute_tool_span(
        tool_name="failing_tool",
        tool_call_id="tc-fail",
        description="A tool that always fails",
        arguments={},
    )

    # Simulate _execute_tool_call: invoke the handler, catch the error, record it
    invocation: ToolInvocation = {
        "session_id": "s1",
        "tool_call_id": "tc-fail",
        "tool_name": "failing_tool",
        "arguments": {},
    }
    operation_error = None
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(failing_tool.handler(invocation))
    except Exception as exc:
        operation_error = exc
        telemetry.record_error(span, exc)
    finally:
        # The handler is expected to raise, so the loop must be closed here;
        # closing it only after a successful run would leak it on every pass
        # of this test.
        loop.close()

    span.end()

    # The exception MUST have propagated (not swallowed by @define_tool)
    assert operation_error is not None, "@define_tool must not catch handler exceptions"
    assert isinstance(operation_error, RuntimeError)

    # The span MUST have ERROR status and error.type
    s = exporter.get_finished_spans()[0]
    assert s.status.status_code == trace.StatusCode.ERROR
    assert s.attributes[ATTR_ERROR_TYPE] == "RuntimeError"

    # Operation duration metric should include error.type
    telemetry.record_operation_duration(
        0.1, None, None, "github", None, None, operation_error, OP_EXECUTE_TOOL
    )
    dps = _get_metric_data_points(reader, METRIC_OPERATION_DURATION)
    assert len(dps) > 0
    error_dp = [dp for dp in dps if dp.attributes.get(ATTR_ERROR_TYPE) == "RuntimeError"]
    assert len(error_dp) > 0, "duration metric includes error.type for failed tool"
561+
498562
499563# ---------------------------------------------------------------------------
500564# Tests: Tool result recording
0 commit comments