@@ -621,6 +621,180 @@ def get_injected_authorizer_data(event, is_http_api) -> dict:
621621 logger .debug ("Failed to check if invocated by an authorizer. error %s" , e )
622622
623623
def is_durable_execution_replay(event):
    """
    Check if this Lambda invocation is a durable execution replay.

    A replay is detected when InitialExecutionState carries more than one
    operation, meaning the invocation resumes from a previous checkpoint
    rather than starting fresh. Callers use this to skip creating spans
    that would duplicate ones emitted by an earlier invocation.

    Args:
        event: The raw Lambda event. Anything that is not a dict, or a dict
            without a "DurableExecutionArn" key, is not a durable execution.

    Returns:
        True if this is a replay invocation (should skip inferred span)
        False if this is first invocation or not a durable execution
    """
    if not isinstance(event, dict) or "DurableExecutionArn" not in event:
        return False

    # A key that is present but null (or otherwise malformed) must not raise:
    # this function runs on the invocation path, so treat it as "no operations".
    initial_state = event.get("InitialExecutionState")
    operations = (
        initial_state.get("Operations") if isinstance(initial_state, dict) else None
    )
    if not isinstance(operations, (list, tuple)):
        operations = ()

    # Per the original author's note: the SDK always includes the EXECUTION
    # operation itself (1 operation on first invocation), so a replay has >1
    # operations (the EXECUTION + previously completed operations).
    is_replay = len(operations) > 1

    logger.debug(
        "Durable execution %s invocation detected (%d operations)",
        "replay" if is_replay else "first",
        len(operations),
    )
    return is_replay
660+
661+
def extract_context_from_durable_execution(event, lambda_context):
    """
    Extract Datadog trace context from an AWS Lambda Durable Execution event.

    Looks for operations named "_dd_trace_context" in
    InitialExecutionState.Operations and reads the JSON-encoded trace headers
    stored in their StepDetails.Result. Operations with any other name are
    never read or modified.

    Operations are scanned in reverse so the LAST usable trace checkpoint
    wins. A malformed checkpoint is skipped and the scan continues with
    earlier ones.

    Args:
        event: The raw Lambda event.
        lambda_context: Unused; kept for signature parity with callers.

    Returns:
        The context returned by ``propagator.extract`` for the last usable
        checkpoint, or None if the event is not a durable execution or no
        usable checkpoint exists. Never raises.
    """
    try:
        if not isinstance(event, dict):
            return None

        if "DurableExecutionArn" not in event or "InitialExecutionState" not in event:
            return None

        initial_state = event.get("InitialExecutionState")
        operations = (
            initial_state.get("Operations")
            if isinstance(initial_state, dict)
            else None
        )
        if not isinstance(operations, (list, tuple)):
            logger.debug("Durable execution event has no usable Operations list")
            return None

        logger.debug(
            "Found %d operations in durable execution InitialExecutionState",
            len(operations),
        )

        # Scan in reverse to find the LAST trace context checkpoint.
        for operation in reversed(operations):
            # Skip (rather than abort on) entries that are not well-formed so
            # one bad operation cannot hide an earlier valid checkpoint.
            if not isinstance(operation, dict):
                continue
            if operation.get("Name") != "_dd_trace_context":
                continue

            operation_id = operation.get("Id")

            # Trace context is in StepDetails.Result (standard STEP format)
            step_details = operation.get("StepDetails")
            payload_str = (
                step_details.get("Result") if isinstance(step_details, dict) else None
            )
            if not payload_str:
                logger.debug(
                    "Trace checkpoint %s has no Result, skipping", operation_id
                )
                continue

            try:
                payload = json.loads(payload_str)
                if not isinstance(payload, dict):
                    logger.debug(
                        "Trace checkpoint %s payload is not a dict: %s",
                        operation_id,
                        type(payload),
                    )
                    continue

                # Require both ids up front so a partial payload is skipped
                # before handing it to the propagator.
                if not (
                    payload.get("x-datadog-trace-id")
                    and payload.get("x-datadog-parent-id")
                ):
                    continue

                # Use the propagator to restore the full context carried in
                # the headers, not just the two ids checked above.
                context = propagator.extract(payload)
                if context and context.trace_id:
                    logger.debug(
                        "Extracted Datadog trace context from trace checkpoint %s: %s",
                        operation_id,
                        context,
                    )
                    return context
            except (TypeError, ValueError) as e:
                # json.JSONDecodeError is a ValueError subclass.
                logger.debug("Failed to parse trace checkpoint payload: %s", e)
                continue

        logger.debug("No usable trace context checkpoint found in operations")
    except Exception as e:
        # Best effort: trace extraction must never break the invocation.
        logger.debug("Failed to extract trace context from durable execution: %s", e)

    return None
742+
743+
def create_durable_execution_root_span(event):
    """
    Create the durable execution root span on the FIRST invocation only.

    - First invocation: starts an "aws.durable-execution" span and returns it.
    - Replay invocation (as reported by is_durable_execution_replay):
      returns None, because the root span belongs to a prior invocation and
      must not be recreated.

    The service name comes from the DD_DURABLE_EXECUTION_SERVICE environment
    variable, falling back to "aws.durable-execution". The resource is the
    portion of the execution ARN after its last ":".

    Args:
        event: The raw Lambda event.

    Returns:
        The root span (caller must call span.finish() when the invocation
        ends), or None if this is not a durable execution, is a replay, or
        span creation failed. Never raises.
    """
    try:
        if not isinstance(event, dict):
            return None

        execution_arn = event.get("DurableExecutionArn")
        if not execution_arn or "InitialExecutionState" not in event:
            return None

        # On replay, don't recreate the root span: it was already emitted in
        # a prior invocation.
        if is_durable_execution_replay(event):
            logger.debug(
                "Durable execution replay: skipping root span creation"
            )
            return None

        # First invocation: create a new root span.
        service_name = (
            os.environ.get("DD_DURABLE_EXECUTION_SERVICE") or "aws.durable-execution"
        )
        # rsplit returns the whole string when there is no ":", so no
        # separate membership check is needed.
        resource = execution_arn.rsplit(":", 1)[-1]

        span = tracer.trace(
            "aws.durable-execution",
            service=service_name,
            resource=resource,
            span_type="serverless",
        )

        if span:
            span.set_tag("durable.execution_arn", execution_arn)
            logger.debug(
                "Created durable execution root span: trace_id=%s, span_id=%s, resource=%s",
                span.trace_id,
                span.span_id,
                resource,
            )
        else:
            logger.debug("tracer.trace() returned no span for durable execution")

        return span

    except Exception as e:
        # Best effort: tracing must never break the invocation.
        logger.debug("Failed to create durable execution root span: %s", e)
        return None
796+
797+
624798def extract_dd_trace_context (
625799 event , lambda_context , extractor = None , decode_authorizer_context : bool = True
626800):
@@ -634,6 +808,16 @@ def extract_dd_trace_context(
634808 trace_context_source = None
635809 event_source = parse_event_source (event )
636810
811+ # Check for AWS Lambda Durable Execution events first (before other checks)
812+ # This ensures trace context is properly continued across durable invocations
813+ durable_context = extract_context_from_durable_execution (event , lambda_context )
814+ if _is_context_complete (durable_context ):
815+ logger .debug ("Extracted Datadog trace context from durable execution" )
816+ dd_trace_context = durable_context
817+ trace_context_source = TraceContextSource .EVENT
818+ logger .debug ("extracted dd trace context from durable execution: %s" , dd_trace_context )
819+ return dd_trace_context , trace_context_source , event_source
820+
637821 if extractor is not None :
638822 context = extract_context_custom_extractor (extractor , event , lambda_context )
639823 elif isinstance (event , (set , dict )) and "request" in event :
0 commit comments