Skip to content

Commit f040916

Browse files
joeyzhao2018 and claude authored
durable: extract trace context from checkpoints (#818)
* durable: extract trace context from checkpoints and input payload
* reorganize the code
* further simplify
* separate out the cross-execution case
* clean up tests
* format
* the extraction part of the simplification done by dd-trace-py side injection
* format
* format
* reformat again
* update .flake8 to be compatible with black per black doc https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
* use regular propagator.extract
* move the durable function related extraction logic to durable.py
* adjust the sequence of extraction logic so that event headers/request take higher priority than the durable checkpoints

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 67ce9b6 commit f040916

4 files changed

Lines changed: 103 additions & 1 deletion

File tree

.flake8

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
[flake8]
2-
max-line-length = 100
2+
max-line-length = 100
3+
extend-ignore = E203,E701

datadog_lambda/durable.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
# Copyright 2019 Datadog, Inc.
55
import logging
66
import re
7+
import ujson as json
78

89
logger = logging.getLogger(__name__)
910

11+
_TRACE_CHECKPOINT_PREFIX = "_datadog_"
12+
1013

1114
def _parse_durable_execution_arn(arn):
1215
"""
@@ -56,6 +59,62 @@ def extract_durable_function_tags(event):
5659
VALID_DURABLE_STATUSES = {"SUCCEEDED", "FAILED", "PENDING"}
5760

5861

62+
def _extract_context_from_durable_checkpoint(operation):
    """
    Extract a trace context from one durable-execution checkpoint operation.

    The payload is read from ``operation["StepDetails"]["Result"]``. It may be
    a dict of propagation headers, or a JSON string that decodes to such a
    dict.

    Checkpoint data is written by dd-trace-py in Datadog style (x-datadog-*
    headers). Extraction goes through the standard ``propagator.extract``
    path, which honors DD_TRACE_PROPAGATION_STYLE_EXTRACT. The default extract
    list (datadog, tracecontext, baggage) already includes datadog. Customers
    who override the extract list MUST keep datadog in it.

    Returns the value of ``propagator.extract`` for the payload, or ``None``
    when the operation is not a dict, has no ``StepDetails`` dict, or its
    ``Result`` cannot be turned into a dict.
    """
    if not isinstance(operation, dict):
        return None

    step_details = operation.get("StepDetails")
    if not isinstance(step_details, dict):
        return None

    result = step_details.get("Result")
    if isinstance(result, str):
        try:
            result = json.loads(result)
        except Exception as e:
            # Best effort: a malformed checkpoint must never break the
            # invocation, but leave a trace of why propagation was skipped.
            # Only the operation name is logged, never the payload itself.
            logger.debug(
                "Failed to parse durable checkpoint %s: %s",
                operation.get("Name"),
                e,
            )
            return None

    if not isinstance(result, dict):
        return None

    # Imported lazily: datadog_lambda.tracing imports this module at load
    # time, so a top-level import here would be circular.
    from datadog_lambda.tracing import propagator

    return propagator.extract(result)
89+
90+
91+
def extract_context_from_durable_execution(event):
    """
    Extract the trace context stored in a durable execution's checkpoints.

    Looks at ``event["InitialExecutionState"]["Operations"]``, which may be
    either a list of operations or a mapping whose values are operations.
    Among the operations whose ``Name`` is the trace checkpoint prefix
    followed by an integer, the one with the highest integer is used.

    Returns the context extracted from that checkpoint, or ``None`` when the
    event has no usable execution state or no trace checkpoint operation.
    """
    if not isinstance(event, dict):
        return None

    # The key may be present with a null / non-dict value, in which case a
    # ``.get(key, {})`` default would not apply and chaining would raise.
    state = event.get("InitialExecutionState")
    if not isinstance(state, dict):
        return None

    operations = state.get("Operations")
    if isinstance(operations, dict):
        operations = list(operations.values())
    if not isinstance(operations, list) or not operations:
        return None

    highest = -1
    best_operation = None
    for operation in operations:
        if not isinstance(operation, dict):
            continue
        name = operation.get("Name")
        if not isinstance(name, str) or not name.startswith(_TRACE_CHECKPOINT_PREFIX):
            continue
        suffix = name[len(_TRACE_CHECKPOINT_PREFIX) :]
        try:
            number = int(suffix)
        except (TypeError, ValueError):
            # Not a numbered trace checkpoint; ignore it.
            continue
        if number > highest:
            highest = number
            best_operation = operation

    if best_operation is None:
        return None

    return _extract_context_from_durable_checkpoint(best_operation)
116+
117+
59118
def extract_durable_execution_status(response, event):
60119
if not isinstance(event, dict) or "DurableExecutionArn" not in event:
61120
return None

datadog_lambda/tracing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
EventTypes,
4545
EventSubtypes,
4646
)
47+
from datadog_lambda.durable import extract_context_from_durable_execution
4748

4849
if config.otel_enabled:
4950
from opentelemetry.trace import set_tracer_provider
@@ -633,6 +634,7 @@ def extract_dd_trace_context(
633634
global dd_trace_context
634635
trace_context_source = None
635636
event_source = parse_event_source(event)
637+
context = None
636638

637639
if extractor is not None:
638640
context = extract_context_custom_extractor(extractor, event, lambda_context)
@@ -654,6 +656,8 @@ def extract_dd_trace_context(
654656
context = extract_context_from_kinesis_event(event, lambda_context)
655657
elif event_source.equals(EventTypes.STEPFUNCTIONS):
656658
context = extract_context_from_step_functions(event, lambda_context)
659+
elif isinstance(event, dict) and "DurableExecutionArn" in event:
660+
context = extract_context_from_durable_execution(event)
657661
else:
658662
context = extract_context_from_lambda_context(lambda_context)
659663

tests/test_tracing.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,44 @@ def test_with_w3c_trace_headers(self):
394394
headers, {"headers": headers}
395395
)
396396

397+
@with_trace_propagation_style("datadog")
def test_extracts_durable_trace_context_from_latest_checkpoint_operation_map(self):
    """The highest-numbered ``_datadog_N`` checkpoint supplies the context."""
    # Payload of the older checkpoint (_datadog_0); must NOT be picked.
    stale_headers = {
        TraceHeader.TRACE_ID: "999",
        TraceHeader.PARENT_ID: "888",
        TraceHeader.SAMPLING_PRIORITY: "1",
    }
    # Payload of the newest checkpoint (_datadog_1); this one must win.
    latest_headers = {
        TraceHeader.TRACE_ID: "123",
        TraceHeader.PARENT_ID: "321",
        TraceHeader.SAMPLING_PRIORITY: "1",
    }

    # Operations are given as a mapping rather than a list.
    operations = {
        "0": {"Type": "EXECUTION"},
        "1": {"Name": "_datadog_0", "StepDetails": {"Result": stale_headers}},
        "2": {"Name": "_datadog_1", "StepDetails": {"Result": latest_headers}},
    }
    event = {
        "DurableExecutionArn": "arn:aws:lambda:us-east-2:123456789012:function:demo:1/durable-execution/demo/abc",
        "CheckpointToken": "token",
        "InitialExecutionState": {"Operations": operations},
    }

    ctx, source, _ = extract_dd_trace_context(event, get_mock_context())

    expected_ctx = Context(trace_id=123, span_id=321, sampling_priority=1)
    self.assertEqual(source, "event")
    self.assertEqual(ctx, expected_ctx)
434+
397435
@with_trace_propagation_style("datadog")
398436
def test_with_extractor_function(self):
399437
def extractor_foo(event, context):

0 commit comments

Comments
 (0)