Skip to content

Commit dbf5a20

Browse files
Preserve durable trace context in SQL history (#306)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 91609c0 commit dbf5a20

3 files changed

Lines changed: 1106 additions & 9 deletions

File tree

src/DurableTask.SqlServer/SqlUtils.cs

Lines changed: 298 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,21 @@ static class SqlUtils
2424
{
2525
static readonly Random random = new Random();
2626
static readonly char[] TraceContextSeparators = new char[] { '\n' };
27+
28+
// Durable Task MSSQL provider's W3C tracestate vendor key. The extended trace-context
29+
// fields (durable orchestration span identity, sub-orchestration client span id) are
30+
// carried as values of this single tracestate entry so the on-the-wire payload is
31+
// rolling-upgrade safe: older workers preserve the entire tracestate string (W3C spec
32+
// requires unknown vendor keys to be propagated unchanged), and newer workers know
33+
// to extract our fields out of it.
34+
const string TracestateVendorKey = "durabletask-mssql";
35+
const string TracestateVendorKeyEquals = TracestateVendorKey + "=";
36+
37+
// Field names inside the vendor key's value. Format: "id:...;span:...;client:..."
38+
const string VendorFieldId = "id";
39+
const string VendorFieldSpanId = "span";
40+
const string VendorFieldClientSpanId = "client";
41+
2742
const int MaxTagsPayloadSize = 8000;
2843

2944
public static string? GetStringOrNull(this DbDataReader reader, int columnIndex)
@@ -135,6 +150,7 @@ public static HistoryEvent GetHistoryEvent(this DbDataReader reader, bool isOrch
135150
{
136151
Input = GetPayloadText(reader),
137152
InstanceId = "", // Placeholder - shouldn't technically be needed (adding it requires a SQL schema change)
153+
ClientSpanId = GetSubOrchestrationClientSpanId(reader),
138154
Name = GetName(reader),
139155
Version = null,
140156
};
@@ -441,6 +457,24 @@ static DateTime GetUtcDateTime(DbDataReader reader, int ordinal)
441457

442458
internal static SqlString GetTraceContext(HistoryEvent e)
443459
{
460+
if (e is SubOrchestrationInstanceCreatedEvent subOrchestrationEvent)
461+
{
462+
if (string.IsNullOrEmpty(subOrchestrationEvent.ClientSpanId))
463+
{
464+
return SqlString.Null;
465+
}
466+
467+
// Wire format for SubOrchestrationInstanceCreated history rows:
468+
// "durabletask-mssql=client:<spanId>"
469+
// This is a single line — no traceparent, no newlines. SQL history queries return
470+
// the TraceContext column together with the row's EventType, and legacy readers
471+
// do not call GetTraceContext for SubOrchestrationInstanceCreated. The payload is
472+
// not projected into ExecutionStarted/EventRaised/TaskScheduled rows, so legacy
473+
// workers never interpret this vendor key as a traceparent.
474+
return new SqlString(
475+
BuildVendorKeyValue(id: null, spanId: null, clientSpanId: subOrchestrationEvent.ClientSpanId));
476+
}
477+
444478
if (e is not ISupportsDurableTraceContext eventWithTraceContext ||
445479
eventWithTraceContext.ParentTraceContext == null)
446480
{
@@ -449,18 +483,112 @@ internal static SqlString GetTraceContext(HistoryEvent e)
449483

450484
DistributedTraceContext traceContext = eventWithTraceContext.ParentTraceContext;
451485

452-
// We prefer a simple format instead of JSON because external callers may interact with this
453-
// data and we don't want to expose them to some internal JSON serialization format.
486+
// Wire format (rolling-upgrade safe, fully W3C-compatible):
487+
// line 1: traceparent (unchanged)
488+
// line 2: tracestate, optionally with our "durabletask-mssql=..." vendor key
489+
// prepended. The legacy reader uses
490+
// Split({'\n'}, count: 2, RemoveEmptyEntries) and assigns parts[1] to
491+
// TraceState wholesale, so the line MUST be a single W3C-valid tracestate
492+
// (no embedded newlines, no equals/comma except at the standard positions).
493+
//
494+
// The Id, SpanId, and (for sub-orch rows) ClientSpanId fields ride inside the
495+
// vendor key's value. W3C tracestate explicitly requires unknown vendor keys to
496+
// be preserved and propagated, so an older worker on a new row reads the entire
497+
// tracestate string (vendor key included), assigns it to Activity.TraceStateString,
498+
// and forwards it downstream untouched.
499+
//
500+
// Format of the vendor key value:
501+
// id:<durable-id>;span:<durable-spanid>[;client:<client-span-id>]
502+
// Field separator is ';' (allowed in W3C tracestate values).
454503
var sb = new StringBuilder(traceContext.TraceParent, capacity: 800);
455-
if (!string.IsNullOrEmpty(traceContext.TraceState))
504+
505+
bool hasId = !string.IsNullOrEmpty(traceContext.Id);
506+
bool hasSpanId = !string.IsNullOrEmpty(traceContext.SpanId);
507+
bool hasUserTraceState = !string.IsNullOrEmpty(traceContext.TraceState);
508+
509+
string? vendorValue = (hasId || hasSpanId)
510+
? BuildVendorKeyValue(traceContext.Id, traceContext.SpanId, clientSpanId: null)
511+
: null;
512+
513+
if (vendorValue != null || hasUserTraceState)
456514
{
457-
sb.Append('\n').Append(traceContext.TraceState);
515+
sb.Append('\n');
516+
if (vendorValue != null)
517+
{
518+
sb.Append(vendorValue);
519+
if (hasUserTraceState)
520+
{
521+
// Multiple tracestate entries are comma-separated per W3C spec.
522+
sb.Append(',').Append(traceContext.TraceState);
523+
}
524+
}
525+
else
526+
{
527+
sb.Append(traceContext.TraceState);
528+
}
458529
}
459530

460531
return sb.ToString();
461532
}
462533

463-
static DistributedTraceContext? GetTraceContext(DbDataReader reader)
534+
/// <summary>
/// Builds a complete "durabletask-mssql=..." tracestate entry: the vendor key, the '='
/// sign, and the value. Arguments that are null or empty are left out; the rest are
/// written in the fixed order id, span, client as "name:value" pairs joined by ';'.
/// Typical results:
///   durabletask-mssql=id:...;span:...
///   durabletask-mssql=id:...;span:...;client:...
///   durabletask-mssql=client:...
/// When every argument is null or empty the result is the bare prefix "durabletask-mssql=".
/// </summary>
/// <param name="id">Value for the "id" field, or null/empty to omit it.</param>
/// <param name="spanId">Value for the "span" field, or null/empty to omit it.</param>
/// <param name="clientSpanId">Value for the "client" field, or null/empty to omit it.</param>
/// <returns>The assembled tracestate entry text.</returns>
static string BuildVendorKeyValue(string? id, string? spanId, string? clientSpanId)
{
    var builder = new StringBuilder(TracestateVendorKeyEquals, capacity: 200);

    // Anything beyond this length means at least one field has already been written,
    // so the next field must be preceded by the ';' separator.
    int prefixLength = builder.Length;

    AppendIfPresent(VendorFieldId, id);
    AppendIfPresent(VendorFieldSpanId, spanId);
    AppendIfPresent(VendorFieldClientSpanId, clientSpanId);

    return builder.ToString();

    void AppendIfPresent(string fieldName, string? value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return;
        }

        if (builder.Length > prefixLength)
        {
            builder.Append(';');
        }

        builder.Append(fieldName).Append(':').Append(value);
    }
}
574+
575+
/// <summary>
/// Holds the individual pieces read out of a TraceContext column payload. The payload
/// keeps the legacy "traceparent\ntracestate" layout; the extra Durable Task MSSQL fields
/// (Id, SpanId, ClientSpanId) travel inside the tracestate under the single vendor key
/// "durabletask-mssql". Every member is optional and is null when the payload did not
/// supply it.
/// </summary>
struct ParsedTraceContext
{
    /// <summary>The traceparent line of the payload, or null when the payload has none.</summary>
    public string? TraceParent { get; set; }

    /// <summary>The tracestate with the "durabletask-mssql" vendor entry removed, or null when nothing remains.</summary>
    public string? TraceState { get; set; }

    /// <summary>The "id" field of the vendor entry, if present.</summary>
    public string? Id { get; set; }

    /// <summary>The "span" field of the vendor entry, if present.</summary>
    public string? SpanId { get; set; }

    /// <summary>The "client" field of the vendor entry, if present.</summary>
    public string? ClientSpanId { get; set; }
}
590+
591+
static ParsedTraceContext? ParseTraceContext(DbDataReader reader)
464592
{
465593
int ordinal = reader.GetOrdinal("TraceContext");
466594
if (reader.IsDBNull(ordinal))
@@ -474,18 +602,179 @@ internal static SqlString GetTraceContext(HistoryEvent e)
474602
return null;
475603
}
476604

605+
// Use the same split semantics as the pre-PR reader so all readers (old and new)
606+
// see identical (traceparent, tracestate) pairs for any given on-the-wire payload.
477607
string[] parts = text.Split(TraceContextSeparators, count: 2, StringSplitOptions.RemoveEmptyEntries);
478-
var traceContext = new DistributedTraceContext(traceParent: parts[0]);
479608

480-
if (parts.Length > 1)
609+
string? traceParent = null;
610+
string? rawTraceState = null;
611+
612+
if (parts.Length == 0)
613+
{
614+
return null;
615+
}
616+
else if (parts.Length == 1)
617+
{
618+
// Either a single-line "durabletask-mssql=client:..." payload (new sub-orchestration
619+
// wire format) or a one-line legacy payload that only contained a traceparent.
620+
if (parts[0].StartsWith(TracestateVendorKeyEquals, StringComparison.Ordinal))
621+
{
622+
rawTraceState = parts[0];
623+
}
624+
else
625+
{
626+
traceParent = parts[0];
627+
}
628+
}
629+
else
630+
{
631+
traceParent = parts[0];
632+
rawTraceState = parts[1];
633+
}
634+
635+
// Extract the durabletask-mssql vendor key from the tracestate, leaving any
636+
// user-supplied tracestate entries intact in the returned TraceState value.
637+
string? userTraceState = rawTraceState;
638+
string? id = null;
639+
string? spanId = null;
640+
string? clientSpanId = null;
641+
642+
if (!string.IsNullOrEmpty(rawTraceState))
643+
{
644+
userTraceState = ExtractVendorFields(
645+
rawTraceState!,
646+
out id,
647+
out spanId,
648+
out clientSpanId);
649+
}
650+
651+
return new ParsedTraceContext
652+
{
653+
TraceParent = traceParent,
654+
TraceState = string.IsNullOrEmpty(userTraceState) ? null : userTraceState,
655+
Id = id,
656+
SpanId = spanId,
657+
ClientSpanId = clientSpanId,
658+
};
659+
}
660+
661+
/// <summary>
/// Scans a comma-separated W3C tracestate string for "durabletask-mssql=..." entries,
/// splits the value of the FIRST (leftmost) such entry into its id/span/client fields,
/// and returns the tracestate with every "durabletask-mssql" entry removed.
/// </summary>
/// <remarks>
/// The writer places its own vendor entry in front of the caller's tracestate, so when a
/// payload carries more than one "durabletask-mssql" entry (for example because a stale
/// copy was propagated inside the caller's tracestate) the leftmost one is the current
/// one. Later duplicates are discarded rather than allowed to overwrite it.
/// </remarks>
/// <param name="tracestate">The raw tracestate text to scan.</param>
/// <param name="id">Receives the "id" field of the first vendor entry, or null.</param>
/// <param name="spanId">Receives the "span" field of the first vendor entry, or null.</param>
/// <param name="clientSpanId">Receives the "client" field of the first vendor entry, or null.</param>
/// <returns>
/// The input unchanged when it contains no vendor entry; null when it contains only
/// vendor (and empty) entries; otherwise the remaining entries, in their original order,
/// re-joined with ','.
/// </returns>
static string? ExtractVendorFields(
    string tracestate,
    out string? id,
    out string? spanId,
    out string? clientSpanId)
{
    id = null;
    spanId = null;
    clientSpanId = null;

    // Entries are kept untrimmed in the list so that the order and the inner optional
    // whitespace of the remaining entries survive the round trip.
    string[] entries = tracestate.Split(',');
    List<string>? userEntries = null;
    bool foundVendorKey = false;

    foreach (string entry in entries)
    {
        string trimmedEntry = entry.Trim();
        if (trimmedEntry.Length == 0)
        {
            continue;
        }

        if (trimmedEntry.StartsWith(TracestateVendorKeyEquals, StringComparison.Ordinal))
        {
            // Only the leftmost vendor entry is authoritative. ParseVendorFields resets
            // all three outputs, so parsing a later duplicate would silently replace the
            // current values with stale (or missing) ones.
            if (!foundVendorKey)
            {
                foundVendorKey = true;
                string vendorValue = trimmedEntry.Substring(TracestateVendorKeyEquals.Length);
                ParseVendorFields(vendorValue, out id, out spanId, out clientSpanId);
            }

            // Every vendor entry, duplicate or not, is stripped from the result.
            continue;
        }

        userEntries ??= new List<string>(entries.Length);
        userEntries.Add(entry);
    }

    if (!foundVendorKey)
    {
        // Nothing of ours in here: hand back the caller's tracestate exactly as received.
        return tracestate;
    }

    if (userEntries == null)
    {
        return null;
    }

    // Removing a leading or trailing vendor entry can leave whitespace at the outer edges.
    userEntries[0] = userEntries[0].TrimStart();
    userEntries[userEntries.Count - 1] = userEntries[userEntries.Count - 1].TrimEnd();
    return string.Join(",", userEntries);
}
719+
720+
/// <summary>
/// Breaks a vendor key value of the form "id:...;span:...;client:..." into its fields.
/// Segments are separated by ';' and each segment is split at its first ':'. Segments
/// with no ':' or with an empty name are skipped, as are unrecognized field names.
/// Field names are matched with ordinal (case-sensitive) equality; when a name occurs
/// more than once the last occurrence is the one reported.
/// </summary>
/// <param name="vendorValue">The text that follows "durabletask-mssql=".</param>
/// <param name="id">Receives the "id" field, or null when absent.</param>
/// <param name="spanId">Receives the "span" field, or null when absent.</param>
/// <param name="clientSpanId">Receives the "client" field, or null when absent.</param>
static void ParseVendorFields(string vendorValue, out string? id, out string? spanId, out string? clientSpanId)
{
    id = null;
    spanId = null;
    clientSpanId = null;

    string[] segments = vendorValue.Split(';');
    for (int i = 0; i < segments.Length; i++)
    {
        string segment = segments[i];
        int separator = segment.IndexOf(':');
        if (separator > 0)
        {
            // Everything after the first ':' is the value, so values may contain ':'.
            string value = segment.Substring(separator + 1);
            switch (segment.Substring(0, separator))
            {
                case VendorFieldId:
                    id = value;
                    break;
                case VendorFieldSpanId:
                    spanId = value;
                    break;
                case VendorFieldClientSpanId:
                    clientSpanId = value;
                    break;
            }
        }
    }
}
751+
752+
/// <summary>
/// Reads the current row's TraceContext payload and converts it into a
/// <see cref="DistributedTraceContext"/>.
/// </summary>
/// <param name="reader">The data reader positioned on the row to read.</param>
/// <returns>
/// The trace context built from the payload's traceparent, tracestate, Id and SpanId,
/// with ActivityStartTime taken from GetTimestamp; or null when the payload could not be
/// parsed or has no traceparent.
/// </returns>
static DistributedTraceContext? GetTraceContext(DbDataReader reader)
{
    // A payload without a traceparent carries only sub-orchestration-specific data
    // (e.g. durabletask-mssql=client:...), which is not a DistributedTraceContext.
    if (ParseTraceContext(reader) is not ParsedTraceContext payload ||
        string.IsNullOrEmpty(payload.TraceParent))
    {
        return null;
    }

    return new DistributedTraceContext(traceParent: payload.TraceParent!)
    {
        TraceState = payload.TraceState,
        Id = payload.Id,
        SpanId = payload.SpanId,
        ActivityStartTime = GetTimestamp(reader),
    };
}
488772

773+
/// <summary>
/// Returns the "client" field of the row's TraceContext payload, or null when the payload
/// could not be parsed or carries no such field.
/// </summary>
/// <param name="reader">The data reader positioned on the row to read.</param>
static string? GetSubOrchestrationClientSpanId(DbDataReader reader)
{
    ParsedTraceContext? payload = ParseTraceContext(reader);
    return payload.HasValue ? payload.Value.ClientSpanId : null;
}
777+
489778
internal static IDictionary<string, string>? GetTags(DbDataReader reader)
490779
{
491780
int ordinal = reader.GetOrdinal("Tags");

0 commit comments

Comments
 (0)