Skip to content

Commit 44c9b91

Browse files
committed
fixup
1 parent 915ea54 commit 44c9b91

3 files changed

Lines changed: 129 additions & 83 deletions

File tree

py/src/braintrust/logger.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@
7777
TRACEPARENT_HEADER,
7878
TRACESTATE_HEADER,
7979
PropagatedState,
80-
format_baggage,
8180
format_traceparent,
8281
get_header,
82+
merge_baggage,
8383
parse_baggage,
8484
parse_traceparent,
8585
)
@@ -2543,12 +2543,11 @@ def _inject_into_carrier(
25432543
if tracestate:
25442544
_set_header(carrier, TRACESTATE_HEADER, tracestate)
25452545

2546-
# Merge braintrust.parent into any existing baggage, preserving other keys.
2546+
# Merge braintrust.parent into any existing baggage. Other vendors' members
2547+
# are forwarded byte-for-byte (see merge_baggage) so we never rewrite their
2548+
# percent-encoding.
25472549
existing = get_header(carrier, BAGGAGE_HEADER)
2548-
entries = parse_baggage(existing) if existing else {}
2549-
if braintrust_parent:
2550-
entries[BRAINTRUST_PARENT_KEY] = braintrust_parent
2551-
baggage_value = format_baggage(entries)
2550+
baggage_value = merge_baggage(existing, braintrust_parent)
25522551
if baggage_value is not None:
25532552
_set_header(carrier, BAGGAGE_HEADER, baggage_value)
25542553

py/src/braintrust/propagation.py

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import logging
1414
import re
1515
from typing import NamedTuple
16+
from urllib.parse import unquote
1617

1718

1819
__all__ = [
@@ -27,6 +28,7 @@
2728
"format_traceparent",
2829
"parse_baggage",
2930
"format_baggage",
31+
"merge_baggage",
3032
"get_header",
3133
]
3234

@@ -181,6 +183,48 @@ def format_baggage(entries):
181183
return ",".join(parts)
182184

183185

186+
def merge_baggage(existing, braintrust_parent):
187+
"""Merge a ``braintrust.parent`` value into an existing ``baggage`` header.
188+
189+
Unlike ``format_baggage(parse_baggage(...))``, this preserves every other
190+
vendor's baggage member byte-for-byte: their raw ``key=value`` substrings
191+
are forwarded exactly as received rather than decoded and re-encoded. This
192+
keeps Braintrust a transparent relay so we never silently rewrite another
193+
vendor's percent-encoding (e.g. ``path=a%2Fb`` must not become
194+
``path=a/b``).
195+
196+
Only the ``braintrust.parent`` member is (re)serialized, by us, from the
197+
``braintrust_parent`` argument. Any pre-existing ``braintrust.parent``
198+
member in ``existing`` is dropped in favor of the supplied value.
199+
200+
Returns the merged header value, or None if there is nothing to emit (so
201+
callers omit the header rather than emit an empty one).
202+
"""
203+
members = []
204+
if existing and isinstance(existing, str):
205+
capped = existing[:_MAX_BAGGAGE_LENGTH] if len(existing) > _MAX_BAGGAGE_LENGTH else existing
206+
for raw_member in capped.split(","):
207+
member = raw_member.strip()
208+
if not member or "=" not in member:
209+
continue
210+
# Identify the key (ignoring ';'-delimited properties) only to skip
211+
# any inbound braintrust.parent; everything else is forwarded raw.
212+
key_part = member.split(";", 1)[0].partition("=")[0]
213+
key = _percent_decode(key_part.strip())
214+
if key == BRAINTRUST_PARENT_KEY:
215+
continue
216+
members.append(member)
217+
218+
if braintrust_parent:
219+
encoded_key = _percent_encode(BRAINTRUST_PARENT_KEY)
220+
encoded_val = _percent_encode(str(braintrust_parent))
221+
members.append(f"{encoded_key}={encoded_val}")
222+
223+
if not members:
224+
return None
225+
return ",".join(members)
226+
227+
184228
def _is_hex(value, length):
185229
if not isinstance(value, str) or len(value) != length:
186230
return False
@@ -191,17 +235,17 @@ def _is_hex(value, length):
191235
return all(c in "0123456789abcdef" for c in value)
192236

193237

194-
# Baggage values percent-encode a small set of characters. We keep this minimal
195-
# and robust: encode only the characters that would break the baggage grammar
196-
# (the member/list delimiters and property separator), plus ``%`` so the
197-
# encoding is self-describing.
238+
# Per W3C Baggage, member values are percent-encoded. We decode inbound values
239+
# with a full RFC3986 ``unquote`` so we interoperate with standard encoders
240+
# (e.g. OpenTelemetry's propagator percent-encodes ``:`` as ``%3A``). On emit we
241+
# percent-encode our own values (the ``braintrust.parent`` entry and any key we
242+
# re-serialize). Byte-for-byte pass-through of *other* vendors' baggage is
243+
# handled separately by :func:`merge_baggage`, which forwards their raw member
244+
# strings unchanged rather than round-tripping them through this codec.
198245
#
199-
# Decoding is the exact inverse of this set rather than a full RFC3986
200-
# ``unquote``. This keeps the codec a transparent relay: a percent-escape we did
201-
# not emit (e.g. an upstream vendor's ``%2F``) is left byte-for-byte intact
202-
# through a parse -> format round trip, instead of being silently rewritten
203-
# (``path=a%2Fb`` -> ``path=a/b``). Because ``%`` itself is always encoded as
204-
# ``%25``, our own values still round-trip exactly.
246+
# Encoded set is kept minimal: the characters that would otherwise break the
247+
# baggage grammar (member/list delimiters and the property separator) plus
248+
# ``%`` so the encoding is self-describing.
205249
_PERCENT_ENCODE = {
206250
",": "%2C",
207251
";": "%3B",
@@ -210,39 +254,18 @@ def _is_hex(value, length):
210254
"%": "%25",
211255
}
212256

213-
# Inverse of _PERCENT_ENCODE. Both hex-case variants are accepted on decode so a
214-
# value we encoded (always uppercase) survives a round trip, while still being
215-
# lenient about an upstream that happens to lowercase these specific escapes.
216-
_PERCENT_DECODE = {}
217-
for _ch, _esc in _PERCENT_ENCODE.items():
218-
_PERCENT_DECODE[_esc.lower()] = _ch
219-
_PERCENT_DECODE[_esc.upper()] = _ch
220-
221-
_PERCENT_ESCAPE_RE = re.compile(r"%[0-9a-fA-F]{2}")
222-
223257

224258
def _percent_encode(value):
225259
out = []
226-
i = 0
227-
n = len(value)
228-
while i < n:
229-
ch = value[i]
230-
if ch == "%" and _PERCENT_ESCAPE_RE.match(value, i):
231-
# Already a well-formed percent-escape (ours or an upstream
232-
# vendor's). Forward it verbatim so the value is byte-preserved and
233-
# we never double-encode (``%2F`` must not become ``%252F``).
234-
out.append(value[i : i + 3])
235-
i += 3
236-
continue
260+
for ch in value:
237261
out.append(_PERCENT_ENCODE.get(ch, ch))
238-
i += 1
239262
return "".join(out)
240263

241264

242265
def _percent_decode(value):
243266
if "%" not in value:
244267
return value
245-
# Only decode the escapes we ourselves emit; leave any other percent-escape
246-
# (an upstream vendor's encoding) untouched so the value is forwarded
247-
# byte-for-byte.
248-
return _PERCENT_ESCAPE_RE.sub(lambda m: _PERCENT_DECODE.get(m.group(0), m.group(0)), value)
268+
try:
269+
return unquote(value)
270+
except Exception:
271+
return value

py/src/braintrust/test_propagation.py

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
format_baggage,
2222
format_traceparent,
2323
get_header,
24+
merge_baggage,
2425
parse_baggage,
2526
parse_traceparent,
2627
)
@@ -139,50 +140,73 @@ def test_format_encodes_keys_with_reserved_chars(self):
139140
# The serialized header must round-trip back to the original key/value.
140141
assert parse_baggage(formatted) == entries
141142

142-
def test_unrelated_baggage_value_is_byte_preserved(self):
143+
def test_parse_decodes_standard_encoder_values(self):
144+
# Per the W3C Baggage spec, values are percent-encoded, and standard
145+
# encoders (e.g. OpenTelemetry's propagator) percent-encode `:` as
146+
# `%3A`. We must fully decode inbound values to interoperate, otherwise
147+
# `braintrust.parent=project_id%3Aabc` is not recognized downstream.
148+
assert parse_baggage(f"{BRAINTRUST_PARENT_KEY}=project_id%3Aabc123") == {
149+
BRAINTRUST_PARENT_KEY: "project_id:abc123"
150+
}
151+
152+
153+
class TestMergeBaggage:
154+
def test_adds_braintrust_parent_when_no_existing(self):
155+
merged = merge_baggage(None, "project_id:abc")
156+
assert parse_baggage(merged) == {BRAINTRUST_PARENT_KEY: "project_id:abc"}
157+
158+
def test_none_parent_and_no_existing_returns_none(self):
159+
assert merge_baggage(None, None) is None
160+
assert merge_baggage("", None) is None
161+
162+
def test_preserves_unrelated_baggage_byte_for_byte(self):
143163
# An upstream vendor may percent-encode octets outside the small set
144164
# Braintrust itself encodes (e.g. `/` as `%2F`). Forwarding such baggage
145-
# must be a transparent relay: parse -> format must reproduce the exact
146-
# inbound wire bytes for keys Braintrust does not own, otherwise we
147-
# silently rewrite another vendor's value (`path=a%2Fb` -> `path=a/b`).
148-
inbound = "path=a%2Fb,user=alice"
149-
formatted = format_baggage(parse_baggage(inbound))
150-
assert formatted == inbound
151-
152-
def test_round_trip_does_not_decode_unowned_percent_sequences(self):
165+
# must be a transparent relay: the raw member is forwarded unchanged, so
166+
# we never silently rewrite another vendor's value (`path=a%2Fb` must not
167+
# become `path=a/b`).
168+
merged = merge_baggage("path=a%2Fb,user=alice", "project_id:abc")
169+
assert "path=a%2Fb" in merged
170+
assert "user=alice" in merged
171+
assert f"{BRAINTRUST_PARENT_KEY}=project_id:abc" in merged
172+
173+
def test_does_not_decode_unowned_percent_sequences(self):
153174
# `%41` is the percent-encoding of `A`. A transparent relay must not
154-
# collapse `a%41b` to `aAb`; the inbound wire form must survive a
155-
# parse -> format round trip unchanged.
156-
inbound = "k=a%41b"
157-
assert format_baggage(parse_baggage(inbound)) == inbound
158-
159-
def test_bare_percent_is_encoded_and_round_trips(self):
160-
# A literal `%` that is not part of a valid percent-escape must still be
161-
# encoded as `%25` so the value round-trips and the next hop does not
162-
# mis-decode it.
163-
for value in ["50% off", "100%", "a%4"]:
164-
entries = {"k": value}
165-
assert parse_baggage(format_baggage(entries)) == entries
166-
167-
def test_braintrust_owned_value_with_reserved_chars_round_trips(self):
168-
# Braintrust does own its `braintrust.parent` value, which can carry an
169-
# arbitrary `project_name` containing reserved characters. That value
170-
# must be encoded on emit and decoded on parse.
171-
entries = {BRAINTRUST_PARENT_KEY: "project_name:a,b c"}
172-
assert parse_baggage(format_baggage(entries)) == entries
173-
174-
def test_mixed_owned_and_unowned_values_round_trip(self):
175-
# A well-formed inbound header carrying both a Braintrust value with an
176-
# encoded reserved char and an upstream vendor's pre-encoded value must
177-
# preserve both: the owned value is decoded then re-encoded, the unowned
178-
# escape is forwarded byte-for-byte.
179-
inbound = f"{BRAINTRUST_PARENT_KEY}=project_name:a%2Cb,vendor=x%2Fy"
180-
formatted = format_baggage(parse_baggage(inbound))
181-
parsed = parse_baggage(formatted)
182-
assert parsed[BRAINTRUST_PARENT_KEY] == "project_name:a,b"
183-
assert parsed["vendor"] == "x%2Fy"
184-
# The unowned escape survives on the wire byte-for-byte.
185-
assert "vendor=x%2Fy" in formatted
175+
# collapse `a%41b` to `aAb`; the inbound wire form is forwarded unchanged.
176+
merged = merge_baggage("k=a%41b", None)
177+
assert merged == "k=a%41b"
178+
179+
def test_replaces_existing_braintrust_parent(self):
180+
# A stale inbound braintrust.parent must be dropped in favor of the
181+
# value we supply, not duplicated.
182+
merged = merge_baggage(
183+
f"{BRAINTRUST_PARENT_KEY}=project_id:old,vendor=x",
184+
"project_id:new",
185+
)
186+
parsed = parse_baggage(merged)
187+
assert parsed[BRAINTRUST_PARENT_KEY] == "project_id:new"
188+
assert parsed["vendor"] == "x"
189+
# Only one braintrust.parent member is emitted.
190+
assert merged.count(f"{BRAINTRUST_PARENT_KEY}=") == 1
191+
192+
def test_drops_existing_braintrust_parent_when_no_new_value(self):
193+
# If we have no braintrust.parent to add, an inbound one is still
194+
# consumed (it is ours to own) rather than forwarded raw.
195+
merged = merge_baggage(f"{BRAINTRUST_PARENT_KEY}=project_id:old,vendor=x", None)
196+
assert merged == "vendor=x"
197+
198+
def test_encodes_braintrust_parent_with_reserved_chars(self):
199+
# Braintrust owns its braintrust.parent value, which can carry an
200+
# arbitrary project_name containing reserved characters. That value must
201+
# be encoded on emit and decode back cleanly.
202+
merged = merge_baggage(None, "project_name:a,b c")
203+
assert parse_baggage(merged) == {BRAINTRUST_PARENT_KEY: "project_name:a,b c"}
204+
205+
def test_skips_malformed_existing_members(self):
206+
merged = merge_baggage("garbage,,k=v,no-equals", "project_id:abc")
207+
parsed = parse_baggage(merged)
208+
assert parsed["k"] == "v"
209+
assert parsed[BRAINTRUST_PARENT_KEY] == "project_id:abc"
186210

187211

188212
def test_get_header_case_insensitive():

0 commit comments

Comments
 (0)