braintrustdata
diff --git a/py/src/braintrust/env.py b/py/src/braintrust/env.py
Lines changed: 48 additions & 0 deletions
diff --git a/py/src/braintrust/id_gen.py b/py/src/braintrust/id_gen.py
Lines changed: 4 additions & 2 deletions
diff --git a/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py
Lines changed: 14 additions & 7 deletions
diff --git a/py/src/braintrust/integrations/huggingface_hub/test_huggingface_hub.py b/py/src/braintrust/integrations/huggingface_hub/test_huggingface_hub.py
Lines changed: 0 additions & 3 deletions
diff --git a/py/src/braintrust/integrations/langchain/test_callbacks.py b/py/src/braintrust/integrations/langchain/test_callbacks.py
Lines changed: 22 additions & 14 deletions
diff --git a/py/src/braintrust/integrations/langchain/test_context.py b/py/src/braintrust/integrations/langchain/test_context.py
Lines changed: 6 additions & 3 deletions
@@ -1,4 +1,5 @@
 import io
+import logging
 import math
 import os
 import shlex
@@ -8,6 +9,9 @@
 from typing import TypeVar, cast
 
 
+_logger = logging.getLogger(__name__)
+
+
 T = TypeVar("T")
 EnvValue = bool | float | int | str
 _Parser = Callable[[str], EnvValue | None]
@@ -150,6 +154,47 @@ def _parse_dotenv_contents(self, contents: str) -> EnvValue | None:
         return None
 
 
+_warned_legacy_uuid_conflict = False
+
+
+def _resolve_use_legacy_uuid_ids() -> bool:
+    """Return True when the SDK should generate legacy UUID span/trace IDs.
+
+    The default is OpenTelemetry-compatible hex IDs (16-byte trace id / 8-byte
+    span id) with V4 span-component export. Setting BRAINTRUST_LEGACY_IDS
+    opts back into UUID IDs with V3 export.
+
+    BRAINTRUST_OTEL_COMPAT (which selects the OpenTelemetry context manager)
+    requires hex IDs, so it always wins: if both it and BRAINTRUST_LEGACY_IDS
+    are set, this returns False and logs a warning (at most once per process,
+    tracked by ``_warned_legacy_uuid_conflict``, however often it is called).
+    """
+    global _warned_legacy_uuid_conflict
+
+    legacy = EnvVar("BRAINTRUST_LEGACY_IDS", EnvParser.BOOL).get(False)
+    if EnvVar("BRAINTRUST_OTEL_COMPAT", EnvParser.BOOL).get(False):
+        if legacy and not _warned_legacy_uuid_conflict:
+            _warned_legacy_uuid_conflict = True
+            _logger.warning(
+                "BRAINTRUST_LEGACY_IDS is ignored because BRAINTRUST_OTEL_COMPAT "
+                "requires OpenTelemetry-compatible hex span IDs. Using hex IDs."
+            )
+        return False
+    return legacy
+
+
+class _LegacyUuidIdsField:
+    """Lazy descriptor (``__get__`` only) for the legacy-UUID-IDs setting.
+
+    Each attribute access calls ``_resolve_use_legacy_uuid_ids()`` rather than
+    caching a value at import time, so changing the relevant env vars (e.g. in
+    tests) is reflected immediately.
+    """
+
+    def __get__(self, instance: object, owner: type | None = None) -> bool:
+        return _resolve_use_legacy_uuid_ids()
+
+
 class BraintrustEnv:
     API_KEY = EnvVar("BRAINTRUST_API_KEY", EnvParser.STRING)
     HTTP_TIMEOUT = EnvVar("BRAINTRUST_HTTP_TIMEOUT", EnvParser.FLOAT)
@@ -163,3 +208,6 @@ class BraintrustEnv:
     ALL_PUBLISH_PAYLOADS_DIR = EnvVar("BRAINTRUST_ALL_PUBLISH_PAYLOADS_DIR", EnvParser.STRING)
     DISABLE_ATEXIT_FLUSH = EnvVar("BRAINTRUST_DISABLE_ATEXIT_FLUSH", EnvParser.BOOL)
     OTEL_COMPAT = EnvVar("BRAINTRUST_OTEL_COMPAT", EnvParser.BOOL)
+    # Opt out of the default OpenTelemetry-compatible hex span/trace IDs and use
+    # legacy UUID-based IDs (and V3 span-component export) instead.
+    LEGACY_IDS = _LegacyUuidIdsField()
@@ -10,9 +10,11 @@ def get_id_generator():
 
     This eliminates global state and makes tests parallelizable.
     Each caller gets their own generator instance.
+
+    Defaults to OpenTelemetry-compatible hex IDs. Set BRAINTRUST_LEGACY_IDS
+    to opt back into legacy UUID-based IDs.
     """
-    use_otel = BraintrustEnv.OTEL_COMPAT.get(False)
-    return OTELIDGenerator() if use_otel else UUIDGenerator()
+    return UUIDGenerator() if BraintrustEnv.LEGACY_IDS else OTELIDGenerator()
 
 
 class IDGenerator(ABC):
 
@@ -210,13 +210,16 @@ async def calculator_handler(args):
         assert tool_span["output"] is not None
         assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"])
 
-    root_span_id = task_span["span_id"]
+    # Descendants share the task's trace (``root_span_id``); direct children
+    # reference the task's ``span_id`` in ``span_parents``.
+    task_root_span_id = task_span["root_span_id"]
+    task_span_id = task_span["span_id"]
     for llm_span in llm_spans:
-        assert llm_span["root_span_id"] == root_span_id
-        assert root_span_id in llm_span["span_parents"]
+        assert llm_span["root_span_id"] == task_root_span_id
+        assert task_span_id in llm_span["span_parents"]
 
     for tool_span in tool_spans:
-        assert tool_span["root_span_id"] == root_span_id
+        assert tool_span["root_span_id"] == task_root_span_id
         assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"])
 
 
@@ -454,7 +457,8 @@ async def user_prompt_hook(input_data: Any, tool_use_id: str | None, context: An
 
     hook_span = function_spans[0]
     assert task_span["input"] == prompt
-    assert hook_span["root_span_id"] == task_span["span_id"]
+    # The hook span is a descendant of the task span, so they share a trace.
+    assert hook_span["root_span_id"] == task_span["root_span_id"]
     assert hook_span["input"]["hook_event_name"] == "UserPromptSubmit"
     assert hook_span["input"]["prompt"] == prompt
     assert hook_span["output"]["hookSpecificOutput"]["hookEventName"] == "UserPromptSubmit"
@@ -546,7 +550,8 @@ async def post_tool_hook(input_data: Any, tool_use_id: str | None, context: Any)
     post_span = hook_span_by_event["PostToolUse"]
 
     for hook_span in (pre_span, post_span):
-        assert hook_span["root_span_id"] == task_span["span_id"]
+        # Hook spans are descendants of the task span, so they share a trace.
+        assert hook_span["root_span_id"] == task_span["root_span_id"]
         assert hook_span["input"]["tool_name"] == "Bash"
 
     assert pre_span["output"]["hookSpecificOutput"]["hookEventName"] == "PreToolUse"
@@ -681,7 +686,9 @@ async def test_bundled_subagent_creates_task_span(memory_logger):
     assert subagent_spans, "Expected at least one subagent task span"
     assert any(s.get("metadata", {}).get("task_id") for s in subagent_spans)
     for subagent_span in subagent_spans:
-        assert subagent_span["root_span_id"] == root_task_span["span_id"]
+        # Subagent spans are descendants of the root task span, so they share a
+        # trace; the root task ``span_id`` appears in ``span_parents`` below.
+        assert subagent_span["root_span_id"] == root_task_span["root_span_id"]
         parents = set(subagent_span["span_parents"])
         tool_use_id = subagent_span.get("metadata", {}).get("tool_use_id")
         matching_tool_span = next(
 
@@ -249,7 +249,6 @@ def test_wrap_huggingface_hub_chat_completion_sync(memory_logger):
     # With no parent span on the stack, the LLM span is its own root and has
     # no ``span_parents``.
     assert not span.get("span_parents")
-    assert span["span_id"] == span["root_span_id"]
     # The user's ``provider=`` kwarg overrides the default "huggingface"
     # identity so the span reflects the actual routing target.
     assert span["metadata"]["provider"] == CHAT_PROVIDER
@@ -317,7 +316,6 @@ def test_wrap_huggingface_hub_chat_completion_streaming(memory_logger):
     # when the iterator is exhausted, with no parent on the stack the span is
     # still its own root.
     assert not span.get("span_parents")
-    assert span["span_id"] == span["root_span_id"]
     assert span["metadata"]["provider"] == CHAT_PROVIDER
 
     # Aggregated output is ``{"choices": [{"index", "message": {...}, "finish_reason"?}]}``.
@@ -476,7 +474,6 @@ async def _run():
     span = spans[0]
     assert span["span_attributes"]["name"] == "huggingface.chat_completion"
     assert not span.get("span_parents")
-    assert span["span_id"] == span["root_span_id"]
     assert span["metadata"]["provider"] == CHAT_PROVIDER
 
 
 
@@ -62,7 +62,10 @@ def test_llm_calls(logger_memory_logger):
     spans = memory_logger.pop()
     assert len(spans) == 3
 
+    # ``root_span_id`` is the root span's own span_id (the parent reference for
+    # its children); ``trace_root_id`` is the trace shared by every span.
     root_span_id = spans[0]["span_id"]
+    trace_root_id = spans[0]["root_span_id"]
 
     assert_matches_object(
         spans,
@@ -81,7 +84,7 @@ def test_llm_calls(logger_memory_logger):
                 },
                 "metadata": {"tags": []},
                 "span_id": root_span_id,
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
             },
             {
                 "span_attributes": {"name": "ChatPromptTemplate"},
@@ -97,7 +100,7 @@ def test_llm_calls(logger_memory_logger):
                     ]
                 },
                 "metadata": {"tags": ["seq:step:1"]},
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_span_id],
             },
             {
@@ -144,7 +147,7 @@ def test_llm_calls(logger_memory_logger):
                     "tags": ["seq:step:2"],
                     "model": "gpt-4o-mini-2024-07-18",
                 },
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_span_id],
             },
         ],
@@ -171,6 +174,7 @@ def test_chain_with_memory(logger_memory_logger):
     assert len(spans) == 3
 
     root_span_id = spans[0]["span_id"]
+    trace_root_id = spans[0]["root_span_id"]
 
     assert_matches_object(
         spans,
@@ -189,7 +193,7 @@ def test_chain_with_memory(logger_memory_logger):
                 },
                 "metadata": {"tags": ["test"]},
                 "span_id": root_span_id,
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
             },
             {
                 "span_attributes": {"name": "ChatPromptTemplate"},
@@ -205,7 +209,7 @@ def test_chain_with_memory(logger_memory_logger):
                     ]
                 },
                 "metadata": {"tags": ["seq:step:1", "test"]},
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_span_id],
             },
             {
@@ -252,7 +256,7 @@ def test_chain_with_memory(logger_memory_logger):
                     "tags": ["seq:step:2", "test"],
                     "model": "gpt-4o-mini-2024-07-18",
                 },
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_span_id],
             },
         ],
@@ -301,13 +305,14 @@ def calculator(input: CalculatorInput) -> str:
 
     spans = memory_logger.pop()
     root_span_id = spans[0]["span_id"]
+    trace_root_id = spans[0]["root_span_id"]
 
     assert_matches_object(
         spans,
         [
             {
                 "span_id": root_span_id,
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_attributes": {
                     "name": "ChatOpenAI",
                     "type": "llm",
@@ -640,13 +645,13 @@ def test_chain_null_values(logger_memory_logger):
     flush()
 
     spans = memory_logger.pop()
-    root_span_id = spans[0]["span_id"]
+    trace_root_id = spans[0]["root_span_id"]
 
     assert_matches_object(
         spans,
         [
             {
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_attributes": {
                     "name": "TestChain",
                     "type": "task",
@@ -721,7 +726,10 @@ def task_fn(input, hooks):
 
     # Find the root eval span
     root_eval_span = [s for s in spans if s.get("span_attributes", {}).get("name") == "test-consecutive-eval"][0]
+    # ``root_eval_span_id`` is the eval root's own span_id (the parent reference
+    # for its children); ``trace_root_id`` is the trace shared by every span.
     root_eval_span_id = root_eval_span["span_id"]
+    trace_root_id = root_eval_span["root_span_id"]
 
     # Find the eval dataset record spans (direct children of root eval span)
     eval_record_spans = [
@@ -751,7 +759,7 @@ def task_fn(input, hooks):
         [
             {
                 "span_id": root_eval_span_id,
-                "root_span_id": root_eval_span_id,
+                "root_span_id": trace_root_id,
                 "span_attributes": {
                     "name": "test-consecutive-eval",
                     "type": "eval",
@@ -765,7 +773,7 @@ def task_fn(input, hooks):
         [eval_record_1],
         [
             {
-                "root_span_id": root_eval_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_eval_span_id],
                 "span_attributes": {
                     "name": "eval",
@@ -781,7 +789,7 @@ def task_fn(input, hooks):
         [eval_record_2],
         [
             {
-                "root_span_id": root_eval_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_eval_span_id],
                 "span_attributes": {
                     "name": "eval",
@@ -797,7 +805,7 @@ def task_fn(input, hooks):
         [task_1_span],
         [
             {
-                "root_span_id": root_eval_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [eval_record_1["span_id"]],
                 "span_attributes": {
                     "name": "task",
@@ -813,7 +821,7 @@ def task_fn(input, hooks):
         [task_2_span],
         [
             {
-                "root_span_id": root_eval_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [eval_record_2["span_id"]],
                 "span_attributes": {
                     "name": "task",
 
@@ -63,7 +63,10 @@ def test_global_handler(logger_memory_logger):
     spans = memory_logger.pop()
     assert len(spans) > 0
 
+    # ``root_span_id`` is the root span's own span_id (the parent reference for
+    # its children); ``trace_root_id`` is the trace shared by every span.
     root_span_id = spans[0]["span_id"]
+    trace_root_id = spans[0]["root_span_id"]
 
     # Spans would be empty if the handler was not registered, let's make sure it logged what we expect
     assert_matches_object(
@@ -83,7 +86,7 @@ def test_global_handler(logger_memory_logger):
                 },
                 "metadata": {"tags": []},
                 "span_id": root_span_id,
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
             },
             {
                 "span_attributes": {"name": "ChatPromptTemplate"},
@@ -99,7 +102,7 @@ def test_global_handler(logger_memory_logger):
                     ]
                 },
                 "metadata": {"tags": ["seq:step:1"]},
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_span_id],
             },
             {
@@ -146,7 +149,7 @@ def test_global_handler(logger_memory_logger):
                     "tags": ["seq:step:2"],
                     "model": "gpt-4o-mini-2024-07-18",
                 },
-                "root_span_id": root_span_id,
+                "root_span_id": trace_root_id,
                 "span_parents": [root_span_id],
             },
         ],