getsentry
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 0 additions & 1 deletion
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
Lines changed: 0 additions & 1 deletion
diff --git a/.github/workflows/enforce-license-compliance.yml b/.github/workflows/enforce-license-compliance.yml
Lines changed: 0 additions & 1 deletion
diff --git a/sentry_sdk/_span_batcher.py b/sentry_sdk/_span_batcher.py
Lines changed: 58 additions & 32 deletions
diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py
Lines changed: 54 additions & 1 deletion
@@ -5,7 +5,6 @@ on:
     branches:
       - master
       - release/**
-      - potel-base
 
   pull_request:
 
 
@@ -15,7 +15,6 @@ on:
   push:
     branches:
       - master
-      - potel-base
   pull_request:
   schedule:
     - cron: '18 18 * * 3'
 
@@ -6,7 +6,6 @@ on:
       - master
       - main
       - release/*
-      - potel-base
   pull_request:
 
 # Cancel in progress workflows on pull_requests.
 
@@ -15,10 +15,15 @@
 
 
 class SpanBatcher(Batcher["StreamedSpan"]):
-    # TODO[span-first]: size-based flushes
-    # TODO[span-first]: adjust flush/drop defaults
+    # MAX_BEFORE_FLUSH should be lower than MAX_BEFORE_DROP, so that there is
+    # a bit of a buffer for spans that appear between setting the flush event
+    # and actually flushing the buffer.
+    #
+    # The max limits are all per trace.
+    MAX_ENVELOPE_SIZE = 1000  # spans
     MAX_BEFORE_FLUSH = 1000
-    MAX_BEFORE_DROP = 5000
+    MAX_BEFORE_DROP = 2000
+    MAX_BYTES_BEFORE_FLUSH = 5 * 1024 * 1024  # 5 MB
     FLUSH_WAIT_TIME = 5.0
 
     TYPE = "span"
@@ -35,6 +40,7 @@ def __init__(
         # envelope.
         # trace_id -> span buffer
         self._span_buffer: dict[str, list["StreamedSpan"]] = defaultdict(list)
+        self._running_size: dict[str, int] = defaultdict(lambda: 0)
         self._capture_func = capture_func
         self._record_lost_func = record_lost_func
         self._running = True
@@ -45,16 +51,12 @@ def __init__(
         self._flusher: "Optional[threading.Thread]" = None
         self._flusher_pid: "Optional[int]" = None
 
-    def get_size(self) -> int:
-        # caller is responsible for locking before checking this
-        return sum(len(buffer) for buffer in self._span_buffer.values())
-
     def add(self, span: "StreamedSpan") -> None:
         if not self._ensure_thread() or self._flusher is None:
             return None
 
         with self._lock:
-            size = self.get_size()
+            size = len(self._span_buffer[span.trace_id])
             if size >= self.MAX_BEFORE_DROP:
                 self._record_lost_func(
                     reason="queue_overflow",
@@ -64,18 +66,36 @@ def add(self, span: "StreamedSpan") -> None:
                 return None
 
             self._span_buffer[span.trace_id].append(span)
+            self._running_size[span.trace_id] += self._estimate_size(span)
+
             if size + 1 >= self.MAX_BEFORE_FLUSH:
                 self._flush_event.set()
+                return
+
+            if self._running_size[span.trace_id] >= self.MAX_BYTES_BEFORE_FLUSH:
+                self._flush_event.set()
+                return
+
+    @staticmethod
+    def _estimate_size(item: "StreamedSpan") -> int:
+        """Return a rough, quick-to-compute estimate of the serialized span size."""
+        base_size = 210  # rough size of the payload without attributes
+        per_attribute_size = 70  # estimated additional bytes per attribute
+        return base_size + per_attribute_size * len(item._attributes)
 
     @staticmethod
     def _to_transport_format(item: "StreamedSpan") -> "Any":
         # TODO[span-first]
         res: "dict[str, Any]" = {
+            "trace_id": item.trace_id,
             "span_id": item.span_id,
             "name": item._name,
             "status": item._status,
         }
 
+        if item._parent_span_id:
+            res["parent_span_id"] = item._parent_span_id
+
         if item._attributes:
             res["attributes"] = {
                 k: serialize_attribute(v) for (k, v) in item._attributes.items()
@@ -86,7 +106,7 @@ def _to_transport_format(item: "StreamedSpan") -> "Any":
     def _flush(self) -> None:
         with self._lock:
             if len(self._span_buffer) == 0:
-                return None
+                return
 
             envelopes = []
             for trace_id, spans in self._span_buffer.items():
@@ -95,34 +115,40 @@ def _flush(self) -> None:
                     # dsc = spans[0].dynamic_sampling_context()
                     dsc = None
 
-                    envelope = Envelope(
-                        headers={
-                            "sent_at": format_timestamp(datetime.now(timezone.utc)),
-                            "trace": dsc,
-                        }
-                    )
-
-                    envelope.add_item(
-                        Item(
-                            type="span",
-                            content_type="application/vnd.sentry.items.span.v2+json",
+                    # Max per envelope is 1000, so if we happen to have more than
+                    # 1000 spans in one bucket, we'll need to separate them.
+                    for start in range(0, len(spans), self.MAX_ENVELOPE_SIZE):
+                        end = min(start + self.MAX_ENVELOPE_SIZE, len(spans))
+
+                        envelope = Envelope(
                             headers={
-                                "item_count": len(spans),
-                            },
-                            payload=PayloadRef(
-                                json={
-                                    "items": [
-                                        self._to_transport_format(span)
-                                        for span in spans
-                                    ]
-                                }
-                            ),
+                                "sent_at": format_timestamp(datetime.now(timezone.utc)),
+                                "trace": dsc,
+                            }
+                        )
+
+                        envelope.add_item(
+                            Item(
+                                type=self.TYPE,
+                                content_type=self.CONTENT_TYPE,
+                                headers={
+                                    "item_count": end - start,
+                                },
+                                payload=PayloadRef(
+                                    json={
+                                        "items": [
+                                            self._to_transport_format(spans[j])
+                                            for j in range(start, end)
+                                        ]
+                                    }
+                                ),
+                            )
                         )
-                    )
 
-                    envelopes.append(envelope)
+                        envelopes.append(envelope)
 
             self._span_buffer.clear()
+            self._running_size.clear()
 
         for envelope in envelopes:
             self._capture_func(envelope)
@@ -33,7 +33,7 @@
     normalize_incoming_data,
     PropagationContext,
 )
-from sentry_sdk.traces import StreamedSpan
+from sentry_sdk.traces import _DEFAULT_PARENT_SPAN, StreamedSpan, NoOpStreamedSpan
 from sentry_sdk.tracing import (
     BAGGAGE_HEADER_NAME,
     SENTRY_TRACE_HEADER_NAME,
@@ -1174,6 +1174,59 @@ def start_span(
 
             return span
 
+    def start_streamed_span(
+        self,
+        name: str,
+        attributes: "Optional[Attributes]",
+        parent_span: "Optional[StreamedSpan]",
+        active: bool,
+    ) -> "StreamedSpan":
+        """Start a StreamedSpan as a new segment or as a child of a parent span."""
+        # TODO: rename to start_span once we drop the old API
+        noop_parent_given = isinstance(parent_span, NoOpStreamedSpan)
+        if noop_parent_given:
+            # parent_span is only set if the user explicitly set it
+            logger.debug(
+                "Ignored parent span provided. Span will be parented to the "
+                "currently active span instead."
+            )
+
+        # Default and ignored parents resolve to this scope's current span.
+        if noop_parent_given or parent_span is _DEFAULT_PARENT_SPAN:
+            parent_span = self.span  # type: ignore
+
+        if parent_span is None:
+            # No eligible parent_span was provided and there is no currently
+            # active span, so this is a segment.
+            context = self.get_active_propagation_context()
+            return StreamedSpan(
+                name=name,
+                attributes=attributes,
+                active=active,
+                scope=self,
+                segment=None,
+                trace_id=context.trace_id,
+                parent_span_id=context.parent_span_id,
+                parent_sampled=context.parent_sampled,
+                baggage=context.baggage,
+            )
+
+        # Child span: the trace data is taken from the parent span instead.
+        with new_scope():
+            if isinstance(parent_span, NoOpStreamedSpan):
+                return NoOpStreamedSpan()
+
+            return StreamedSpan(
+                name=name,
+                attributes=attributes,
+                active=active,
+                scope=self,
+                segment=parent_span._segment,
+                trace_id=parent_span.trace_id,
+                parent_span_id=parent_span.span_id,
+                parent_sampled=parent_span.sampled,
+            )
+
     def continue_trace(
         self,
         environ_or_headers: "Dict[str, Any]",