Skip to content

Commit fd714f8

Browse files
authored
perf: avoid reading HTTP request body before drop/sample decisions (#44)
1 parent ac4a163 commit fd714f8

File tree

4 files changed

+109
-72
lines changed

4 files changed

+109
-72
lines changed

drift/core/mode_utils.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from opentelemetry.trace import SpanKind as OTelSpanKind
1515

1616
if TYPE_CHECKING:
17-
pass
17+
from ..instrumentation.http import HttpTransformEngine
1818

1919
logger = logging.getLogger(__name__)
2020

@@ -144,3 +144,52 @@ def is_background_request(is_server_request: bool = False) -> bool:
144144
current_span_info = SpanUtils.get_current_span_info()
145145

146146
return is_app_ready and not current_span_info and not is_server_request
147+
148+
149+
def should_record_inbound_http_request(
    method: str,
    target: str,
    headers: dict[str, str],
    transform_engine: HttpTransformEngine | None,
    is_pre_app_start: bool,
) -> tuple[bool, str | None]:
    """Decide whether an inbound HTTP request should be recorded.

    Call this BEFORE reading the request body so that requests which end up
    dropped or unsampled never pay for body I/O.

    The checks run in this order:
        1. Drop transforms - if a transform engine is supplied and it reports
           that the request matches a drop rule, the request is skipped.
           This check runs regardless of ``is_pre_app_start``.
        2. Sampling - only when the app is ready (``is_pre_app_start`` is
           False), the SDK sampling rate decides whether to record.

    During the pre-app-start phase the sampling step is bypassed, so every
    request that is not dropped is recorded to capture initialization
    behavior.

    Note: This is HTTP-specific. gRPC or other protocols would need a
    separate function with different parameters.

    Args:
        method: HTTP method (GET, POST, etc.).
        target: Request target (path + query string, e.g. "/api/users?page=1").
        headers: Request headers dictionary.
        transform_engine: Optional HTTP transform engine used for the drop
            check; ``None`` disables the drop check entirely.
        is_pre_app_start: Whether the app is in its pre-start phase (the
            sampling step is skipped when True).

    Returns:
        Tuple of ``(should_record, skip_reason)``:
            - should_record: True if the request should be recorded.
            - skip_reason: ``"dropped"`` or ``"not_sampled"`` when
              ``should_record`` is False, otherwise ``None``.
    """
    # Compare against None explicitly: "no engine configured" is the only case
    # that should bypass the drop check. A truthiness test would also bypass it
    # for an engine object that happens to evaluate as falsy.
    if transform_engine is not None and transform_engine.should_drop_inbound_request(
        method, target, headers
    ):
        return False, "dropped"

    if not is_pre_app_start:
        # Kept as function-local imports, as in the original
        # (NOTE(review): presumably to avoid a circular import - confirm).
        from .drift_sdk import TuskDrift
        from .sampling import should_sample

        sdk = TuskDrift.get_instance()
        sampling_rate = sdk.get_sampling_rate()
        if not should_sample(sampling_rate, is_app_ready=True):
            return False, "not_sampled"

    return True, None

drift/instrumentation/django/middleware.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
if TYPE_CHECKING:
1515
from django.http import HttpRequest, HttpResponse
16-
from ...core.mode_utils import handle_record_mode
16+
from ...core.mode_utils import handle_record_mode, should_record_inbound_http_request
1717
from ...core.tracing import TdSpanAttributes
1818
from ...core.tracing.span_utils import CreateSpanOptions, SpanInfo, SpanUtils
1919
from ...core.types import (
@@ -167,20 +167,25 @@ def _record_request(self, request: HttpRequest, sdk, is_pre_app_start: bool) ->
167167
Returns:
168168
Django HttpResponse object
169169
"""
170-
# Inbound request sampling (only when app is ready)
171-
# Always sample during startup to capture initialization behavior
172-
if not is_pre_app_start:
173-
from ...core.sampling import should_sample
170+
# Pre-flight check: drop transforms and sampling
171+
# NOTE: This is done before body capture to avoid unnecessary I/O
172+
method = request.method or ""
173+
path = request.path
174+
query_string = request.META.get("QUERY_STRING", "")
175+
target = f"{path}?{query_string}" if query_string else path
174176

175-
sampling_rate = sdk.get_sampling_rate()
176-
if not should_sample(sampling_rate, is_app_ready=True):
177-
logger.debug(f"[Django] Request not sampled (rate={sampling_rate}), path={request.path}")
178-
return self.get_response(request)
177+
from ..wsgi import extract_headers
179178

180-
start_time_ns = time.time_ns()
179+
request_headers = extract_headers(request.META)
181180

182-
method = request.method or ""
183-
path = request.path
181+
should_record, skip_reason = should_record_inbound_http_request(
182+
method, target, request_headers, self.transform_engine, is_pre_app_start
183+
)
184+
if not should_record:
185+
logger.debug(f"[Django] Skipping request ({skip_reason}), path={path}")
186+
return self.get_response(request)
187+
188+
start_time_ns = time.time_ns()
184189
span_name = f"{method} {path}"
185190

186191
# Create span using SpanUtils
@@ -216,20 +221,6 @@ def _record_request(self, request: HttpRequest, sdk, is_pre_app_start: bool) ->
216221
except Exception:
217222
pass
218223

219-
# Check if request should be dropped
220-
query_string = request.META.get("QUERY_STRING", "")
221-
target = f"{path}?{query_string}" if query_string else path
222-
223-
from ..wsgi import extract_headers
224-
225-
request_headers = extract_headers(request.META)
226-
227-
if self.transform_engine and self.transform_engine.should_drop_inbound_request(method, target, request_headers):
228-
# Reset context before early return
229-
span_kind_context.reset(span_kind_token)
230-
span_info.span.end()
231-
return self.get_response(request)
232-
233224
# Store metadata on request for later use
234225
request._drift_start_time_ns = start_time_ns # type: ignore
235226
request._drift_span_info = span_info # type: ignore

drift/instrumentation/fastapi/instrumentation.py

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from ...core.drift_sdk import TuskDrift
2828
from ...core.json_schema_helper import JsonSchemaHelper, SchemaMerge
29-
from ...core.mode_utils import handle_record_mode
29+
from ...core.mode_utils import handle_record_mode, should_record_inbound_http_request
3030
from ...core.tracing import TdSpanAttributes
3131
from ...core.tracing.span_utils import CreateSpanOptions, SpanInfo, SpanUtils
3232
from ...core.types import (
@@ -106,7 +106,7 @@ async def _handle_replay_request(
106106
transform_engine: HttpTransformEngine | None,
107107
method: str,
108108
raw_path: str,
109-
target: str,
109+
headers: dict[str, str],
110110
) -> None:
111111
"""Handle FastAPI request in REPLAY mode.
112112
@@ -119,7 +119,7 @@ async def _handle_replay_request(
119119
from ...core.types import replay_trace_id_context
120120

121121
# Extract trace ID from headers (case-insensitive lookup)
122-
request_headers = _extract_headers(scope)
122+
request_headers = headers
123123
# Convert headers to lowercase for case-insensitive lookup
124124
headers_lower = {k.lower(): v for k, v in request_headers.items()}
125125
replay_trace_id = headers_lower.get("x-td-trace-id")
@@ -241,6 +241,8 @@ async def _record_request(
241241
transform_engine: HttpTransformEngine | None,
242242
method: str,
243243
raw_path: str,
244+
target: str,
245+
headers: dict[str, str],
244246
is_pre_app_start: bool,
245247
) -> None:
246248
"""Handle request in RECORD mode with span creation using SpanUtils.
@@ -254,18 +256,17 @@ async def _record_request(
254256
transform_engine: HTTP transform engine for request/response transforms
255257
method: HTTP method (GET, POST, etc.)
256258
raw_path: Request path
259+
target: Request target (path + query string)
260+
headers: Request headers dictionary
257261
is_pre_app_start: Whether this request occurred before app was marked ready
258262
"""
259-
# Inbound request sampling (only when app is ready)
260-
# Always sample during startup to capture initialization behavior
261-
if not is_pre_app_start:
262-
from ...core.sampling import should_sample
263-
264-
sdk = TuskDrift.get_instance()
265-
sampling_rate = sdk.get_sampling_rate()
266-
if not should_sample(sampling_rate, is_app_ready=True):
267-
logger.debug(f"[FastAPI] Request not sampled (rate={sampling_rate}), path={raw_path}")
268-
return await original_call(app, scope, receive, send)
263+
# Pre-flight check: drop transforms and sampling before body capture
264+
should_record, skip_reason = should_record_inbound_http_request(
265+
method, target, headers, transform_engine, is_pre_app_start
266+
)
267+
if not should_record:
268+
logger.debug(f"[FastAPI] Skipping request ({skip_reason}), path={raw_path}")
269+
return await original_call(app, scope, receive, send)
269270

270271
start_time_ns = time.time_ns()
271272

@@ -389,16 +390,8 @@ async def _handle_request(
389390
query_string = query_bytes.decode("utf-8", errors="replace")
390391
else:
391392
query_string = str(query_bytes)
392-
target_for_drop = f"{raw_path}?{query_string}" if query_string else raw_path
393-
headers_for_drop = _extract_headers(scope)
394-
395-
# Check if request should be dropped by transform engine
396-
if transform_engine and transform_engine.should_drop_inbound_request(
397-
method,
398-
target_for_drop,
399-
headers_for_drop,
400-
):
401-
return await original_call(app, scope, receive, send)
393+
target = f"{raw_path}?{query_string}" if query_string else raw_path
394+
headers = _extract_headers(scope)
402395

403396
# DISABLED mode - just pass through
404397
if sdk.mode == TuskDriftMode.DISABLED:
@@ -407,14 +400,26 @@ async def _handle_request(
407400
# REPLAY mode - handle trace ID extraction and context setup
408401
if sdk.mode == TuskDriftMode.REPLAY:
409402
return await _handle_replay_request(
410-
app, scope, receive, send, original_call, transform_engine, method, raw_path, target_for_drop
403+
app, scope, receive, send, original_call, transform_engine, method, raw_path, headers
411404
)
412405

413406
# RECORD mode - use handle_record_mode for consistent is_pre_app_start logic
407+
# NOTE: Pre-flight check (drop + sample) is done inside _record_request
408+
# to access is_pre_app_start from handle_record_mode
414409
result = handle_record_mode(
415410
original_function_call=lambda: original_call(app, scope, receive, send),
416411
record_mode_handler=lambda is_pre_app_start: _record_request(
417-
app, scope, receive, send, original_call, transform_engine, method, raw_path, is_pre_app_start
412+
app,
413+
scope,
414+
receive,
415+
send,
416+
original_call,
417+
transform_engine,
418+
method,
419+
raw_path,
420+
target,
421+
headers,
422+
is_pre_app_start,
418423
),
419424
span_kind=OTelSpanKind.SERVER,
420425
)

drift/instrumentation/wsgi/handler.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
WsgiAppMethod = Callable[[WSGIApplication, WSGIEnvironment, StartResponse], "Iterable[bytes]"]
3131

3232

33-
from ...core.mode_utils import handle_record_mode
33+
from ...core.mode_utils import handle_record_mode, should_record_inbound_http_request
3434
from ...core.tracing import TdSpanAttributes
3535
from ...core.tracing.span_utils import CreateSpanOptions, SpanUtils
3636
from ...core.types import (
@@ -208,34 +208,26 @@ def _create_and_handle_request(
208208
We manually manage context because the span needs to stay open
209209
across the WSGI response iterator.
210210
"""
211-
# Extract request info for span name and drop check
211+
# Pre-flight check: drop transforms and sampling
212+
# NOTE: This is done before body capture to avoid unnecessary I/O
212213
method = environ.get("REQUEST_METHOD", "GET")
213214
path = environ.get("PATH_INFO", "")
214215
query_string = environ.get("QUERY_STRING", "")
215216
target = f"{path}?{query_string}" if query_string else path
217+
request_headers = extract_headers(environ)
218+
219+
if replay_token is None:
220+
should_record, skip_reason = should_record_inbound_http_request(
221+
method, target, request_headers, transform_engine, is_pre_app_start
222+
)
223+
if not should_record:
224+
logger.debug(f"[WSGI] Skipping request ({skip_reason}), path={path}")
225+
return original_wsgi_app(app, environ, start_response)
216226

217227
# Capture request body
218228
request_body = capture_request_body(environ)
219229
environ["_drift_request_body"] = request_body
220230

221-
# Check if request should be dropped
222-
request_headers = extract_headers(environ)
223-
if transform_engine and transform_engine.should_drop_inbound_request(method, target, request_headers):
224-
if replay_token:
225-
replay_trace_id_context.reset(replay_token)
226-
return original_wsgi_app(app, environ, start_response)
227-
228-
# Inbound request sampling (only RECORD mode + app ready)
229-
# - replay_token is None means RECORD mode (REPLAY mode sets replay_token)
230-
# - not is_pre_app_start means app is ready (always sample during startup)
231-
if replay_token is None and not is_pre_app_start:
232-
from ...core.sampling import should_sample
233-
234-
sampling_rate = sdk.get_sampling_rate()
235-
if not should_sample(sampling_rate, is_app_ready=True):
236-
logger.debug(f"[WSGI] Request not sampled (rate={sampling_rate}), path={path}")
237-
return original_wsgi_app(app, environ, start_response)
238-
239231
span_name = f"{method} {path}"
240232

241233
# Build input value before starting span

0 commit comments

Comments
 (0)