Use-Tusk · sohankshirsagar · Mar 20, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 19, 2026
diff --git a/drift/core/drift_sdk.py b/drift/core/drift_sdk.py
@@ -646,8 +646,6 @@ def mark_app_as_ready(self) -> None:
         if self._td_span_processor:
             self._td_span_processor.update_app_ready(True)
 
-        logger.debug("Application marked as ready")
-
         if self.mode == TuskDriftMode.REPLAY:
             logger.debug("Replay mode active - ready to serve mocked responses")
         elif self.mode == TuskDriftMode.RECORD:

diff --git a/drift/instrumentation/django/instrumentation.py b/drift/instrumentation/django/instrumentation.py
@@ -49,31 +49,42 @@ def _resolve_http_transforms(
     @override
     def patch(self, module: ModuleType) -> None:
         """Patch Django by injecting middleware."""
+        if not self._try_inject_middleware():
+            # Settings not configured yet — defer injection until django.setup() runs
+            self._defer_middleware_injection()
+
+    def _try_inject_middleware(self) -> bool:
+        """Attempt to inject DriftMiddleware into Django settings.
+
+        Returns:
+            True when no retry is needed (injected, already present, or failed
+            for good); False when settings are unconfigured and must be deferred.
+        """
         global _middleware_injected
 
         if _middleware_injected:
             logger.debug("Middleware already injected, skipping")
-            return
+            return True
 
         try:
             from django.conf import settings
 
             if not settings.configured:
-                logger.warning("Django settings not configured, cannot inject middleware")
-                return
+                logger.debug("Django settings not configured yet, will defer middleware injection")
+                return False
 
             middleware_setting = self._get_middleware_setting(settings)
             if not middleware_setting:
                 logger.warning("Could not find middleware setting, cannot inject")
-                return
+                return True  # Don't retry — this won't change
 
             current_middleware = list(getattr(settings, middleware_setting, []))
 
             middleware_path = "drift.instrumentation.django.middleware.DriftMiddleware"
             if middleware_path in current_middleware:
                 logger.debug("DriftMiddleware already in settings, skipping injection")
                 _middleware_injected = True
-                return
+                return True
 
             # Insert at position 0 to capture all requests
             current_middleware.insert(0, middleware_path)
@@ -89,11 +100,54 @@ def patch(self, module: ModuleType) -> None:
             self._force_database_reconnect()
 
             print("Django instrumentation applied")
+            return True
 
         except ImportError as e:
             logger.warning(f"Could not import Django settings: {e}")
+            return True  # Don't retry on import errors
         except Exception as e:
             logger.error(f"Failed to inject middleware: {e}", exc_info=True)
+            return True  # Don't retry on unexpected errors
+
+    def _defer_middleware_injection(self) -> None:
+        """Monkey-patch django.setup() to inject middleware after settings are configured.
+
+        When TuskDrift.initialize() runs before DJANGO_SETTINGS_MODULE is set
+        (common in manage.py where the SDK init is the first import), Django
+        settings aren't available yet. This defers injection to run after
+        django.setup() completes, which is when settings are guaranteed to be
+        configured.
+
+        Installation is skipped when django.setup already carries this
+        wrapper's marker. The wrapper removes itself after the first
+        django.setup() call, unless another wrapper was installed on top of
+        it in the meantime (it is then left in place so that one survives).
+        """
+        import functools
+
+        import django
+
+        if getattr(django.setup, "_drift_deferred_injection", False):
+            logger.debug("Middleware injection already deferred, skipping")
+            return
+
+        original_setup = django.setup
+
+        @functools.wraps(original_setup)
+        def patched_setup(*args, **kwargs):
+            try:
+                result = original_setup(*args, **kwargs)
+                self._try_inject_middleware()
+                return result
+            finally:
+                # Restore only if we are still the active wrapper; blindly
+                # reassigning would discard a patch applied after ours.
+                if django.setup is patched_setup:
+                    django.setup = original_setup
+
+        patched_setup._drift_deferred_injection = True  # type: ignore[attr-defined]
+        django.setup = patched_setup
+        logger.debug("Deferred middleware injection to django.setup()")
 
     def _force_database_reconnect(self) -> None:
         """Force Django to close and recreate database connections."""

diff --git a/drift/instrumentation/django/middleware.py b/drift/instrumentation/django/middleware.py
@@ -66,6 +66,9 @@ def __call__(self, request: HttpRequest) -> HttpResponse:
         if sdk.mode == TuskDriftMode.DISABLED:
             return self.get_response(request)
 
+        if not sdk.app_ready:
+            sdk.mark_app_as_ready()
+
         # REPLAY mode - handle trace ID extraction and context setup
         if sdk.mode == TuskDriftMode.REPLAY:
             return self._handle_replay_request(request, sdk)

diff --git a/drift/instrumentation/fastapi/instrumentation.py b/drift/instrumentation/fastapi/instrumentation.py
@@ -398,6 +398,9 @@ async def _handle_request(
     if sdk.mode == TuskDriftMode.DISABLED:
         return await original_call(app, scope, receive, send)
 
+    if not sdk.app_ready:
+        sdk.mark_app_as_ready()
+
     # REPLAY mode - handle trace ID extraction and context setup
     if sdk.mode == TuskDriftMode.REPLAY:
         return await _handle_replay_request(

diff --git a/drift/instrumentation/urllib/instrumentation.py b/drift/instrumentation/urllib/instrumentation.py
@@ -338,15 +338,18 @@ def patched_open(opener_self, fullurl, data=None, timeout=_GLOBAL_DEFAULT_TIMEOU
             if sdk.mode == TuskDriftMode.DISABLED:
                 return original_open(opener_self, fullurl, data, timeout)
 
-            # Set calling_library_context to suppress socket instrumentation warnings
-            # context_token = calling_library_context.set("urllib")
-            try:
-                # Extract URL for default response handler
-                if isinstance(fullurl, str):
-                    url = fullurl
-                else:
-                    url = fullurl.full_url
+            # Extract URL early so we can check the scheme
+            if isinstance(fullurl, str):
+                url = fullurl
+            else:
+                url = fullurl.full_url
 
+            # Only instrument HTTP/HTTPS requests; pass through file://, data://, ftp://, etc.
+            parsed = urlparse(url)
+            if parsed.scheme not in ("http", "https"):
+                return original_open(opener_self, fullurl, data, timeout)
+
+            try:
                 def original_call():
                     return original_open(opener_self, fullurl, data, timeout)
 

diff --git a/drift/instrumentation/urllib3/e2e-tests/.tusk/config.yaml b/drift/instrumentation/urllib3/e2e-tests/.tusk/config.yaml
@@ -25,4 +25,3 @@ recording:
 
 replay:
   enable_telemetry: false
-
diff --git a/drift/instrumentation/urllib3/e2e-tests/src/app.py b/drift/instrumentation/urllib3/e2e-tests/src/app.py
@@ -1,6 +1,7 @@
 """Flask test app for e2e tests - urllib3 instrumentation testing."""
 
 import json
+import zlib
 
 import urllib3
 from flask import Flask, jsonify, request
@@ -438,23 +439,21 @@ def test_requests_lib():
 # =============================================================================
 
 
-@app.route("/test/bug/preload-content-false", methods=["GET"])
-def test_bug_preload_content_false():
-    """CONFIRMED BUG: preload_content=False parameter breaks response reading.
+@app.route("/test/preload-content-false-read", methods=["GET"])
+def test_preload_content_false_read():
+    """Test preload_content=False with manual read().
 
-    When preload_content=False, the response body is not preloaded into memory.
-    The instrumentation reads .data in _finalize_span which consumes the body
-    before the application can read it.
-
-    Root cause: instrumentation.py line 839 accesses response.data unconditionally
+    This is the pattern botocore/boto3 uses: request with preload_content=False,
+    then call response.read() to get the body. The instrumentation must buffer
+    the body in _fp (BytesIO) during recording so both the span capture and the
+    caller's read() work correctly.
     """
     try:
         response = http.request(
             "GET",
             "https://jsonplaceholder.typicode.com/posts/21",
             preload_content=False,
         )
-        # Manually read the data after the response
         data_bytes = response.read()
         response.release_conn()
         data = json.loads(data_bytes.decode("utf-8"))
@@ -463,14 +462,40 @@ def test_bug_preload_content_false():
         return jsonify({"error": str(e)}), 500
 
 
-@app.route("/test/bug/streaming-response", methods=["GET"])
-def test_bug_streaming_response():
-    """CONFIRMED BUG: Streaming response body is consumed before iteration.
+@app.route("/test/preload-content-false-crc32", methods=["GET"])
+def test_preload_content_false_crc32():
+    """Test preload_content=False with a CRC32 checksum over the read() body.
+
+    Mimics botocore's DynamoDB flow, which read()s the body and checks its
+    CRC32 against a header; this route only computes and returns the CRC32.
+    Before the fix the mock response's BytesIO was exhausted by
+    preload_content=True, so read() returned b"" and the CRC32 was 0.
+    """
+    try:
+        response = http.request(
+            "GET",
+            "https://jsonplaceholder.typicode.com/posts/22",
+            preload_content=False,
+        )
+        body = response.read()
+        response.release_conn()
+
+        if not body:
+            return jsonify({"error": "Empty body from read()"}), 500
+
+        actual_crc32 = zlib.crc32(body) & 0xFFFFFFFF
+        data = json.loads(body.decode("utf-8"))
+        return jsonify({**data, "crc32": actual_crc32, "body_length": len(body)})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
 
-    When using response.stream() to iterate over chunks, the instrumentation
-    has already consumed the body by accessing response.data in _finalize_span.
+@app.route("/test/preload-content-false-stream", methods=["GET"])
+def test_preload_content_false_stream():
+    """Test preload_content=False with chunked stream() reading.
 
-    Root cause: Same as preload-content-false - instrumentation.py line 839
+    The instrumentation buffers the body into a BytesIO, so subsequent
+    stream() calls read from that BytesIO in chunks as normal.
     """
     try:
         response = http.request(
@@ -479,9 +504,8 @@ def test_bug_streaming_response():
             preload_content=False,
         )
 
-        # Try to read response in chunks using stream()
         chunks = []
-        for chunk in response.stream(32):  # Read 32 bytes at a time
+        for chunk in response.stream(32):
             chunks.append(chunk)
 
         response.release_conn()

diff --git a/drift/instrumentation/urllib3/e2e-tests/src/test_requests.py b/drift/instrumentation/urllib3/e2e-tests/src/test_requests.py
@@ -96,17 +96,17 @@
     make_request("GET", "/test/requests-lib")
 
     # ==========================================================================
-    # Note: Bug detection tests for preload_content=False and streaming responses
-    # are NOT included in e2e tests because these patterns are incompatible with
-    # replay mode - we can't capture the response body without consuming the stream.
-    #
-    # The instrumentation now correctly handles these patterns by NOT capturing
-    # the response body, which allows the application to read/stream normally.
-    # However, this means there's no body to replay in REPLAY mode.
-    #
-    # To verify the fix works, run the manual test script in RECORD mode:
-    #   curl http://localhost:8000/test/bug/preload-content-false
-    #   curl http://localhost:8000/test/bug/streaming-response
+    # preload_content=False Tests (botocore/boto3 pattern)
     # ==========================================================================
+    print("\n--- preload_content=False Tests ---\n")
+
+    # read() after preload_content=False (core botocore pattern)
+    make_request("GET", "/test/preload-content-false-read")
+
+    # read() + CRC32 checksum validation (DynamoDB pattern)
+    make_request("GET", "/test/preload-content-false-crc32")
+
+    # stream() after preload_content=False
+    make_request("GET", "/test/preload-content-false-stream")
 
     print_request_summary()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,4 +25,3 @@ recording:

		replay:
		enable_telemetry: false