Skip to content

Commit cd72f91

Browse files
KCHENPENGFEIchenpengfei
andauthored
feat(observability): dump HTTP query, request body, and response body to trace spans (#2052)
Add an opt-in middleware that attaches the request and response bodies (truncated, content-type filtered) onto the active OpenTelemetry root span, and surface the URL query string as `url.query`. Off by default — bodies may contain secrets and high-cardinality content; enable via `server.observability.dump_body.enabled` and bound payload size with `max_bytes`. The dump middleware is registered before the HTTP observability middleware so it nests inside the trace span (Starlette executes later-registered middleware first). Streaming, multipart, and binary content types are skipped, and any capture failure is swallowed so the request path is never affected. Co-authored-by: chenpengfei <chenpengfei@bytedance.com>
1 parent 77b604a commit cd72f91

5 files changed

Lines changed: 176 additions & 1 deletion

File tree

openviking/observability/http_observability_middleware.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ async def middleware(request: Request, call_next: Callable) -> Response:
826826

827827
# Extract request information
828828
raw_path = str(request.url.path)
829+
raw_query = request.url.query or None
829830
route_template = _get_route_template(request)
830831
request_id = request.headers.get("x-request-id") or str(uuid.uuid4())
831832

@@ -835,6 +836,7 @@ async def middleware(request: Request, call_next: Callable) -> Response:
835836
http_route=route_template,
836837
request_id=request_id,
837838
url_path=raw_path,
839+
url_query=raw_query,
838840
url_scheme=request.url.scheme,
839841
http_host=request.url.netloc,
840842
source_type=request.headers.get("x-source-type"),

openviking/server/app.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,30 @@ async def _oauth_gc_loop(store) -> None: # noqa: ANN001
320320
allow_headers=["*"],
321321
)
322322

323-
# Add HTTP observability middleware first (metrics, tracing)
323+
# Body dump middleware must be registered BEFORE observability so it ends up
324+
# nested inside the trace span (in Starlette, middleware added later wraps
325+
# earlier-added ones — so earlier registration = inner layer).
326+
if config.observability.dump_body.enabled:
327+
from openviking.server.body_dump_middleware import (
328+
create_dump_http_body_middleware,
329+
)
330+
331+
_dump_body_fn = create_dump_http_body_middleware(
332+
max_bytes=config.observability.dump_body.max_bytes,
333+
)
334+
335+
@app.middleware("http")
336+
async def dump_http_body(request: Request, call_next: Callable):
337+
return await _dump_body_fn(request, call_next)
338+
339+
logger.info(
340+
"HTTP body dump middleware enabled (max_bytes=%d) — bodies will be "
341+
"attached to trace spans. Disable in production via "
342+
"server.observability.dump_body.enabled=false.",
343+
config.observability.dump_body.max_bytes,
344+
)
345+
346+
# Add HTTP observability middleware (metrics, tracing).
324347
# Note: In FastAPI/Starlette, middleware added later executes first (outer layer).
325348
# We want timing to be the outermost layer to measure the full request duration.
326349
from openviking.observability.http_observability_middleware import (
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
2+
# SPDX-License-Identifier: AGPL-3.0
3+
"""HTTP request/response body dump middleware for trace debugging.
4+
5+
Attaches the request and response bodies as attributes on the active OpenTelemetry
6+
root span so they can be inspected in trace UIs (Jaeger, Tempo, etc.). The middleware
7+
must run inside the trace span context — register it before the http_observability
8+
middleware in ``create_app``.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
from typing import Awaitable, Callable
14+
15+
from fastapi import Request
16+
from starlette.responses import Response
17+
18+
try:
19+
from opentelemetry import trace as otel_trace
20+
except ImportError: # pragma: no cover - OTel optional
21+
otel_trace = None
22+
23+
# Skip body capture for content types that are binary, streamed, or otherwise
24+
# pointless to materialize as a span attribute.
25+
_SKIP_CONTENT_TYPE_PREFIXES = (
26+
"multipart/form-data",
27+
"application/octet-stream",
28+
"text/event-stream",
29+
"audio/",
30+
"video/",
31+
"image/",
32+
)
33+
34+
35+
def _should_skip(content_type: str | None) -> bool:
36+
if not content_type:
37+
return False
38+
ct = content_type.lower()
39+
return any(ct.startswith(p) for p in _SKIP_CONTENT_TYPE_PREFIXES)
40+
41+
42+
def _truncate(data: bytes, max_bytes: int) -> str:
43+
total = len(data)
44+
head = data[:max_bytes]
45+
text = head.decode("utf-8", errors="replace")
46+
if total > max_bytes:
47+
return f"{text}…[+{total - max_bytes}B truncated, total {total}B]"
48+
return text
49+
50+
51+
def _set_span_attr(key: str, value: object) -> None:
    """Best-effort: set ``key`` to ``value`` on the current OpenTelemetry span.

    Does nothing when OpenTelemetry is not importable, when there is no
    current span, or when the span is not recording. Any exception raised
    while talking to the tracing API is suppressed.
    """
    if otel_trace is not None:
        try:
            current = otel_trace.get_current_span()
            if current is not None and current.is_recording():
                current.set_attribute(key, value)
        except Exception:
            # Body dump must never break the request path.
            return
62+
63+
64+
def create_dump_http_body_middleware(
    max_bytes: int = 4096,
) -> Callable[[Request, Callable], Awaitable[Response]]:
    """Build a body-dump middleware bound to ``max_bytes``.

    The returned coroutine function takes ``(request, call_next)`` and records
    the request and response bodies, their sizes, and their content types as
    attributes on the current trace span. Bodies whose Content-Type matches
    ``_should_skip`` are not captured, and captured text is cut to
    ``max_bytes`` via ``_truncate`` to keep span attributes bounded.

    Args:
        max_bytes: Maximum number of body bytes stored per span attribute.

    Returns:
        The middleware coroutine function.
    """

    async def middleware(
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        req_ct = request.headers.get("content-type", "")
        if not _should_skip(req_ct):
            try:
                body = await request.body()
                if body:
                    _set_span_attr("http.request.body", _truncate(body, max_bytes))
                    _set_span_attr("http.request.body.size", len(body))
                    if req_ct:
                        _set_span_attr("http.request.content_type", req_ct)
            except Exception:
                # Capture is best-effort, but leave a marker on the span so a
                # missing request body is distinguishable from an empty one.
                _set_span_attr("http.request.body.capture_error", True)

        response = await call_next(request)

        resp_ct = response.headers.get("content-type", "")
        if _should_skip(resp_ct):
            return response

        # Once we start iterating ``response.body_iterator`` we own the bytes,
        # so from here on the client must be served a Response rebuilt from
        # whatever was collected.
        # NOTE(review): if the iterator itself raises mid-stream, the client
        # receives only the chunks collected so far; the span is flagged with
        # ``http.response.body.capture_error``.
        chunks: list[bytes] = []
        try:
            async for chunk in response.body_iterator:
                chunks.append(chunk)
            body_bytes = b"".join(chunks)
            if body_bytes:
                _set_span_attr("http.response.body", _truncate(body_bytes, max_bytes))
                _set_span_attr("http.response.body.size", len(body_bytes))
                if resp_ct:
                    _set_span_attr("http.response.content_type", resp_ct)
        except Exception:
            body_bytes = b"".join(chunks)
            _set_span_attr("http.response.body.capture_error", True)

        try:
            # Content-Length is dropped so the rebuilt Response recomputes it
            # from the bytes actually being sent.
            new_headers = {
                k: v for k, v in response.headers.items() if k.lower() != "content-length"
            }
            rebuilt = Response(
                content=body_bytes,
                status_code=response.status_code,
                headers=new_headers,
                media_type=response.media_type,
            )
            # A dict keeps only one value per header name, which would drop
            # repeated headers such as multiple ``Set-Cookie`` lines. Restore
            # the original header list verbatim, then append whatever the
            # rebuilt Response generated itself (e.g. the fresh Content-Length).
            original = [
                (name, value)
                for name, value in response.raw_headers
                if name.lower() != b"content-length"
            ]
            seen = {name.lower() for name, _ in original}
            generated = [
                (name, value)
                for name, value in rebuilt.raw_headers
                if name.lower() not in seen
            ]
            rebuilt.raw_headers = original + generated
            return rebuilt
        except Exception:
            # Fall back to the original response object as a last resort. Its
            # body_iterator is exhausted at this point, so this only fires if
            # the rebuild path itself is broken.
            return response

    return middleware

openviking/server/config.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,28 @@ class UsageAuditConfig(BaseModel):
104104
model_config = {"extra": "forbid"}
105105

106106

107+
class TraceDumpBodyConfig(BaseModel):
    """HTTP body dump configuration.

    Attaches request/response bodies as attributes on the active trace span so
    they can be inspected in trace UIs. Off by default — bodies may contain
    secrets and high-cardinality content.
    """

    # Master switch for body capture; disabled unless explicitly turned on.
    enabled: bool = False
    # Maximum number of body bytes to record. A negative limit has no sensible
    # meaning for a byte budget, so it is rejected when the config is
    # validated.
    max_bytes: int = Field(default=4096, ge=0)

    model_config = {"extra": "forbid"}
119+
120+
107121
class ObservabilityConfig(BaseModel):
108122
"""Server-side observability configuration."""
109123

110124
metrics: MetricsConfig = Field(default_factory=MetricsConfig)
111125
usage_audit: UsageAuditConfig = Field(default_factory=UsageAuditConfig)
112126
traces: OTelExporterConfig = Field(default_factory=OTelExporterConfig)
113127
logs: OTelExporterConfig = Field(default_factory=OTelExporterConfig)
128+
dump_body: TraceDumpBodyConfig = Field(default_factory=TraceDumpBodyConfig)
114129

115130
model_config = {"extra": "forbid"}
116131

openviking/telemetry/span_models.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ class RootSpanAttributes:
4444
url_path: Optional[str] = None
4545
"""Raw request path for debugging. This may be high-cardinality."""
4646

47+
url_query: Optional[str] = None
48+
"""Raw query string (without leading '?'). May contain secrets — handle accordingly."""
49+
4750
url_scheme: Optional[str] = None
4851
"""URL scheme, such as `http` or `https`."""
4952

@@ -78,6 +81,8 @@ def to_otel_attributes(self) -> Dict[str, Any]:
7881
attrs["http.status_code"] = self.http_status_code
7982
if self.url_path is not None:
8083
attrs["url.path"] = self.url_path
84+
if self.url_query is not None and self.url_query != "":
85+
attrs["url.query"] = self.url_query
8186
if self.url_scheme is not None:
8287
attrs["url.scheme"] = self.url_scheme
8388
if self.http_host is not None:
@@ -290,6 +295,7 @@ def create_root_span_attributes(
290295
http_route: str,
291296
request_id: str,
292297
url_path: Optional[str] = None,
298+
url_query: Optional[str] = None,
293299
url_scheme: Optional[str] = None,
294300
http_host: Optional[str] = None,
295301
source_type: Optional[str] = None,
@@ -301,6 +307,7 @@ def create_root_span_attributes(
301307
http_route=http_route,
302308
request_id=request_id,
303309
url_path=url_path,
310+
url_query=url_query,
304311
url_scheme=url_scheme,
305312
http_host=http_host,
306313
source_type=source_type,

0 commit comments

Comments
 (0)