Skip to content

Commit 3dbdef8

Browse files
committed
urllib: add support for capturing request and response headers
1 parent 4531513 commit 3dbdef8

2 files changed

Lines changed: 498 additions & 2 deletions

File tree

instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,99 @@ def response_hook(span: Span, request: Request, response: HTTPResponse):
7777
7878
will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
7979
80+
Capture HTTP request and response headers
81+
*****************************************
82+
You can configure the agent to capture specified HTTP headers as span attributes, according to the
83+
`semantic conventions <https://opentelemetry.io/docs/specs/semconv/http/http-spans/#http-client-span>`_.
84+
85+
Request headers
86+
***************
87+
To capture HTTP request headers as span attributes, set the environment variable
88+
``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST`` to a comma delimited list of HTTP header names.
89+
90+
For example using the environment variable,
91+
::
92+
93+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST="content-type,custom_request_header"
94+
95+
will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes.
96+
97+
Request header names in urllib are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment
98+
variable will capture the header named ``custom-header``.
99+
100+
Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example:
101+
::
102+
103+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST="Accept.*,X-.*"
104+
105+
This would match all request headers that start with ``Accept`` or ``X-``.
106+
107+
To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST`` to ``".*"``.
108+
::
109+
110+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST=".*"
111+
112+
The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>``
113+
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
114+
single item list containing all the header values.
115+
116+
For example:
117+
``http.request.header.custom_request_header = ["<value1>", "<value2>"]``
118+
119+
Response headers
120+
****************
121+
To capture HTTP response headers as span attributes, set the environment variable
122+
``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE`` to a comma delimited list of HTTP header names.
123+
124+
For example using the environment variable,
125+
::
126+
127+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE="content-type,custom_response_header"
128+
129+
will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes.
130+
131+
Response header names in urllib are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment
132+
variable will capture the header named ``custom-header``.
133+
134+
Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example:
135+
::
136+
137+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE="Content.*,X-.*"
138+
139+
This would match all response headers that start with ``Content`` or ``X-``.
140+
141+
To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE`` to ``".*"``.
142+
::
143+
144+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE=".*"
145+
146+
The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>``
147+
is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
148+
list containing the header values.
149+
150+
For example:
151+
``http.response.header.custom_response_header = ["<value1>", "<value2>"]``
152+
153+
Sanitizing headers
154+
******************
155+
In order to prevent storing sensitive data such as personally identifiable information (PII), session keys, passwords,
156+
etc, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS``
157+
to a comma delimited list of HTTP header names to be sanitized.
158+
159+
Regexes may be used, and all header names will be matched in a case-insensitive manner.
160+
161+
For example using the environment variable,
162+
::
163+
164+
export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS=".*session.*,set-cookie"
165+
166+
will replace the value of headers such as ``session-id`` and ``set-cookie`` with ``[REDACTED]`` in the span.
167+
168+
Note:
169+
The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change.
170+
171+
172+
80173
API
81174
---
82175
"""
@@ -135,8 +228,15 @@ def response_hook(span: Span, request: Request, response: HTTPResponse):
135228
)
136229
from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer
137230
from opentelemetry.util.http import (
231+
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST,
232+
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE,
233+
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS,
138234
ExcludeList,
235+
get_custom_header_attributes,
236+
get_custom_headers,
139237
get_excluded_urls,
238+
normalise_request_header_name,
239+
normalise_response_header_name,
140240
parse_excluded_urls,
141241
redact_url,
142242
sanitize_method,
@@ -169,6 +269,9 @@ def _instrument(self, **kwargs: Any):
169269
``response_hook``: An optional callback which is invoked right before the span is finished processing a response
170270
``excluded_urls``: A string containing a comma-delimited
171271
list of regexes used to exclude URLs from tracking
272+
``captured_request_headers``: A comma-separated list of regexes to match against request headers to capture
273+
``captured_response_headers``: A comma-separated list of regexes to match against response headers to capture
274+
``sensitive_headers``: A comma-separated list of regexes to match against captured headers to be sanitized
172275
"""
173276
# initialize semantic conventions opt-in if needed
174277
_OpenTelemetrySemanticConventionStability._initialize()
@@ -205,6 +308,24 @@ def _instrument(self, **kwargs: Any):
205308
else parse_excluded_urls(excluded_urls)
206309
),
207310
sem_conv_opt_in_mode=sem_conv_opt_in_mode,
311+
captured_request_headers=kwargs.get(
312+
"captured_request_headers",
313+
get_custom_headers(
314+
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST
315+
),
316+
),
317+
captured_response_headers=kwargs.get(
318+
"captured_response_headers",
319+
get_custom_headers(
320+
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE
321+
),
322+
),
323+
sensitive_headers=kwargs.get(
324+
"sensitive_headers",
325+
get_custom_headers(
326+
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS
327+
),
328+
),
208329
)
209330

210331
def _uninstrument(self, **kwargs: Any):
@@ -223,6 +344,9 @@ def _instrument(
223344
response_hook: _ResponseHookT = None,
224345
excluded_urls: ExcludeList | None = None,
225346
sem_conv_opt_in_mode: _StabilityMode = _StabilityMode.DEFAULT,
347+
captured_request_headers: list[str] | None = None,
348+
captured_response_headers: list[str] | None = None,
349+
sensitive_headers: list[str] | None = None,
226350
):
227351
"""Enables tracing of all requests calls that go through
228352
:code:`urllib.Client._make_request`"""
@@ -275,6 +399,16 @@ def _instrumented_open_call(
275399
)
276400
_set_http_url(labels, url, sem_conv_opt_in_mode)
277401

402+
labels.update(
403+
get_custom_header_attributes(
404+
# TODO: safe with multiple entries for the same header?
405+
dict(request.header_items()),
406+
captured_request_headers,
407+
sensitive_headers,
408+
normalise_request_header_name,
409+
)
410+
)
411+
278412
with tracer.start_as_current_span(
279413
span_name, kind=SpanKind.CLIENT, attributes=labels
280414
) as span:
@@ -310,6 +444,16 @@ def _instrumented_open_call(
310444
labels, f"{ver_[:1]}.{ver_[:-1]}", sem_conv_opt_in_mode
311445
)
312446

447+
if span.is_recording():
448+
response_headers_to_set = get_custom_header_attributes(
449+
result.headers,
450+
captured_response_headers,
451+
sensitive_headers,
452+
normalise_response_header_name,
453+
)
454+
for header, value in response_headers_to_set.items():
455+
span.set_attribute(header, value)
456+
313457
if exception is not None and _report_new(sem_conv_opt_in_mode):
314458
span.set_attribute(ERROR_TYPE, type(exception).__qualname__)
315459
labels[ERROR_TYPE] = type(exception).__qualname__

0 commit comments

Comments
 (0)