@@ -77,6 +77,100 @@ def response_hook(span: Span, request: Request, response: HTTPResponse):
7777
7878will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``.
7979
80+ Capture HTTP request and response headers
81+ *****************************************
82+ You can configure the agent to capture specified HTTP headers as span attributes, according to the
83+ `semantic conventions <https://opentelemetry.io/docs/specs/semconv/http/http-spans/#http-client-span>`_.
84+
85+ Request headers
86+ ***************
87+ To capture HTTP request headers as span attributes, set the environment variable
88+ ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST`` to a comma-delimited list of HTTP header names.
89+
90+ For example using the environment variable,
91+ ::
92+
93+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST="content-type,custom_request_header"
94+
95+ will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes.
96+
97+ Request header names in urllib are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment
98+ variable will capture the header named ``custom-header``.
99+
100+ Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example:
101+ ::
102+
103+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST="Accept.*,X-.*"
104+
105+ This would match all request headers that start with ``Accept`` or ``X-``.
106+
107+ To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST`` to ``".*"``.
108+ ::
109+
110+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST=".*"
111+
112+ The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>``
113+ is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
114+ list containing the header values.
115+
116+ For example:
117+ ``http.request.header.custom_request_header = ["<value1>", "<value2>"]``
118+
119+ .. note::
120+ Some headers are injected at a lower level by the ``http.client`` module and so are not captured by this instrumentation.
121+
122+ Response headers
123+ ****************
124+ To capture HTTP response headers as span attributes, set the environment variable
125+ ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE`` to a comma-delimited list of HTTP header names.
126+
127+ For example using the environment variable,
128+ ::
129+
130+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE="content-type,custom_response_header"
131+
132+ will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes.
133+
134+ Response header names in urllib are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment
135+ variable will capture the header named ``custom-header``.
136+
137+ Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example:
138+ ::
139+
140+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE="Content.*,X-.*"
141+
142+ This would match all response headers that start with ``Content`` or ``X-``.
143+
144+ To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE`` to ``".*"``.
145+ ::
146+
147+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE=".*"
148+
149+ The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>``
150+ is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a
151+ list containing the header values.
152+
153+ For example:
154+ ``http.response.header.custom_response_header = ["<value1>", "<value2>"]``
155+
156+ Sanitizing headers
157+ ******************
158+ In order to prevent storing sensitive data such as personally identifiable information (PII), session keys, passwords,
159+ etc., set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS``
160+ to a comma-delimited list of HTTP header names to be sanitized.
161+
162+ Regexes may be used, and all header names will be matched in a case-insensitive manner.
163+
164+ For example using the environment variable,
165+ ::
166+
167+ export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS=".*session.*,set-cookie"
168+
169+ will replace the value of headers such as ``session-id`` and ``set-cookie`` with ``[REDACTED]`` in the span.
170+
171+ Note:
172+ The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change.
173+
80174API
81175---
82176"""
@@ -135,8 +229,15 @@ def response_hook(span: Span, request: Request, response: HTTPResponse):
135229)
136230from opentelemetry .trace import Span , SpanKind , Tracer , get_tracer
137231from opentelemetry .util .http import (
232+ OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST ,
233+ OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE ,
234+ OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS ,
138235 ExcludeList ,
236+ get_custom_header_attributes ,
237+ get_custom_headers ,
139238 get_excluded_urls ,
239+ normalise_request_header_name ,
240+ normalise_response_header_name ,
140241 parse_excluded_urls ,
141242 redact_url ,
142243 sanitize_method ,
@@ -169,6 +270,9 @@ def _instrument(self, **kwargs: Any):
169270 ``response_hook``: An optional callback which is invoked right before the span is finished processing a response
170271 ``excluded_urls``: A string containing a comma-delimited
171272 list of regexes used to exclude URLs from tracking
273+ ``captured_request_headers``: A list of regexes to match against request headers to capture
274+ ``captured_response_headers``: A list of regexes to match against response headers to capture
275+ ``sensitive_headers``: A list of regexes to match against captured headers to be sanitized
172276 """
173277 # initialize semantic conventions opt-in if needed
174278 _OpenTelemetrySemanticConventionStability ._initialize ()
@@ -205,6 +309,24 @@ def _instrument(self, **kwargs: Any):
205309 else parse_excluded_urls (excluded_urls )
206310 ),
207311 sem_conv_opt_in_mode = sem_conv_opt_in_mode ,
312+ captured_request_headers = kwargs .get (
313+ "captured_request_headers" ,
314+ get_custom_headers (
315+ OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST
316+ ),
317+ ),
318+ captured_response_headers = kwargs .get (
319+ "captured_response_headers" ,
320+ get_custom_headers (
321+ OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE
322+ ),
323+ ),
324+ sensitive_headers = kwargs .get (
325+ "sensitive_headers" ,
326+ get_custom_headers (
327+ OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS
328+ ),
329+ ),
208330 )
209331
210332 def _uninstrument (self , ** kwargs : Any ):
@@ -223,6 +345,9 @@ def _instrument(
223345 response_hook : _ResponseHookT = None ,
224346 excluded_urls : ExcludeList | None = None ,
225347 sem_conv_opt_in_mode : _StabilityMode = _StabilityMode .DEFAULT ,
348+ captured_request_headers : list [str ] | None = None ,
349+ captured_response_headers : list [str ] | None = None ,
350+ sensitive_headers : list [str ] | None = None ,
226351):
227352 """Enables tracing of all requests calls that go through
228353 :code:`urllib.Client._make_request`"""
@@ -232,7 +357,10 @@ def _instrument(
232357 @functools .wraps (opener_open )
233358 def instrumented_open (opener , fullurl , data = None , timeout = None ):
234359 if isinstance (fullurl , str ):
235- request_ = Request (fullurl , data )
360+ # in case of multiple entries for the same header Opener.open sends the first value
361+ request_ = Request (
362+ fullurl , data , headers = dict (reversed (opener .addheaders ))
363+ )
236364 else :
237365 request_ = fullurl
238366
@@ -275,14 +403,23 @@ def _instrumented_open_call(
275403 )
276404 _set_http_url (labels , url , sem_conv_opt_in_mode )
277405
406+ headers = get_or_create_headers ()
407+ labels .update (
408+ get_custom_header_attributes (
409+ headers ,
410+ captured_request_headers ,
411+ sensitive_headers ,
412+ normalise_request_header_name ,
413+ )
414+ )
415+
278416 with tracer .start_as_current_span (
279417 span_name , kind = SpanKind .CLIENT , attributes = labels
280418 ) as span :
281419 exception = None
282420 if callable (request_hook ):
283421 request_hook (span , request )
284422
285- headers = get_or_create_headers ()
286423 inject (headers )
287424
288425 with suppress_http_instrumentation ():
@@ -310,6 +447,16 @@ def _instrumented_open_call(
310447 labels , f"{ ver_ [:1 ]} .{ ver_ [:- 1 ]} " , sem_conv_opt_in_mode
311448 )
312449
450+ if span .is_recording ():
451+ span .set_attributes (
452+ get_custom_header_attributes (
453+ result .headers ,
454+ captured_response_headers ,
455+ sensitive_headers ,
456+ normalise_response_header_name ,
457+ )
458+ )
459+
313460 if exception is not None and _report_new (sem_conv_opt_in_mode ):
314461 span .set_attribute (ERROR_TYPE , type (exception ).__qualname__ )
315462 labels [ERROR_TYPE ] = type (exception ).__qualname__
0 commit comments