Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion sdk/core/corehttp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,18 @@

### Features Added

- Introduced the keyword argument `additional_allowed_query_params` to `DistributedHttpTracingPolicy` to allow users to specify additional URL query parameters that should not be redacted in span attributes. [#46657](https://github.com/Azure/azure-sdk-for-python/pull/46657)

### Breaking Changes

### Bugs Fixed

- Fixed `PipelineClient.format_url` to avoid adding trailing slashes when the URL template contains only query parameters. #45113
- Fixed `PipelineClient.format_url` to avoid adding trailing slashes when the URL template contains only query parameters. [#45113](https://github.com/Azure/azure-sdk-for-python/pull/45113)

### Other Changes

- URL attributes in HTTP tracing spans will now have query parameters sanitized by default. To add additional query parameters that should not be redacted, use the `additional_allowed_query_params` argument when constructing the `DistributedHttpTracingPolicy`. [#46657](https://github.com/Azure/azure-sdk-for-python/pull/46657)

## 1.0.0b7 (2026-02-05)

### Features Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@
from __future__ import annotations
import logging
import urllib.parse
from typing import Any, Optional, Tuple, Union, Type, Mapping, Dict, TYPE_CHECKING
from typing import Any, Iterable, MutableSet, Optional, Tuple, Union, Type, Mapping, Dict, TYPE_CHECKING
from types import TracebackType

from ...rest import HttpRequest
from ...rest._rest_py3 import _HttpResponseBase as SansIOHttpResponse
from ._base import SansIOHTTPPolicy
from ._utils import sanitize_url
from ...settings import settings
from ...instrumentation.tracing._models import SpanKind, TracingOptions
from ...instrumentation.tracing._tracer import get_tracer
from ...utils._utils import CaseInsensitiveSet

if TYPE_CHECKING:
from ..pipeline import PipelineRequest, PipelineResponse
Expand All @@ -31,11 +33,17 @@ class DistributedHttpTracingPolicy(SansIOHTTPPolicy[HttpRequest, SansIOHttpRespo

:keyword instrumentation_config: Configuration for the instrumentation providers.
:type instrumentation_config: dict[str, Any]
:keyword additional_allowed_query_params: Query parameter names whose values are allowed in recorded URLs.
These are added to the default set which includes "api-version".
:type additional_allowed_query_params: Iterable[str]
"""

TRACING_CONTEXT = "TRACING_CONTEXT"
_SUPPRESSION_TOKEN = "SUPPRESSION_TOKEN"

DEFAULT_QUERY_PARAMS_ALLOWLIST: frozenset[str] = frozenset(["api-version"])
_REDACTED_PLACEHOLDER = "REDACTED"

# Attribute names
_HTTP_RESEND_COUNT = "http.request.resend_count"
_USER_AGENT_ORIGINAL = "user_agent.original"
Expand All @@ -50,9 +58,13 @@ def __init__( # pylint: disable=unused-argument
self,
*,
instrumentation_config: Optional[Mapping[str, Any]] = None,
additional_allowed_query_params: Optional[Iterable[str]] = None,
**kwargs: Any,
) -> None:
self._instrumentation_config = instrumentation_config
self.allowed_query_params: MutableSet[str] = CaseInsensitiveSet(self.__class__.DEFAULT_QUERY_PARAMS_ALLOWLIST)
if additional_allowed_query_params:
self.allowed_query_params.update(additional_allowed_query_params)

def on_request(self, request: PipelineRequest[HttpRequest]) -> None:
"""Starts a span for the network call.
Expand Down Expand Up @@ -149,7 +161,7 @@ def _set_http_client_span_attributes(
"""
attributes: Dict[str, Any] = {
self._HTTP_REQUEST_METHOD: request.method,
self._URL_FULL: request.url,
self._URL_FULL: sanitize_url(request.url, self.allowed_query_params, self._REDACTED_PLACEHOLDER),
}

parsed_url = urllib.parse.urlparse(request.url)
Expand Down
38 changes: 36 additions & 2 deletions sdk/core/corehttp/corehttp/runtime/policies/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@
from __future__ import annotations
import datetime
import email.utils
from typing import Optional, cast, Union, TYPE_CHECKING
import urllib.parse
from typing import AbstractSet, Optional, cast, Union, TYPE_CHECKING
from urllib.parse import urlparse

from ...rest import HttpResponse, AsyncHttpResponse, HttpRequest
from ...utils._utils import case_insensitive_dict
from ...utils._utils import case_insensitive_dict, CaseInsensitiveSet

if TYPE_CHECKING:
from ...runtime.pipeline import PipelineResponse
Expand Down Expand Up @@ -92,3 +93,36 @@ def get_domain(url: str) -> str:
:return: The domain of the url.
"""
return str(urlparse(url).netloc).lower()


def sanitize_url(url: str, allowed_query_params: AbstractSet[str], redacted_placeholder: str = "REDACTED") -> str:
    """Return ``url`` with the values of non-allowlisted query parameters replaced.

    Only the query component is touched. Parameters that have no ``=`` carry no value and are kept verbatim;
    a URL without a query string is returned unchanged.

    :param str url: The URL to sanitize.
    :param set[str] allowed_query_params: Names of the query parameters whose values are kept as-is.
        Anything that is not already a :class:`~corehttp.utils._utils.CaseInsensitiveSet` is wrapped in one
        for this call, so name matching is case-insensitive either way.
    :param str redacted_placeholder: The text substituted for every redacted value.
    :return: The URL with the values of all other query parameters replaced by the placeholder.
    :rtype: str
    """
    components = urllib.parse.urlparse(url)
    query = components.query
    if not query:
        # Nothing to redact: hand back the caller's string untouched.
        return url

    allowlist = (
        allowed_query_params
        if isinstance(allowed_query_params, CaseInsensitiveSet)
        else CaseInsensitiveSet(allowed_query_params)
    )

    def _redact(pair: str) -> str:
        # Split on the first "=" only, so the name is everything before it.
        name, separator, _ = pair.partition("=")
        if not separator or name in allowlist:
            return pair
        return f"{name}={redacted_placeholder}"

    redacted_query = "&".join(_redact(pair) for pair in query.split("&"))
    return urllib.parse.urlunparse(components._replace(query=redacted_query))
40 changes: 40 additions & 0 deletions sdk/core/corehttp/corehttp/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Iterator,
Mapping,
MutableMapping,
MutableSet,
Optional,
Tuple,
Union,
Expand Down Expand Up @@ -134,6 +135,45 @@ def __repr__(self) -> str:
return str(dict(self.items()))


class CaseInsensitiveSet(MutableSet[str]):
    """A mutable set of strings with case-insensitive membership.

    Every value is indexed by its lowercased form, so ``in``, :meth:`add` and :meth:`discard` ignore case.
    Iteration yields each value in the spelling with which it was first added.

    :param data: Initial values for the set.
    :type data: Iterable[str]
    """

    def __init__(self, data: Optional[Iterable[str]] = None) -> None:
        # Maps the lowercased form of each value to its first-seen original spelling.
        self._lower_to_original: Dict[str, str] = {}
        if data:
            self.update(data)

    def __contains__(self, item: object) -> bool:
        """Return whether ``item`` is a string that matches a stored value, ignoring case.

        :param object item: The candidate value. Non-string values are never members.
        :return: True if a case-insensitive match is stored, False otherwise.
        :rtype: bool
        """
        if not isinstance(item, str):
            return False
        return item.lower() in self._lower_to_original

    def __iter__(self) -> Iterator[str]:
        """Iterate over the stored values in their original spelling.

        :return: An iterator over the stored values.
        :rtype: Iterator[str]
        """
        return iter(self._lower_to_original.values())

    def __len__(self) -> int:
        """Return the number of case-insensitively distinct values.

        :return: The number of stored values.
        :rtype: int
        """
        return len(self._lower_to_original)

    def __repr__(self) -> str:
        """Return a readable representation listing the stored values.

        :return: The class name followed by the list of stored values.
        :rtype: str
        """
        return f"{type(self).__name__}({list(self)!r})"

    def add(self, value: str) -> None:
        """Add ``value`` unless a case-insensitive match is already stored.

        The first spelling wins: adding ``"ABC"`` after ``"abc"`` leaves ``"abc"`` in place.

        :param str value: The value to add.
        """
        lower = value.lower()
        if lower not in self._lower_to_original:
            self._lower_to_original[lower] = value

    def discard(self, value: str) -> None:
        """Remove the stored value matching ``value`` case-insensitively, if there is one.

        Never raises for an absent value. Non-string values cannot be members (see ``__contains__``),
        so they are ignored rather than failing on the ``.lower()`` call.

        :param str value: The value to remove.
        """
        if isinstance(value, str):
            self._lower_to_original.pop(value.lower(), None)

    def update(self, *others: Iterable[str]) -> None:
        """Add every item from each of the given iterables.

        :param others: Iterables of values to add.
        :type others: Iterable[str]
        """
        for other in others:
            for item in other:
                self.add(item)


def get_file_items(files: "FilesType") -> Sequence[Tuple[str, "FileType"]]:
if isinstance(files, Mapping):
# casting because ItemsView technically isn't a Sequence, even
Expand Down
101 changes: 98 additions & 3 deletions sdk/core/corehttp/tests/test_tracing_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_distributed_tracing_policy(tracing_helper, http_response):
assert traceparent.split("-")[2] == format_span_id(span_context.span_id)

assert finished_spans[0].attributes.get(policy._HTTP_REQUEST_METHOD) == "GET"
assert finished_spans[0].attributes.get(policy._URL_FULL) == "http://localhost/temp?query=query"
assert finished_spans[0].attributes.get(policy._URL_FULL) == "http://localhost/temp?query=REDACTED"
assert finished_spans[0].attributes.get(policy._SERVER_ADDRESS) == "localhost"
assert finished_spans[0].attributes.get(policy._USER_AGENT_ORIGINAL) is None
assert finished_spans[0].attributes.get(policy._HTTP_RESPONSE_STATUS_CODE) == 202
Expand Down Expand Up @@ -138,7 +138,7 @@ def test_distributed_tracing_policy_with_user_agent_policy(tracing_helper, http_
assert traceparent.split("-")[2] == format_span_id(span_context.span_id)

assert finished_spans[0].attributes.get(policy._HTTP_REQUEST_METHOD) == "GET"
assert finished_spans[0].attributes.get(policy._URL_FULL) == "http://localhost/temp?query=query"
assert finished_spans[0].attributes.get(policy._URL_FULL) == "http://localhost/temp?query=REDACTED"
assert finished_spans[0].attributes.get(policy._SERVER_ADDRESS) == "localhost"
assert finished_spans[0].attributes.get(policy._USER_AGENT_ORIGINAL) is not None
assert finished_spans[0].attributes.get(policy._USER_AGENT_ORIGINAL).endswith("test-user-agent")
Expand Down Expand Up @@ -221,7 +221,7 @@ def test_distributed_tracing_policy_with_tracing_options(tracing_helper, http_re
assert finished_spans[0].parent is root_span.get_span_context()

assert finished_spans[0].attributes.get(policy._HTTP_REQUEST_METHOD) == "GET"
assert finished_spans[0].attributes.get(policy._URL_FULL) == "http://localhost/temp?query=query"
assert finished_spans[0].attributes.get(policy._URL_FULL) == "http://localhost/temp?query=REDACTED"
assert finished_spans[0].attributes.get(policy._SERVER_ADDRESS) == "localhost"
assert finished_spans[0].attributes.get(policy._USER_AGENT_ORIGINAL) is None
assert finished_spans[0].attributes.get(policy._HTTP_RESPONSE_STATUS_CODE) == 202
Expand Down Expand Up @@ -268,3 +268,98 @@ def test_suppress_http_auto_instrumentation(port, tracing_helper):
assert finished_spans[0].attributes.get(policy._HTTP_RESPONSE_STATUS_CODE) == 200

requests_instrumentor.uninstrument()


@pytest.mark.parametrize("http_response", HTTP_RESPONSES)
def test_url_full_sanitized_default(tracing_helper, http_response):
    """A default policy keeps only the ``api-version`` value in the recorded ``url.full``."""
    url = "http://localhost/temp?api-version=2024-01-01&secret=mysecret&token=abc"
    expected_url = "http://localhost/temp?api-version=2024-01-01&secret=REDACTED&token=REDACTED"

    with tracing_helper.tracer.start_as_current_span("Root"):
        policy = DistributedHttpTracingPolicy()

        request = HttpRequest("GET", url)
        pipeline_request = PipelineRequest(request, PipelineContext(None))
        policy.on_request(pipeline_request)

        response = create_http_response(http_response, request, None, headers=request.headers, status_code=200)
        pipeline_response = PipelineResponse(request, response, PipelineContext(None))
        policy.on_response(pipeline_request, pipeline_response)

    http_span = tracing_helper.exporter.get_finished_spans()[0]
    assert http_span.attributes.get(policy._URL_FULL) == expected_url


@pytest.mark.parametrize("http_response", HTTP_RESPONSES)
def test_url_full_sanitized_custom_allowed(tracing_helper, http_response):
    """Names passed as ``additional_allowed_query_params`` are kept alongside the default allowlist."""
    url = "http://localhost/temp?api-version=2024-01-01&secret=mysecret&token=abc"
    expected_url = "http://localhost/temp?api-version=2024-01-01&secret=REDACTED&token=abc"

    with tracing_helper.tracer.start_as_current_span("Root"):
        policy = DistributedHttpTracingPolicy(additional_allowed_query_params=["token"])

        request = HttpRequest("GET", url)
        pipeline_request = PipelineRequest(request, PipelineContext(None))
        policy.on_request(pipeline_request)

        response = create_http_response(http_response, request, None, headers=request.headers, status_code=200)
        pipeline_response = PipelineResponse(request, response, PipelineContext(None))
        policy.on_response(pipeline_request, pipeline_response)

    http_span = tracing_helper.exporter.get_finished_spans()[0]
    assert http_span.attributes.get(policy._URL_FULL) == expected_url


@pytest.mark.parametrize("http_response", HTTP_RESPONSES)
def test_url_full_sanitized_case_insensitive(tracing_helper, http_response):
    """An allowed name matches a query parameter regardless of letter case."""
    url = "http://localhost/temp?myparam=value1&other=value2"
    expected_url = "http://localhost/temp?myparam=value1&other=REDACTED"

    with tracing_helper.tracer.start_as_current_span("Root"):
        policy = DistributedHttpTracingPolicy(additional_allowed_query_params=["MyParam"])

        request = HttpRequest("GET", url)
        pipeline_request = PipelineRequest(request, PipelineContext(None))
        policy.on_request(pipeline_request)

        response = create_http_response(http_response, request, None, headers=request.headers, status_code=200)
        pipeline_response = PipelineResponse(request, response, PipelineContext(None))
        policy.on_response(pipeline_request, pipeline_response)

    http_span = tracing_helper.exporter.get_finished_spans()[0]
    assert http_span.attributes.get(policy._URL_FULL) == expected_url


@pytest.mark.parametrize("http_response", HTTP_RESPONSES)
def test_url_full_no_query_params(tracing_helper, http_response):
    """A URL with no query string is recorded as-is."""
    url = "http://localhost/temp"

    with tracing_helper.tracer.start_as_current_span("Root"):
        policy = DistributedHttpTracingPolicy()

        request = HttpRequest("GET", url)
        pipeline_request = PipelineRequest(request, PipelineContext(None))
        policy.on_request(pipeline_request)

        response = create_http_response(http_response, request, None, headers=request.headers, status_code=200)
        pipeline_response = PipelineResponse(request, response, PipelineContext(None))
        policy.on_response(pipeline_request, pipeline_response)

    http_span = tracing_helper.exporter.get_finished_spans()[0]
    assert http_span.attributes.get(policy._URL_FULL) == "http://localhost/temp"


@pytest.mark.parametrize("http_response", HTTP_RESPONSES)
def test_url_full_allowed_query_params_additive(tracing_helper, http_response):
    """Names added to ``allowed_query_params`` after construction are honoured when the span is recorded."""
    url = "http://localhost/temp?api-version=v1&custom=val&other=secret"
    expected_url = "http://localhost/temp?api-version=v1&custom=val&other=REDACTED"

    with tracing_helper.tracer.start_as_current_span("Root"):
        policy = DistributedHttpTracingPolicy()
        # Mutate the allowlist on the live policy instead of passing it to the constructor.
        policy.allowed_query_params.add("custom")

        request = HttpRequest("GET", url)
        pipeline_request = PipelineRequest(request, PipelineContext(None))
        policy.on_request(pipeline_request)

        response = create_http_response(http_response, request, None, headers=request.headers, status_code=200)
        pipeline_response = PipelineResponse(request, response, PipelineContext(None))
        policy.on_response(pipeline_request, pipeline_response)

    http_span = tracing_helper.exporter.get_finished_spans()[0]
    assert http_span.attributes.get(policy._URL_FULL) == expected_url
Loading
Loading