diff --git a/.changelog/4551.fixed b/.changelog/4551.fixed new file mode 100644 index 0000000000..b876160c66 --- /dev/null +++ b/.changelog/4551.fixed @@ -0,0 +1 @@ +`opentelemetry-instrumentation-wsgi`: use `PATH_INFO` and `QUERY_STRING` for URL attributes instead of parsing `RAW_URI` or `REQUEST_URI` diff --git a/instrumentation/opentelemetry-instrumentation-falcon/tests/test_falcon.py b/instrumentation/opentelemetry-instrumentation-falcon/tests/test_falcon.py index 7dbf55724b..7f216a326e 100644 --- a/instrumentation/opentelemetry-instrumentation-falcon/tests/test_falcon.py +++ b/instrumentation/opentelemetry-instrumentation-falcon/tests/test_falcon.py @@ -247,7 +247,7 @@ def _test_method(self, method, old_semconv=True, new_semconv=False): SERVER_ADDRESS: "falconframework.org", URL_SCHEME: "http", SERVER_PORT: 80, - URL_PATH: "/" if self._has_fixed_http_target else "/hello", + URL_PATH: "/hello", CLIENT_PORT: 65133, NETWORK_PROTOCOL_VERSION: "1.1", "falcon.resource": "HelloWorldResource", @@ -360,7 +360,7 @@ def test_url_template_new_semconv(self): SERVER_ADDRESS: "falconframework.org", URL_SCHEME: "http", SERVER_PORT: 80, - URL_PATH: "/" if self._has_fixed_http_target else "/user/123", + URL_PATH: "/user/123", CLIENT_PORT: 65133, NETWORK_PROTOCOL_VERSION: "1.1", "falcon.resource": "UserResource", diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py index aef56d0c6b..47ec15da57 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py @@ -262,7 +262,6 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE, SanitizeValue, - _parse_url_query, detect_synthetic_user_agent, get_custom_headers, normalise_request_header_name, @@ -360,7 +359,8 @@ def collect_request_attributes( if target is None: # Note: `"" or None is None` target = environ.get("REQUEST_URI") if target: - path, query = _parse_url_query(target) + path = environ.get("PATH_INFO") + query = environ.get("QUERY_STRING") _set_http_target(result, target, path, query, sem_conv_opt_in_mode) else: # old semconv v1.20.0 diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py index 67d64d9486..3f4af0c9b5 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py @@ -805,6 +805,8 @@ def test_request_attributes_with_full_request_uri(self): self.environ["REQUEST_URI"] = ( "http://docs.python.org:80/3/library/urllib.parse.html?highlight=params#url-parsing" # Might happen in a CONNECT request ) + self.environ["PATH_INFO"] = "/3/library/urllib.parse.html" + self.environ["QUERY_STRING"] = "highlight=params" expected_old = { HTTP_HOST: "127.0.0.1:8080", HTTP_TARGET: "http://docs.python.org:80/3/library/urllib.parse.html?highlight=params#url-parsing", @@ -825,6 +827,34 @@ def test_request_attributes_with_full_request_uri(self): expected_new.items(), ) + def test_request_attributes_with_invalid_request_uri_uses_wsgi_environ( + self, + ): + # Previously raised ValueError when REQUEST_URI was parsed. + self.environ["REQUEST_URI"] = "http://example.com[invalid" + self.environ["PATH_INFO"] = "/safe/path" + self.environ["QUERY_STRING"] = "a=b" + + expected_old = { + HTTP_TARGET: self.environ["REQUEST_URI"], + } + expected_new = { + URL_PATH: "/safe/path", + URL_QUERY: "a=b", + } + + self.assertGreaterEqual( + otel_wsgi.collect_request_attributes(self.environ).items(), + expected_old.items(), + ) + self.assertGreaterEqual( + otel_wsgi.collect_request_attributes( + self.environ, + _StabilityMode.HTTP, + ).items(), + expected_new.items(), + ) + def test_http_user_agent_attribute(self): self.environ["HTTP_USER_AGENT"] = "test-useragent" expected = {HTTP_USER_AGENT: "test-useragent"}