Skip to content

Commit 6779621

Browse files
ezhang6811 and ADOT Patch workflow authored
feat: allow HTTP operations to be configurable via environment variable (#718)
*Issue #, if available:* *Description of changes:* Same as aws-observability/aws-otel-java-instrumentation#1352. When HTTP span names don't contain a URL path, we generate the HTTP operation by truncating the URL path to only its first path segment to preserve low cardinality (e.g. /api/v1/users -> /api). This can result in overly broad operation groupings for services with endpoint paths of various depths. This PR introduces an environment variable configuration, `OTEL_AWS_HTTP_OPERATION_PATHS`, which allows users to configure their own HTTP endpoint paths. If this variable is provided, the span name's URL path will resolve to the longest matching path. Wildcards are supported with the following syntaxes: `{version}`, `:version`, or simply `*`. This way, users can decide how their service endpoints are grouped into operation names shown in CloudWatch. Added unit and integration tests to verify behavior, and did some E2E testing with an instrumented HTTP server. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --------- Co-authored-by: ADOT Patch workflow <adot-patch-workflow@github.com>
1 parent 08aa7ab commit 6779621

4 files changed

Lines changed: 317 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ If your change does not need a CHANGELOG entry, add the "skip changelog" label t
1212

1313
## Unreleased
1414

15+
- feat: support environment-configured endpoint visibility for HTTP operation names
16+
([#718](https://github.com/aws-observability/aws-otel-python-instrumentation/pull/718))
1517
- fix(lambda-layer): Disable all agentic instrumentation in Lambda by default
1618
([#710](https://github.com/aws-observability/aws-otel-python-instrumentation/pull/710))
1719
- fix(genai-instrumentors): cleanup code, align with OTel GenAI semconv, add missing attributes and fix deprecated usage

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@
2626
# Max keyword length supported by parsing into remote_operation from DB_STATEMENT
2727
MAX_KEYWORD_LENGTH = 27
2828

29+
# Name of the environment variable that holds the comma-separated HTTP operation path templates.
OTEL_AWS_HTTP_OPERATION_PATHS_CONFIG: str = "OTEL_AWS_HTTP_OPERATION_PATHS"

# Parsed path templates, cached at module level (sorted longest first).
# None means the environment variable has not been read yet.
_operation_paths: List[str] = None
34+
2935

3036
# Get dialect keywords retrieved from dialect_keywords.json file.
3137
# Only meant to be invoked by SQL_KEYWORD_PATTERN and unit tests
@@ -41,6 +47,112 @@ def _get_dialect_keywords() -> List[str]:
4147
SQL_KEYWORD_PATTERN = r"^(?:" + "|".join(_get_dialect_keywords()) + r")\b"
4248

4349

50+
def get_operation_paths() -> List[str]:
    """Return the path templates configured in OTEL_AWS_HTTP_OPERATION_PATHS, longest first.

    The environment variable is read once; the parsed list is cached in the module-level
    ``_operation_paths`` and returned on every later call until the cache is cleared.

    The value is a comma-separated list of templates. Surrounding whitespace is stripped and
    empty entries are dropped. Templates are ordered by descending segment count so that
    callers trying them in order hit the most specific template first; templates with the same
    segment count keep their configured order.

    Returns:
        The cached list of templates (empty when the variable is unset or blank).
    """
    global _operation_paths  # pylint: disable=global-statement
    if _operation_paths is None:
        config = os.environ.get(OTEL_AWS_HTTP_OPERATION_PATHS_CONFIG, "")
        paths = [entry.strip() for entry in config.split(",") if entry.strip()]
        # Trailing slashes must not count as an extra segment: "/api/" has the same depth as
        # "/api", otherwise it would tie with "/api/v1" and could be tried before it.
        # A template made only of slashes is treated as the root path "/".
        paths.sort(key=lambda path: len((path.rstrip("/") or "/").split("/")), reverse=True)
        _operation_paths = paths
    return _operation_paths
63+
64+
65+
def reset_operation_paths() -> None:
    """Clear the cached operation paths so they are parsed again on next use (for testing)."""
    global _operation_paths  # pylint: disable=global-statement
    _operation_paths = None
69+
70+
71+
def _strip_trailing_slashes(path: str) -> str:
    """Remove trailing slashes from ``path``, keeping a single "/" for an all-slash path."""
    return path.rstrip("/") or path[:1]


def apply_operation_path_span_name(span: ReadableSpan) -> ReadableSpan:
    """Override the span name with the first configured operation path matching the span's URL path.

    Templates come from ``get_operation_paths()`` and are tried in the order it returns them.
    The URL path is read from the span attributes, cut at the first ``?`` or ``#``, and compared
    segment by segment after trailing slashes are removed from both the path and the template.

    On a match the span's ``_name`` is overwritten in place with ``"<method> <template>"``, or
    just the template when no HTTP method is recorded. The template is written as configured.

    Args:
        span: The span to inspect and possibly rename.

    Returns:
        The same span object that was passed in, renamed if a template matched.
    """
    paths = get_operation_paths()
    if not paths:
        return span

    url_path = _get_url_path(span)
    # Attribute values are not guaranteed to be strings; anything else cannot be matched
    # and must not raise from inside span processing.
    if not isinstance(url_path, str) or url_path == "":
        return span

    # Strip query string and fragment (relevant for http.target)
    for sep in ("?", "#"):
        url_path = url_path.partition(sep)[0]

    url_segments = _strip_trailing_slashes(url_path).split("/")
    for pattern in paths:
        if not _segments_match(url_segments, _strip_trailing_slashes(pattern).split("/")):
            continue
        http_method = _get_http_method(span)
        # Override the span name directly
        span._name = f"{http_method} {pattern}" if http_method else pattern
        return span

    return span
107+
108+
109+
def _get_url_path(span: ReadableSpan) -> str:
    """Return the span's URL path, preferring url.path over the deprecated http.target.

    Returns None when the span has no attributes or neither attribute is set.
    """
    attributes = span.attributes
    if attributes is None:
        return None
    for key in (SpanAttributes.URL_PATH, SpanAttributes.HTTP_TARGET):
        value = attributes.get(key)
        if value is not None:
            return value
    return None
117+
118+
119+
def _get_http_method(span: ReadableSpan) -> str:
    """Return the span's HTTP method, preferring http.request.method over the deprecated http.method.

    Returns None when the span has no attributes or neither attribute is set.
    """
    attributes = span.attributes
    if attributes is None:
        return None
    method = attributes.get(SpanAttributes.HTTP_REQUEST_METHOD)
    return attributes.get(SpanAttributes.HTTP_METHOD) if method is None else method
127+
128+
129+
def _segments_match(url_segments: List[str], pattern_segments: List[str]) -> bool:
    """Tell whether the pattern's segments match the leading segments of the URL.

    A wildcard pattern segment ({param}, :param or *) matches any single non-empty URL segment;
    every other pattern segment must equal the URL segment at the same position.
    The pattern acts as a prefix: URL segments beyond the pattern's length are ignored.
    """
    # A pattern deeper than the URL can never match.
    if len(pattern_segments) > len(url_segments):
        return False

    return all(
        (url_segment != "") if _is_wildcard_segment(pattern_segment) else (pattern_segment == url_segment)
        for url_segment, pattern_segment in zip(url_segments, pattern_segments)
    )
149+
150+
151+
def _is_wildcard_segment(segment: str) -> bool:
    """Report whether a pattern segment is a wildcard: ``*``, ``:param`` or ``{param}``."""
    if segment == "*" or segment.startswith(":"):
        return True
    return segment.startswith("{") and segment.endswith("}")
154+
155+
44156
def get_ingress_operation(__, span: ReadableSpan) -> str:
45157
"""
46158
Ingress operation (i.e. operation for Server and Consumer spans) will be generated from "http.method + http.target/
@@ -166,8 +278,8 @@ def _is_valid_operation(span: ReadableSpan, operation: str) -> bool:
166278
if operation is None or operation == UNKNOWN_OPERATION:
167279
return False
168280

169-
if is_key_present(span, SpanAttributes.HTTP_METHOD):
170-
http_method: str = span.attributes.get(SpanAttributes.HTTP_METHOD)
281+
http_method: str = _get_http_method(span)
282+
if http_method:
171283
return operation != http_method
172284

173285
return True

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_span_metrics_processor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing_extensions import override
66

77
from amazon.opentelemetry.distro._aws_attribute_keys import AWS_REMOTE_SERVICE
8+
from amazon.opentelemetry.distro._aws_span_processing_util import apply_operation_path_span_name
89
from amazon.opentelemetry.distro.metric_attribute_generator import MetricAttributeGenerator
910
from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler
1011
from opentelemetry.context import Context
@@ -86,6 +87,10 @@ def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None
8687

8788
@override
8889
def on_end(self, span: ReadableSpan) -> None:
90+
# If OTEL_AWS_HTTP_OPERATION_PATHS is configured, override the span name
91+
# so that metrics use the configured operation path instead of the original span name.
92+
span = apply_operation_path_span_name(span)
93+
8994
attribute_dict: Dict[str, BoundedAttributes] = self._generator.generate_metric_attributes_dict_from_span(
9095
span, self._resource
9196
)

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_span_processing_util.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,199 @@ def test_sql_dialect_keywords_max_length(self):
381381
keywords: List[str] = _get_dialect_keywords()
382382
for keyword in keywords:
383383
self.assertLessEqual(len(keyword), MAX_KEYWORD_LENGTH)
384+
385+
386+
class TestOperationPaths(TestCase):
    """Tests for OTEL_AWS_HTTP_OPERATION_PATHS and apply_operation_path_span_name."""

    def setUp(self):
        from amazon.opentelemetry.distro._aws_span_processing_util import reset_operation_paths

        # Start every test from an empty cache so a previous test's configuration cannot leak in.
        reset_operation_paths()
        self.span_mock = MagicMock()
        self.span_mock.attributes = {}
        self.span_mock.name = "GET /api"
        self.span_mock._name = "GET /api"

    def tearDown(self):
        from amazon.opentelemetry.distro._aws_span_processing_util import reset_operation_paths

        reset_operation_paths()

    # --- helpers ---

    @staticmethod
    def _match(url_path, pattern):
        """Run _segments_match on a URL path and a pattern, both given as strings."""
        from amazon.opentelemetry.distro._aws_span_processing_util import _segments_match

        return _segments_match(url_path.split("/"), pattern.split("/"))

    def _apply(self, attributes):
        """Apply the operation-path override to the mock span carrying ``attributes``.

        The cache is reset first so the environment patched by the calling test is the one parsed.
        """
        from amazon.opentelemetry.distro._aws_span_processing_util import (
            apply_operation_path_span_name,
            reset_operation_paths,
        )

        reset_operation_paths()
        self.span_mock.attributes = attributes
        return apply_operation_path_span_name(self.span_mock)

    # --- _segments_match tests ---

    def test_segments_match_exact(self):
        self.assertTrue(self._match("/api/contests", "/api/contests"))

    def test_segments_match_no_match(self):
        self.assertFalse(self._match("/api/players", "/api/contests"))

    def test_segments_match_extra_url_segments_allowed(self):
        self.assertTrue(self._match("/api/contests/123/extra", "/api/contests"))

    def test_segments_match_pattern_longer_than_url(self):
        self.assertFalse(self._match("/api", "/api/contests/{id}"))

    def test_segments_match_curly_brace_wildcard(self):
        self.assertTrue(self._match("/api/contests/123", "/api/contests/{id}"))

    def test_segments_match_colon_param_wildcard(self):
        self.assertTrue(self._match("/api/users/42", "/api/users/:userId"))

    def test_segments_match_star_wildcard(self):
        self.assertTrue(self._match("/api/contests/123/leaderboard", "/api/contests/*/leaderboard"))

    def test_segments_match_wildcard_does_not_match_empty(self):
        self.assertFalse(self._match("/api/contests/", "/api/contests/{id}"))

    # --- apply_operation_path_span_name tests ---

    @patch.dict(
        os.environ,
        {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/contests/{id}/leaderboard, /api/contests/{id}, /api/contests"},
    )
    def test_apply_matches_url_path(self):
        result = self._apply({"url.path": "/api/contests/123/leaderboard", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/contests/{id}/leaderboard")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/teams/{id}, /api/teams"})
    def test_apply_matches_http_target_with_query(self):
        result = self._apply({"http.target": "/api/teams/5?include=roster", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/teams/{id}")

    @patch.dict(
        os.environ,
        {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/contests/{id}/leaderboard, /api/contests/{id}, /api/contests, /api"},
    )
    def test_apply_longest_match_wins(self):
        result = self._apply({"url.path": "/api/contests/42", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/contests/{id}")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/contests/{id}"})
    def test_apply_no_match_returns_original(self):
        result = self._apply({"url.path": "/unknown/path", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api")  # unchanged

    def test_apply_empty_config_returns_original(self):
        # Remove the variable for the duration of the test so an ambient value cannot influence it;
        # patch.dict restores the environment on exit.
        with patch.dict(os.environ):
            os.environ.pop("OTEL_AWS_HTTP_OPERATION_PATHS", None)
            result = self._apply({"url.path": "/api/contests/123", "http.request.method": "GET"})
        self.assertIs(result, self.span_mock)
        # The function returns its argument on every path, so identity alone proves nothing:
        # also check that the name was left untouched.
        self.assertEqual(result._name, "GET /api")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/contests"})
    def test_apply_no_http_method(self):
        result = self._apply({"url.path": "/api/contests"})
        self.assertEqual(result._name, "/api/contests")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/v1/{userId}, /api/{version}/user1"})
    def test_apply_same_length_first_config_wins(self):
        result = self._apply({"url.path": "/api/v1/user1", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/v1/{userId}")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/contests"})
    def test_apply_trailing_slash_normalized(self):
        result = self._apply({"url.path": "/api/contests/", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/contests")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/contests"})
    def test_apply_query_string_stripped(self):
        result = self._apply({"url.path": "/api/contests?page=1&size=10", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/contests")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/users/:userId/stats"})
    def test_apply_colon_param_in_config(self):
        result = self._apply({"url.path": "/api/users/42/stats", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/users/:userId/stats")

    @patch.dict(os.environ, {"OTEL_AWS_HTTP_OPERATION_PATHS": "/api/*/users/*"})
    def test_apply_star_wildcard_in_config(self):
        result = self._apply({"url.path": "/api/v2/users/42", "http.request.method": "GET"})
        self.assertEqual(result._name, "GET /api/*/users/*")

0 commit comments

Comments
 (0)