diff --git a/manifests/cpp_httpd.yml b/manifests/cpp_httpd.yml index 766eb583297..a73ea21524a 100644 --- a/manifests/cpp_httpd.yml +++ b/manifests/cpp_httpd.yml @@ -135,6 +135,7 @@ manifest: tests/test_protobuf.py: missing_feature tests/test_resource_renaming.py: missing_feature tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: v1.0.0 # real version unknown tests/test_sampling_rates.py::Test_SampleRateFunction::test_sample_rate_function: missing_feature (/sample_rate_route is not implemented) tests/test_sampling_rates.py::Test_SamplingDecisionAdded: v1.0.0 # real version unknown diff --git a/manifests/cpp_kong.yml b/manifests/cpp_kong.yml index 88fe2fcc098..bc42b037a04 100644 --- a/manifests/cpp_kong.yml +++ b/manifests/cpp_kong.yml @@ -27,6 +27,7 @@ manifest: tests/test_protobuf.py: missing_feature tests/test_resource_renaming.py: missing_feature tests/test_rum_injection.py: irrelevant + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py: missing_feature tests/test_scrubbing.py: irrelevant tests/test_semantic_conventions.py: missing_feature diff --git a/manifests/cpp_nginx.yml b/manifests/cpp_nginx.yml index 6b21e5f0d89..fe617fe7d81 100644 --- a/manifests/cpp_nginx.yml +++ b/manifests/cpp_nginx.yml @@ -362,6 +362,7 @@ manifest: tests/test_protobuf.py: missing_feature tests/test_resource_renaming.py: missing_feature tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: v1.0.0 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisionAdded: v1.0.0 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisions: v1.0.0 # real version unknown diff --git a/manifests/dotnet.yml b/manifests/dotnet.yml index 84b3ed15b4f..cf5a65083b1 100644 --- a/manifests/dotnet.yml +++ b/manifests/dotnet.yml @@ -1094,6 +1094,7 @@ manifest: tests/test_protobuf.py::Test_Protobuf: v3.15.0 tests/test_resource_renaming.py: missing_feature tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: v3.11.1 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisionAdded: v3.11.1 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisions: v3.11.1 # real version unknown diff --git a/manifests/golang.yml b/manifests/golang.yml index 1002edf1b15..df8d35d61f9 100644 --- a/manifests/golang.yml +++ b/manifests/golang.yml @@ -1316,6 +1316,7 @@ manifest: net-http: v2.4.0 net-http-orchestrion: v2.4.0 tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: v2.7.0-dev tests/test_sampling_rates.py::Test_SampleRateFunction: v1.72.1 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisionAdded: v1.72.1 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisions: v1.72.1 # real version unknown diff --git a/manifests/java.yml b/manifests/java.yml index 68a75b57b3d..e637f673f03 100644 --- a/manifests/java.yml +++ b/manifests/java.yml @@ -4056,6 +4056,7 @@ manifest: - weblog_declaration: "*": irrelevant (RUM injection only supported for spring-boot) spring-boot: v1.49.0 + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: v1.60.0-SNAPSHOT+c95286e6a5 tests/test_sampling_rates.py::Test_SampleRateFunction: - weblog_declaration: "*": v1.49.0 diff --git a/manifests/nodejs.yml b/manifests/nodejs.yml index 8f3338ed646..1fb14eeef64 100644 --- a/manifests/nodejs.yml +++ b/manifests/nodejs.yml @@ -2212,6 +2212,7 @@ manifest: nextjs: missing_feature tests/test_resource_renaming.py::Test_Resource_Renaming_Stats_Aggregation_Keys::test_stats_aggregation_with_method_and_endpoint: flaky (APPSEC-61406) tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: *ref_5_54_0 tests/test_sampling_rates.py::Test_SamplingDecisionAdded: *ref_5_17_0 tests/test_sampling_rates.py::Test_SamplingDecisions: *ref_5_54_0 diff --git a/manifests/php.yml b/manifests/php.yml index 64db4eba274..1da726152d2 100644 --- a/manifests/php.yml +++ b/manifests/php.yml @@ -935,6 +935,7 @@ manifest: tests/test_resource_renaming.py: v1.14.0 tests/test_resource_renaming.py::Test_Resource_Renaming_Stats_Aggregation_Keys: missing_feature tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: v1.13.0+4663b2fa7c20c6920f347d059b57dc2a419cb7f7 tests/test_sampling_rates.py::Test_SamplingDecisionAdded: v1.7.2 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisions: v1.13.0+4663b2fa7c20c6920f347d059b57dc2a419cb7f7 diff --git a/manifests/python.yml b/manifests/python.yml index 3bf2a70bba3..35bcf795bec 100644 --- a/manifests/python.yml +++ b/manifests/python.yml @@ -1961,6 +1961,7 @@ manifest: tests/test_resource_renaming.py::Test_Resource_Renaming_Stats_Aggregation_Keys: v3.17.0 tests/test_resource_renaming.py::Test_Resource_Renaming_Stats_Aggregation_Keys::test_stats_aggregation_with_method_and_endpoint: v4.5.0 tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: - weblog_declaration: "*": v3.6.0+dev diff --git a/manifests/ruby.yml b/manifests/ruby.yml index b48f3f19929..893c1f3df24 100644 --- a/manifests/ruby.yml +++ b/manifests/ruby.yml @@ -1840,6 +1840,7 @@ manifest: uds-sinatra: irrelevant tests/test_resource_renaming.py::Test_Resource_Renaming_Stats_Aggregation_Keys: missing_feature tests/test_rum_injection.py: irrelevant (RUM injection only supported for Java) + tests/test_sampling_rate_capping.py::Test_SamplingRateCappedIncrease: missing_feature tests/test_sampling_rates.py::Test_SampleRateFunction: v2.15.0 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisionAdded: v2.12.1 # real version unknown tests/test_sampling_rates.py::Test_SamplingDecisions: v2.15.0 # real version unknown diff --git a/tests/test_sampling_rate_capping.py b/tests/test_sampling_rate_capping.py new file mode 100644 index 00000000000..a85b35b514d --- /dev/null +++ b/tests/test_sampling_rate_capping.py @@ -0,0 +1,125 @@ +# Unless explicitly stated otherwise all files in this repository are licensed under the the Apache License Version 2.0. +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2021 Datadog, Inc. + +import time + +import requests + +from utils import weblog, interfaces, scenarios, features, logger +from utils.proxy.mocked_response import ( + MOCKED_TRACER_RESPONSES_PATH, + SequentialJsonMockedTracerResponse, + _get_proxy_domain, +) +from utils.proxy.ports import ProxyPorts + + +LOW_RATE = 0.1 +HIGH_RATE = 1.0 + + +def _send_mocked_tracer_responses(mocks: list) -> None: + """Send multiple mocked tracer responses in a single PUT request.""" + domain = _get_proxy_domain() + response = requests.put( + f"http://{domain}:{ProxyPorts.proxy_commands}{MOCKED_TRACER_RESPONSES_PATH}", + json=[m.to_json() for m in mocks], + timeout=30, + ) + response.raise_for_status() + + +@scenarios.sampling_rate_capping +@features.ensure_that_sampling_is_consistent_across_languages +class Test_SamplingRateCappedIncrease: + """When the agent returns a new higher sampling rate, the tracer should not jump directly + to the new rate. Instead, it should cap increases to 2x per flush interval, ramping up + gradually (e.g. 0.1 -> 0.2 -> 0.4 -> 0.8 -> 1.0). + """ + + NUM_LOW_RATE_RESPONSES = 3 + NUM_HIGH_RATE_RESPONSES = 30 + + def setup_sampling_rate_capped_increase(self): + low_rate_response = {"rate_by_service": {"service:,env:": LOW_RATE}} + high_rate_response = {"rate_by_service": {"service:,env:": HIGH_RATE}} + + sequence = [low_rate_response] * self.NUM_LOW_RATE_RESPONSES + [ + high_rate_response + ] * self.NUM_HIGH_RATE_RESPONSES + + # Send mocks for both trace endpoints in one call to avoid overwriting + mocks = [ + SequentialJsonMockedTracerResponse(path="/v0.4/traces", mocked_json_sequence=sequence), + SequentialJsonMockedTracerResponse(path="/v0.5/traces", mocked_json_sequence=sequence), + ] + _send_mocked_tracer_responses(mocks) + + # Generate initial traffic until the tracer picks up the low rate + for i in range(40): + weblog.get(f"/sample_rate_route/{i}") + + # Wait for a span with the low agent_psr to appear + def wait_for_low_rate(_data: dict) -> bool: + for _, span in interfaces.library.get_root_spans(): + agent_psr = span.get("metrics", {}).get("_dd.agent_psr") + if agent_psr is not None and abs(agent_psr - LOW_RATE) < 0.01: + return True + return False + + interfaces.library.wait_for(wait_for_low_rate, timeout=30) + + # Record how many spans exist before the ramp-up phase + self._spans_before_ramp = sum(1 for _ in interfaces.library.get_root_spans()) + + # Generate traffic in bursts to trigger multiple flush cycles during ramp-up + # Each burst sends requests, then sleeps to allow the tracer to flush and receive + # the next mocked response, which should trigger a capped rate increase. + request_idx = 100 + for _ in range(10): + for _j in range(20): + weblog.get(f"/sample_rate_route/{request_idx}") + request_idx += 1 + time.sleep(2) + + # Wait for a span with the high rate that appeared AFTER the low-rate phase + def wait_for_high_rate_after_ramp(_data: dict) -> bool: + for idx, (_, span) in enumerate(interfaces.library.get_root_spans()): + if idx < self._spans_before_ramp: + continue + agent_psr = span.get("metrics", {}).get("_dd.agent_psr") + if agent_psr is not None and abs(agent_psr - HIGH_RATE) < 0.01: + return True + return False + + interfaces.library.wait_for(wait_for_high_rate_after_ramp, timeout=40) + + def test_sampling_rate_capped_increase(self): + """Verify that the tracer ramps up sampling rate gradually instead of jumping directly.""" + # Only look at spans from AFTER the low-rate phase to avoid the default 1.0 at startup + agent_psr_values = set() + + for idx, (_, span) in enumerate(interfaces.library.get_root_spans()): + if idx < self._spans_before_ramp: + continue + agent_psr = span.get("metrics", {}).get("_dd.agent_psr") + if agent_psr is not None: + agent_psr_values.add(round(agent_psr, 4)) + + logger.info(f"Observed _dd.agent_psr values (ramp phase): {sorted(agent_psr_values)}") + + assert any(abs(v - LOW_RATE) < 0.01 for v in agent_psr_values), ( + f"Expected to see the low rate ({LOW_RATE}) in _dd.agent_psr values: {sorted(agent_psr_values)}" + ) + assert any(abs(v - HIGH_RATE) < 0.01 for v in agent_psr_values), ( + f"Expected to see the high rate ({HIGH_RATE}) in _dd.agent_psr values: {sorted(agent_psr_values)}" + ) + + # Key assertion: at least one intermediate value strictly between LOW_RATE and HIGH_RATE + intermediate_values = [v for v in agent_psr_values if LOW_RATE + 0.01 < v < HIGH_RATE - 0.01] + assert len(intermediate_values) > 0, ( + f"Expected at least one intermediate _dd.agent_psr value between {LOW_RATE} and {HIGH_RATE}, " + f"but only saw: {sorted(agent_psr_values)}. " + "The tracer should cap sampling rate increases to 2x per interval, not jump directly." + ) diff --git a/utils/_context/_scenarios/__init__.py b/utils/_context/_scenarios/__init__.py index ca1e06a7e9b..048384bf5d1 100644 --- a/utils/_context/_scenarios/__init__.py +++ b/utils/_context/_scenarios/__init__.py @@ -126,6 +126,13 @@ class _Scenarios: scenario_groups=[scenario_groups.sampling], ) + sampling_rate_capping = EndToEndScenario( + "SAMPLING_RATE_CAPPING", + weblog_env={"DD_TRACE_RATE_LIMIT": "10000000", "DD_TRACE_STATS_COMPUTATION_ENABLED": "false"}, + doc="Test that tracers cap sampling rate increases to 2x per interval when agent restarts", + scenario_groups=[scenario_groups.sampling], + ) + trace_propagation_style_w3c = EndToEndScenario( "TRACE_PROPAGATION_STYLE_W3C", weblog_env={ diff --git a/utils/proxy/mocked_response.py b/utils/proxy/mocked_response.py index fbcc56654c3..1a8b83407de 100644 --- a/utils/proxy/mocked_response.py +++ b/utils/proxy/mocked_response.py @@ -218,6 +218,41 @@ def to_json(self) -> dict: } +class SequentialJsonMockedTracerResponse(MockedTracerResponse): + """Overwrites JSON content on requests to a given path with a sequence of predefined responses. + + Uses a global counter (not per-runtime_id). When the sequence is exhausted, + the last element is returned for all subsequent requests. + """ + + def __init__(self, path: str, mocked_json_sequence: list[dict]): + super().__init__(path=path, mocked_headers={"Content-Type": "application/json"}) + self.mocked_json_sequence = mocked_json_sequence + """Sequence of JSON responses to return""" + + self._request_count: int = 0 + """Tracks how many requests have been made""" + + def execute(self, flow: HTTPFlow) -> None: + super().execute(flow) + + idx = min(self._request_count, len(self.mocked_json_sequence) - 1) + response = self.mocked_json_sequence[idx] + + flow.response.content = json.dumps(response).encode() + flow.response.headers["st-proxy-sequential-response-idx"] = str(idx) + + if self._request_count < len(self.mocked_json_sequence) - 1: + self._request_count += 1 + + def to_json(self) -> dict: + return { + "type": self.__class__.__name__, + "path": self.path, + "mocked_json_sequence": self.mocked_json_sequence, + } + + class _InternalMockedTracerResponse(MockedTracerResponse): """Tracer mocked responses that will be applied on the entire test session.