support float sampling values

quinnmil · quinnmil · commit b813f6f9c5b6 · 2025-11-24T11:41:12.000-08:00
switch to used random.random in sampler and tests for float support
add coerce rate to float helper method
diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py
@@ -250,7 +250,7 @@ def __init__(self):
             "monitor": False,
             "name": "Python App",
             "revision_sha": self._git_revision_sha(),
-            "sample_rate": 100,
+            "sample_rate": 1,
             "sample_endpoints": [],
             "endpoint_sample_rate": None,
             "sample_jobs": [],
@@ -307,28 +307,54 @@ def convert_to_float(value: Any) -> float:
         return 0.0
 
 
-def convert_sample_rate(value: Any) -> Optional[int]:
+def _coerce_rate_to_float(value: Any, context: str = "") -> float:
     """
-    Converts sample rate to integer, ensuring it's between 0 and 100.
+    Helper to convert a rate value to float between 0 and 1.
+    For backwards compatibility, values > 1 are treated as percentages
+    and converted to decimals (e.g., 80 -> 0.80).
+
+    Args:
+        value: The value to convert
+        context: Optional context string for better error messages
+            (e.g., "endpoint /users")
+
+    Returns:
+        Float between 0.0 and 1.0
+
+    Raises:
+        ValueError: If value cannot be converted to float
+    """
+    rate = float(value)
+    # Anything above 1 is assumed a percentage for backwards compat
+    if rate > 1:
+        rate = rate / 100
+    # Clamp between 0 and 1
+    if rate < 0 or rate > 1:
+        context_str = f"For {context}, you" if context else "You"
+        logger.warning(
+            f"Sample rates must be between 0 and 1. {context_str} passed in {value}, "
+            f"which we interpreted as {rate}. Clamping."
+        )
+        rate = max(0.0, min(1.0, rate))
+    return rate
+
+
+def convert_sample_rate(value: Any) -> Optional[float]:
+    """
+    Converts sample rate to float, ensuring it's between 0 and 1.
+    For backwards compatibility, values > 1 are treated as percentages
+    and converted to decimals (e.g., 80 -> 0.80).
     Allows None as a valid value.
     """
     if value is None:
         return None
     try:
-        rate = int(value)
-        if not (0 <= rate <= 100):
-            logger.warning(
-                f"Invalid sample rate {rate}. Must be between 0 and 100. "
-                "Defaulting to 100."
-            )
-            return 100
-        return rate
+        return _coerce_rate_to_float(value)
     except (TypeError, ValueError):
         logger.warning(
-            f"Invalid sample rate {value}. Must be a number between 0 and 100. "
-            "Defaulting to 100."
+            f"Invalid sample rate {value}. Must be a number. Defaulting to 1.0."
         )
-        return 100
+        return 1.0
 
 
 def convert_to_list(value: Any) -> List[Any]:
@@ -351,30 +377,36 @@ def convert_ignore_paths(value: Any) -> List[str]:
     return [_strip_leading_slash(path) for path in raw_paths]
 
 
-def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, int]:
+def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, float]:
     """
     Converts endpoint sampling configuration from string or dict format
     to a normalized dict.
-    Example: '/endpoint:40,/test:0' -> {'/endpoint': 40, '/test': 0}
+    For backwards compatibility, values > 1 are treated as percentages
+    and converted to decimals (e.g., 80 -> 0.80).
+    Example: '/endpoint:40,/test:0' -> {'/endpoint': 0.40, '/test': 0.0}
     """
     if isinstance(value, dict):
-        return {_strip_leading_slash(k): int(v) for k, v in value.items()}
+        result = {}
+        for k, v in value.items():
+            try:
+                result[_strip_leading_slash(k)] = _coerce_rate_to_float(
+                    v, context=f"endpoint {k}"
+                )
+            except (TypeError, ValueError):
+                logger.warning(f"Invalid sampling rate for endpoint {k}: {v}")
+                continue
+        return result
     if isinstance(value, str):
         if not value.strip():
             return {}
         result = {}
         pairs = [pair.strip() for pair in value.split(",")]
         for pair in pairs:
             try:
-                endpoint, rate = pair.split(":")
-                rate_int = int(rate)
-                if not (0 <= rate_int <= 100):
-                    logger.warning(
-                        f"Invalid sampling rate {rate} for endpoint {endpoint}. "
-                        "Must be between 0 and 100."
-                    )
-                    continue
-                result[_strip_leading_slash(endpoint)] = rate_int
+                endpoint, rate_str = pair.split(":")
+                result[_strip_leading_slash(endpoint)] = _coerce_rate_to_float(
+                    rate_str, context=f"endpoint {endpoint}"
+                )
             except ValueError:
                 logger.warning(f"Invalid sampling configuration: {pair}")
                 continue
diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py
@@ -44,7 +44,7 @@ def _any_sampling(self):
             Boolean indicating if any sampling is enabled
         """
         return (
-            self.sample_rate < 100
+            self.sample_rate < 1
             or self.sample_endpoints
             or self.sample_jobs
             or self.ignore_endpoints
@@ -92,7 +92,7 @@ def _get_operation_type_and_name(
         else:
             return None, None
 
-    def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> int:
+    def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> float:
         """
         Determines the effective sample rate for a given operation.
 
@@ -107,7 +107,7 @@ def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> int:
             is_ignored: boolean for if the specific transaction is ignored
 
         Returns:
-            Integer between 0 and 100 representing sample rate
+            Float between 0 and 1 representing sample rate
         """
         op_type, name = self._get_operation_type_and_name(operation)
         patterns = self.sample_endpoints if op_type == "endpoint" else self.sample_jobs
@@ -144,6 +144,4 @@ def should_sample(self, operation: str, is_ignored: bool) -> bool:
         """
         if not self._any_sampling():
             return True
-        return random.randint(1, 100) <= self.get_effective_sample_rate(
-            operation, is_ignored
-        )
+        return random.random() <= self.get_effective_sample_rate(operation, is_ignored)
diff --git a/tests/unit/core/test_config.py b/tests/unit/core/test_config.py
@@ -246,13 +246,30 @@ def test_sample_rate_conversion_from_env():
     config = ScoutConfig()
     with mock.patch.dict(os.environ, {"SCOUT_SAMPLE_RATE": "50"}):
         value = config.value("sample_rate")
-    assert isinstance(value, int)
-    assert value == 50
+    assert isinstance(value, float)
+    assert value == 0.50  # 50 is converted to 0.50 for backwards compatibility
 
 
 @pytest.mark.parametrize(
     "original, converted",
-    [("0", 0), ("50", 50), ("100", 100), ("x", 100)],
+    [
+        # Float values (0-1 range)
+        ("0", 0.0),
+        ("0.5", 0.5),
+        ("1", 1.0),
+        ("0.001", 0.001),
+        # Integer percentages (> 1, backwards compatibility)
+        ("50", 0.50),
+        ("100", 1.0),
+        ("1", 1.0),
+        ("1.5", 0.015),  # 1.5% -> 0.015
+        # Edge cases
+        ("x", 1.0),  # Invalid defaults to 1.0
+        (None, None),  # None is preserved
+        # Clamping
+        ("-2.5", 0.0),  # Negative values clamped to 0
+        ("150", 1.0),  # > 100% clamped to 1.0
+    ],
 )
 def test_sample_rate_conversion_from_python(original, converted):
     ScoutConfig.set(sample_rate=original)
@@ -270,14 +287,21 @@ def test_endpoint_sampling_conversion_from_env():
     ):
         value = config.value("sample_endpoints")
     assert isinstance(value, dict)
-    assert value == {"endpoint": 40, "test": 0}
+    assert value == {"endpoint": 0.40, "test": 0.0}  # Converted to floats
 
 
 @pytest.mark.parametrize(
     "original, converted",
     [
-        ("/endpoint:40,/test:0", {"endpoint": 40, "test": 0}),
-        ({"endpoint": 40, "test": 0}, {"endpoint": 40, "test": 0}),
+        # String format with percentages (backwards compat)
+        ("/endpoint:40,/test:0", {"endpoint": 0.40, "test": 0.0}),
+        # Dict format with percentages (backwards compat)
+        ({"endpoint": 40, "test": 0}, {"endpoint": 0.40, "test": 0.0}),
+        # Dict format with floats
+        ({"endpoint": 0.40, "test": 0.0}, {"endpoint": 0.40, "test": 0.0}),
+        # String format with floats
+        ("/endpoint:0.5,/test:0.001", {"endpoint": 0.5, "test": 0.001}),
+        # Empty
         ("", {}),
         (object(), {}),
     ],
@@ -296,14 +320,21 @@ def test_job_sampling_conversion_from_env():
     with mock.patch.dict(os.environ, {"SCOUT_SAMPLE_JOBS": "job1:30,job2:70"}):
         value = config.value("sample_jobs")
     assert isinstance(value, dict)
-    assert value == {"job1": 30, "job2": 70}
+    assert value == {"job1": 0.30, "job2": 0.70}  # Converted to floats
 
 
 @pytest.mark.parametrize(
     "original, converted",
     [
-        ("job1:30,job2:70", {"job1": 30, "job2": 70}),
-        ({"job1": 30, "job2": 70}, {"job1": 30, "job2": 70}),
+        # String format with percentages (backwards compat)
+        ("job1:30,job2:70", {"job1": 0.30, "job2": 0.70}),
+        # Dict format with percentages (backwards compat)
+        ({"job1": 30, "job2": 70}, {"job1": 0.30, "job2": 0.70}),
+        # Dict format with floats
+        ({"job1": 0.30, "job2": 0.70}, {"job1": 0.30, "job2": 0.70}),
+        # String format with floats
+        ("job1:0.5,job2:0.001", {"job1": 0.5, "job2": 0.001}),
+        # Empty
         ("", {}),
         (object(), {}),
     ],
@@ -315,3 +346,44 @@ def test_job_sampling_conversion_from_python(original, converted):
         assert config.value("sample_jobs") == converted
     finally:
         ScoutConfig.reset_all()
+
+
+# Additional tests for nullable sample rates
+@pytest.mark.parametrize(
+    "original, converted",
+    [
+        (None, None),
+        ("0", 0.0),
+        ("0.5", 0.5),
+        ("50", 0.50),
+        ("100", 1.0),
+    ],
+)
+def test_endpoint_sample_rate_nullable(original, converted):
+    """Test that endpoint_sample_rate allows None and converts other values."""
+    ScoutConfig.set(endpoint_sample_rate=original)
+    config = ScoutConfig()
+    try:
+        assert config.value("endpoint_sample_rate") == converted
+    finally:
+        ScoutConfig.reset_all()
+
+
+@pytest.mark.parametrize(
+    "original, converted",
+    [
+        (None, None),
+        ("0", 0.0),
+        ("0.5", 0.5),
+        ("50", 0.50),
+        ("100", 1.0),
+    ],
+)
+def test_job_sample_rate_nullable(original, converted):
+    """Test that job_sample_rate allows None and converts other values."""
+    ScoutConfig.set(job_sample_rate=original)
+    config = ScoutConfig()
+    try:
+        assert config.value("job_sample_rate") == converted
+    finally:
+        ScoutConfig.reset_all()
diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py