Fix rate limiter wait

xverges · xverges · commit 03d4b0d70edb · 2025-11-08T12:35:17.000+01:00
diff --git a/src/pytest_load_testing/token_bucket_rate_limiter.py b/src/pytest_load_testing/token_bucket_rate_limiter.py
@@ -137,24 +137,6 @@ def hourly_rate(self) -> int:
         rate = self._hourly_rate() if callable(self._hourly_rate) else self._hourly_rate
         return rate.calls_per_hour
 
-    def _get_or_initialize_state(self) -> Dict[str, Any]:
-        """Get the current state, initializing if necessary."""
-        with self.shared_state.locked_dict() as state:
-            if not state:
-                current_time = time.time()
-                state.update(
-                    {
-                        "start_time": current_time,
-                        "last_refill_time": current_time,
-                        "tokens": self.burst_capacity,  # Start with full bucket
-                        "call_count": 0,
-                        "exceptions": 0,
-                    }
-                )
-
-            # Return a copy to avoid modifications outside the lock
-            return dict(state)
-
     def _check_rate(self, state: Dict[str, Any]) -> None:
         """Check if the current rate is within acceptable limits."""
         current_time = time.time()
@@ -228,16 +210,20 @@ def _calculate_wait_time_and_update(self) -> float:
             # Update tokens (can't exceed burst capacity)
             tokens = min(state["tokens"] + new_tokens, self.burst_capacity)
 
-            # If we have at least 1 token, we can proceed immediately
+            # Always consume 1 token immediately, even if it makes tokens negative
+            # This ensures proper serialization across multiple threads/processes
+            # Negative tokens represent a "debt" that must be paid back with wait time
+            state["tokens"] = tokens - 1
+            state["last_refill_time"] = current_time
+
+            # If we had at least 1 token, we can proceed immediately
             if tokens >= 1:
-                # Consume 1 token and update the state
-                state["tokens"] = tokens - 1
-                state["last_refill_time"] = current_time
                 return 0
 
-            # Calculate wait time until we have 1 token
-            wait_time = (1 - tokens) / tokens_per_second
-            return max(0, wait_time)
+            # Calculate wait time to pay back the token debt
+            # We need to wait until tokens would have refilled to 0
+            wait_time = abs(state["tokens"]) / tokens_per_second
+            return wait_time
 
     def _increment_call_count_and_check_rate(self) -> Tuple[int, Dict[str, Any]]:
         """
@@ -275,6 +261,13 @@ def _check_max_calls(self, call_count: int) -> None:
     class RateLimitContext:
         """
         Context object yielded by rate_limited_context that provides access to rate limiter metrics.
+
+        Properties:
+            id: Rate limiter identifier
+            hourly_rate: Configured rate limit in calls per hour
+            call_count: Total number of calls made
+            exceptions: Total number of exceptions encountered
+            start_time: Unix timestamp of when the first call was made
         """
 
         _limiter: "TokenBucketRateLimiter"
@@ -296,6 +289,11 @@ def call_count(self) -> int:
         def exceptions(self) -> int:
             return self._state["exceptions"]
 
+        @property
+        def start_time(self) -> float:
+            """Timestamp of when the first call was made (Unix timestamp)."""
+            return self._state["start_time"]
+
     @contextlib.contextmanager
     def rate_limited_context(self) -> Generator[RateLimitContext, Any, None]:
         """
@@ -305,6 +303,7 @@ def rate_limited_context(self) -> Generator[RateLimitContext, Any, None]:
             with rate_limiter.rate_limited_context() as ctx:
                 print(f"Using rate limiter {ctx.id} with rate {ctx.hourly_rate}/hr")
                 print(f"Current call count: {ctx.call_count}")
+                print(f"First call at: {ctx.start_time}")
                 perform_action()
         """
         # Calculate wait time and update tokens atomically
diff --git a/tests/test_token_bucket/test_concurrent_rate_limiting.py b/tests/test_token_bucket/test_concurrent_rate_limiting.py
@@ -0,0 +1,173 @@
+"""
+Tests for concurrent rate limiting behavior.
+
+These tests verify that the token bucket rate limiter properly enforces
+rate limits when multiple threads/processes are competing for tokens.
+"""
+
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+from pytest_load_testing.concurrent_fixtures import SharedJson
+from pytest_load_testing.token_bucket_rate_limiter import RateLimit, TokenBucketRateLimiter
+
+
+def test_concurrent_workers_respect_rate_limit(tmp_path):
+    """
+    Test that multiple concurrent workers properly respect the rate limit.
+
+    This test verifies the fix for the bug where multiple threads could
+    calculate wait times based on the same token state, leading to rate
+    limit violations.
+
+    With the old buggy code, this test would fail because multiple threads
+    would see the same token state and all proceed after the same wait time.
+
+    With the fix, tokens are consumed immediately (even going negative),
+    ensuring proper serialization.
+    """
+    # Create a rate limiter with 1 call per second and burst capacity of 1
+    data_file = tmp_path / "concurrent_test.json"
+    lock_file = tmp_path / "concurrent_test.lock"
+    shared_state = SharedJson(
+        data_file=data_file,
+        lock_file=lock_file,
+    )
+
+    limiter = TokenBucketRateLimiter(
+        shared_state=shared_state,
+        hourly_rate=RateLimit.per_second(1),  # 1 call per second
+        burst_capacity=1,  # No burst allowance
+        max_drift=0.5,
+        num_calls_between_checks=1000,
+        seconds_before_first_check=100.0,
+    )
+
+    # Track execution times
+    execution_times = []
+
+    def make_call():
+        """Make a rate-limited call and record the time."""
+        with limiter.rate_limited_context():
+            execution_times.append(time.time())
+
+    # Run 5 calls concurrently with 2 workers
+    start_time = time.time()
+    with ThreadPoolExecutor(max_workers=2) as executor:
+        futures = [executor.submit(make_call) for _ in range(5)]
+        for future in futures:
+            future.result()
+
+    elapsed = time.time() - start_time
+
+    # With 1 call/second rate limit, 5 calls should take at least 4 seconds
+    # (first call is immediate, then 4 more calls at 1/second)
+    assert elapsed >= 4.0, f"Expected at least 4 seconds for 5 calls at 1/sec rate, but took only {elapsed:.2f}s"
+
+    # Verify calls were properly spaced
+    # Sort execution times
+    execution_times.sort()
+
+    # Check spacing between consecutive calls
+    for i in range(1, len(execution_times)):
+        gap = execution_times[i] - execution_times[i - 1]
+        # Each gap should be at least 0.9 seconds (allowing small timing variance)
+        assert gap >= 0.9, f"Gap between call {i - 1} and {i} was only {gap:.2f}s, expected at least 0.9s"
+
+
+def test_concurrent_workers_with_burst_capacity(tmp_path):
+    """
+    Test that burst capacity allows initial rapid calls, then enforces rate limit.
+    """
+    data_file = tmp_path / "burst_test.json"
+    lock_file = tmp_path / "burst_test.lock"
+    shared_state = SharedJson(
+        data_file=data_file,
+        lock_file=lock_file,
+    )
+
+    limiter = TokenBucketRateLimiter(
+        shared_state=shared_state,
+        hourly_rate=RateLimit.per_second(1),  # 1 call per second
+        burst_capacity=3,  # Allow 3 rapid calls
+        max_drift=0.5,
+        num_calls_between_checks=1000,
+        seconds_before_first_check=100.0,
+    )
+
+    execution_times = []
+
+    def make_call():
+        with limiter.rate_limited_context():
+            execution_times.append(time.time())
+
+    # Run 5 calls concurrently
+    with ThreadPoolExecutor(max_workers=2) as executor:
+        futures = [executor.submit(make_call) for _ in range(5)]
+        for future in futures:
+            future.result()
+
+    execution_times.sort()
+
+    # First 3 calls should be rapid (using burst capacity)
+    first_three_duration = execution_times[2] - execution_times[0]
+    assert first_three_duration < 0.5, f"First 3 calls should be rapid, but took {first_three_duration:.2f}s"
+
+    # Calls 4 and 5 should be rate-limited
+    # Each should start about 1 second after the previous call (0.9s asserted to allow timing variance)
+    gap_3_to_4 = execution_times[3] - execution_times[2]
+    gap_4_to_5 = execution_times[4] - execution_times[3]
+
+    assert gap_3_to_4 >= 0.9, f"Gap from call 3 to 4 was only {gap_3_to_4:.2f}s, expected ~1s"
+    assert gap_4_to_5 >= 0.9, f"Gap from call 4 to 5 was only {gap_4_to_5:.2f}s, expected ~1s"
+
+
+def test_negative_tokens_prevent_race_condition(tmp_path):
+    """
+    Test that the fix properly prevents the race condition by allowing negative tokens.
+
+    This test specifically targets the bug where multiple threads could see
+    the same insufficient (< 1) token count and all calculate the same wait time.
+    """
+    data_file = tmp_path / "negative_tokens_test.json"
+    lock_file = tmp_path / "negative_tokens_test.lock"
+    shared_state = SharedJson(
+        data_file=data_file,
+        lock_file=lock_file,
+    )
+
+    limiter = TokenBucketRateLimiter(
+        shared_state=shared_state,
+        hourly_rate=RateLimit.per_second(2),  # 2 calls per second
+        burst_capacity=1,  # Only 1 token available initially
+        max_drift=0.5,
+        num_calls_between_checks=1000,
+        seconds_before_first_check=100.0,
+    )
+
+    call_count = [0]
+
+    def make_call():
+        with limiter.rate_limited_context():
+            call_count[0] += 1
+
+    # Launch 4 calls simultaneously
+    start_time = time.time()
+    with ThreadPoolExecutor(max_workers=4) as executor:
+        futures = [executor.submit(make_call) for _ in range(4)]
+        for future in futures:
+            future.result()
+
+    elapsed = time.time() - start_time
+
+    # With 2 calls/second (one token refilled every 0.5s) and 4 calls:
+    # - Call 1: immediate (uses burst token)
+    # - Call 2: waits 0.5s (token balance of -1)
+    # - Call 3: waits 1.0s (token balance of -2)
+    # - Call 4: waits 1.5s (token balance of -3)
+    # Total time should be at least 1.5 seconds; 1.4s is asserted to allow timing variance
+    assert elapsed >= 1.4, (
+        f"Expected at least 1.4 seconds for 4 calls at 2/sec rate with burst=1, but took only {elapsed:.2f}s"
+    )
+
+    assert call_count[0] == 4, f"Expected 4 calls, got {call_count[0]}"