Skip to content

Commit e9c3c2f

Browse files
Merge pull request #142 from aidenerdogan/feat/improve-rate-limiting
feat: improve rate limiting with conservative thresholds and exponential backoff
2 parents: bfd7b6b + c990475 | commit: e9c3c2f

File tree

1 file changed

+62
-8
lines changed

1 file changed

+62
-8
lines changed

datafast/llms.py

Lines changed: 62 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
# LiteLLM
1919
import litellm
20+
from litellm.exceptions import RateLimitError
2021
from litellm.utils import ModelResponse
2122

2223
# Internal imports
@@ -116,15 +117,27 @@ def _respect_rate_limit(self) -> None:
116117
# Keep only timestamps within the last minute
117118
self._request_timestamps = [
118119
ts for ts in self._request_timestamps if current - ts < 60]
119-
if len(self._request_timestamps) < self.rpm_limit:
120+
121+
# Be more conservative - wait if we're at 90% of the limit
122+
conservative_limit = max(1, int(self.rpm_limit * 0.9))
123+
124+
if len(self._request_timestamps) < conservative_limit:
120125
return
126+
121127
# Need to wait until the earliest request is outside the 60-second window
122128
earliest = self._request_timestamps[0]
123-
# Add a 1s margin to avoid accidental rate limit exceedance
124-
sleep_time = 61 - (current - earliest)
129+
# Add a 2s margin to avoid accidental rate limit exceedance
130+
sleep_time = 62 - (current - earliest)
125131
if sleep_time > 0:
126-
logger.warning(f"Rate limit reached | Waiting {sleep_time:.1f}s")
132+
logger.warning(
133+
f"Rate limit approaching | Requests: {len(self._request_timestamps)}/{self.rpm_limit} | "
134+
f"Waiting {sleep_time:.1f}s"
135+
)
127136
time.sleep(sleep_time)
137+
# Clean up old timestamps after waiting
138+
current = time.monotonic()
139+
self._request_timestamps = [
140+
ts for ts in self._request_timestamps if current - ts < 60]
128141

129142
@staticmethod
130143
def _strip_code_fences(content: str) -> str:
@@ -258,9 +271,33 @@ def generate(
258271
if response_format is not None:
259272
completion_params["response_format"] = response_format
260273

261-
# Call LiteLLM completion with batch messages
262-
response: list[ModelResponse] = litellm.batch_completion(
263-
**completion_params)
274+
# Call LiteLLM completion with batch messages - retry on rate limit
275+
max_retries = 3
276+
retry_delay = 5 # Start with 5 seconds
277+
response = None
278+
279+
for attempt in range(max_retries):
280+
try:
281+
response: list[ModelResponse] = litellm.batch_completion(
282+
**completion_params)
283+
break # Success, exit retry loop
284+
except RateLimitError as e:
285+
if attempt < max_retries - 1:
286+
wait_time = retry_delay * (2 ** attempt) # Exponential backoff
287+
logger.warning(
288+
f"Rate limit hit | Provider: {self.provider_name} | Model: {self.model_id} | "
289+
f"Attempt {attempt + 1}/{max_retries} | Waiting {wait_time}s before retry"
290+
)
291+
time.sleep(wait_time)
292+
else:
293+
logger.error(
294+
f"Rate limit exceeded after {max_retries} attempts | "
295+
f"Provider: {self.provider_name} | Model: {self.model_id}"
296+
)
297+
raise
298+
299+
if response is None:
300+
raise RuntimeError("Failed to get response after retries")
264301

265302
# Record timestamp for rate limiting (one timestamp per batch item)
266303
if self.rpm_limit is not None:
@@ -270,7 +307,24 @@ def generate(
270307

271308
# Extract content from each response
272309
results = []
273-
for one_response in response:
310+
for idx, one_response in enumerate(response):
311+
if isinstance(one_response, Exception):
312+
if isinstance(one_response, RateLimitError):
313+
logger.warning(
314+
"Rate limit error in batch item | Provider: %s | Model: %s | Item: %d",
315+
self.provider_name,
316+
self.model_id,
317+
idx,
318+
)
319+
raise RuntimeError(
320+
f"Batch item {idx} failed during generation: {one_response}"
321+
) from one_response
322+
323+
if not getattr(one_response, "choices", None):
324+
raise RuntimeError(
325+
f"Unexpected response type from LiteLLM batch completion at item {idx}: {type(one_response).__name__}"
326+
)
327+
274328
content = one_response.choices[0].message.content
275329

276330
if response_format is not None:

0 commit comments

Comments
 (0)