Skip to content

Commit 7a133e2

Browse files
committed
feat: make LLM backoff configurable end-to-end
- extend LLMConfig with backoff delay/attempt/factor fields and thread them through LLMExtractionStrategy, LLMContentFilter, table extraction, and Docker API handlers - expose the backoff parameter knobs on perform_completion_with_backoff/aperform_completion_with_backoff and document them in the md_v2 guides
1 parent b36c6da commit 7a133e2

File tree

9 files changed

+85
-16
lines changed

9 files changed

+85
-16
lines changed

crawl4ai/async_configs.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,10 @@ def __init__(
17921792
frequency_penalty: Optional[float] = None,
17931793
presence_penalty: Optional[float] = None,
17941794
stop: Optional[List[str]] = None,
1795-
n: Optional[int] = None,
1795+
n: Optional[int] = None,
1796+
backoff_base_delay: Optional[int] = None,
1797+
backoff_max_attempts: Optional[int] = None,
1798+
backoff_exponential_factor: Optional[int] = None,
17961799
):
17971800
"""Configuaration class for LLM provider and API token."""
17981801
self.provider = provider
@@ -1821,6 +1824,9 @@ def __init__(
18211824
self.presence_penalty = presence_penalty
18221825
self.stop = stop
18231826
self.n = n
1827+
self.backoff_base_delay = backoff_base_delay if backoff_base_delay is not None else 2
1828+
self.backoff_max_attempts = backoff_max_attempts if backoff_max_attempts is not None else 3
1829+
self.backoff_exponential_factor = backoff_exponential_factor if backoff_exponential_factor is not None else 2
18241830

18251831
@staticmethod
18261832
def from_kwargs(kwargs: dict) -> "LLMConfig":
@@ -1834,7 +1840,10 @@ def from_kwargs(kwargs: dict) -> "LLMConfig":
18341840
frequency_penalty=kwargs.get("frequency_penalty"),
18351841
presence_penalty=kwargs.get("presence_penalty"),
18361842
stop=kwargs.get("stop"),
1837-
n=kwargs.get("n")
1843+
n=kwargs.get("n"),
1844+
backoff_base_delay=kwargs.get("backoff_base_delay"),
1845+
backoff_max_attempts=kwargs.get("backoff_max_attempts"),
1846+
backoff_exponential_factor=kwargs.get("backoff_exponential_factor")
18381847
)
18391848

18401849
def to_dict(self):
@@ -1848,7 +1857,10 @@ def to_dict(self):
18481857
"frequency_penalty": self.frequency_penalty,
18491858
"presence_penalty": self.presence_penalty,
18501859
"stop": self.stop,
1851-
"n": self.n
1860+
"n": self.n,
1861+
"backoff_base_delay": self.backoff_base_delay,
1862+
"backoff_max_attempts": self.backoff_max_attempts,
1863+
"backoff_exponential_factor": self.backoff_exponential_factor
18521864
}
18531865

18541866
def clone(self, **kwargs):

crawl4ai/content_filter_strategy.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,9 @@ def _proceed_with_chunk(
980980
prompt,
981981
api_token,
982982
base_url=base_url,
983+
base_delay=self.llm_config.backoff_base_delay,
984+
max_attempts=self.llm_config.backoff_max_attempts,
985+
exponential_factor=self.llm_config.backoff_exponential_factor,
983986
extra_args=extra_args,
984987
)
985988

crawl4ai/extraction_strategy.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,9 @@ def extract(self, url: str, ix: int, html: str) -> List[Dict[str, Any]]:
649649
base_url=self.llm_config.base_url,
650650
json_response=self.force_json_response,
651651
extra_args=self.extra_args,
652+
base_delay=self.llm_config.backoff_base_delay,
653+
max_attempts=self.llm_config.backoff_max_attempts,
654+
exponential_factor=self.llm_config.backoff_exponential_factor
652655
) # , json_response=self.extract_type == "schema")
653656
# Track usage
654657
usage = TokenUsage(
@@ -846,6 +849,9 @@ async def aextract(self, url: str, ix: int, html: str) -> List[Dict[str, Any]]:
846849
base_url=self.llm_config.base_url,
847850
json_response=self.force_json_response,
848851
extra_args=self.extra_args,
852+
base_delay=self.llm_config.backoff_base_delay,
853+
max_attempts=self.llm_config.backoff_max_attempts,
854+
exponential_factor=self.llm_config.backoff_exponential_factor
849855
)
850856
# Track usage
851857
usage = TokenUsage(

crawl4ai/table_extraction.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,9 @@ def extract_tables(self, element: etree.Element, **kwargs) -> List[Dict[str, Any
795795
api_token=self.llm_config.api_token,
796796
base_url=self.llm_config.base_url,
797797
json_response=True,
798+
base_delay=self.llm_config.backoff_base_delay,
799+
max_attempts=self.llm_config.backoff_max_attempts,
800+
exponential_factor=self.llm_config.backoff_exponential_factor,
798801
extra_args=self.extra_args
799802
)
800803

@@ -1116,6 +1119,9 @@ def _process_chunk(self, chunk_html: str, chunk_index: int, total_chunks: int, h
11161119
api_token=self.llm_config.api_token,
11171120
base_url=self.llm_config.base_url,
11181121
json_response=True,
1122+
base_delay=self.llm_config.backoff_base_delay,
1123+
max_attempts=self.llm_config.backoff_max_attempts,
1124+
exponential_factor=self.llm_config.backoff_exponential_factor,
11191125
extra_args=self.extra_args
11201126
)
11211127

crawl4ai/utils.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,6 +1745,9 @@ def perform_completion_with_backoff(
17451745
api_token,
17461746
json_response=False,
17471747
base_url=None,
1748+
base_delay=2,
1749+
max_attempts=3,
1750+
exponential_factor=2,
17481751
**kwargs,
17491752
):
17501753
"""
@@ -1761,6 +1764,9 @@ def perform_completion_with_backoff(
17611764
api_token (str): The API token for authentication.
17621765
json_response (bool): Whether to request a JSON response. Defaults to False.
17631766
base_url (Optional[str]): The base URL for the API. Defaults to None.
1767+
base_delay (int): The base delay in seconds. Defaults to 2.
1768+
max_attempts (int): The maximum number of attempts. Defaults to 3.
1769+
exponential_factor (int): The exponential factor. Defaults to 2.
17641770
**kwargs: Additional arguments for the API request.
17651771
17661772
Returns:
@@ -1770,9 +1776,6 @@ def perform_completion_with_backoff(
17701776
from litellm import completion
17711777
from litellm.exceptions import RateLimitError
17721778

1773-
max_attempts = 3
1774-
base_delay = 2 # Base delay in seconds, you can adjust this based on your needs
1775-
17761779
extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url}
17771780
if json_response:
17781781
extra_args["response_format"] = {"type": "json_object"}
@@ -1798,7 +1801,7 @@ def perform_completion_with_backoff(
17981801
# Check if we have exhausted our max attempts
17991802
if attempt < max_attempts - 1:
18001803
# Calculate the delay and wait
1801-
delay = base_delay * (2**attempt) # Exponential backoff formula
1804+
delay = base_delay * (exponential_factor**attempt) # Exponential backoff formula
18021805
print(f"Waiting for {delay} seconds before retrying...")
18031806
time.sleep(delay)
18041807
else:
@@ -1831,6 +1834,9 @@ async def aperform_completion_with_backoff(
18311834
api_token,
18321835
json_response=False,
18331836
base_url=None,
1837+
base_delay=2,
1838+
max_attempts=3,
1839+
exponential_factor=2,
18341840
**kwargs,
18351841
):
18361842
"""
@@ -1847,6 +1853,9 @@ async def aperform_completion_with_backoff(
18471853
api_token (str): The API token for authentication.
18481854
json_response (bool): Whether to request a JSON response. Defaults to False.
18491855
base_url (Optional[str]): The base URL for the API. Defaults to None.
1856+
base_delay (int): The base delay in seconds. Defaults to 2.
1857+
max_attempts (int): The maximum number of attempts. Defaults to 3.
1858+
exponential_factor (int): The exponential factor. Defaults to 2.
18501859
**kwargs: Additional arguments for the API request.
18511860
18521861
Returns:
@@ -1857,9 +1866,6 @@ async def aperform_completion_with_backoff(
18571866
from litellm.exceptions import RateLimitError
18581867
import asyncio
18591868

1860-
max_attempts = 3
1861-
base_delay = 2 # Base delay in seconds, you can adjust this based on your needs
1862-
18631869
extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url}
18641870
if json_response:
18651871
extra_args["response_format"] = {"type": "json_object"}
@@ -1885,7 +1891,7 @@ async def aperform_completion_with_backoff(
18851891
# Check if we have exhausted our max attempts
18861892
if attempt < max_attempts - 1:
18871893
# Calculate the delay and wait
1888-
delay = base_delay * (2**attempt) # Exponential backoff formula
1894+
delay = base_delay * (exponential_factor**attempt) # Exponential backoff formula
18891895
print(f"Waiting for {delay} seconds before retrying...")
18901896
await asyncio.sleep(delay)
18911897
else:

deploy/docker/api.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,10 @@ async def handle_llm_qa(
108108
prompt_with_variables=prompt,
109109
api_token=get_llm_api_key(config), # Returns None to let litellm handle it
110110
temperature=get_llm_temperature(config),
111-
base_url=get_llm_base_url(config)
111+
base_url=get_llm_base_url(config),
112+
base_delay=config["llm"].get("backoff_base_delay", 2),
113+
max_attempts=config["llm"].get("backoff_max_attempts", 3),
114+
exponential_factor=config["llm"].get("backoff_exponential_factor", 2)
112115
)
113116

114117
return response.choices[0].message.content

docs/md_v2/api/parameters.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,19 @@ LLMConfig is useful to pass LLM provider config to strategies and functions that
439439
| **`provider`** | `"ollama/llama3","groq/llama3-70b-8192","groq/llama3-8b-8192", "openai/gpt-4o-mini" ,"openai/gpt-4o","openai/o1-mini","openai/o1-preview","openai/o3-mini","openai/o3-mini-high","anthropic/claude-3-haiku-20240307","anthropic/claude-3-opus-20240229","anthropic/claude-3-sonnet-20240229","anthropic/claude-3-5-sonnet-20240620","gemini/gemini-pro","gemini/gemini-1.5-pro","gemini/gemini-2.0-flash","gemini/gemini-2.0-flash-exp","gemini/gemini-2.0-flash-lite-preview-02-05","deepseek/deepseek-chat"`<br/>*(default: `"openai/gpt-4o-mini"`)* | Which LLM provider to use.
440440
| **`api_token`** |1.Optional. When not provided explicitly, api_token will be read from environment variables based on provider. For example: If a gemini model is passed as provider then,`"GEMINI_API_KEY"` will be read from environment variables <br/> 2. API token of LLM provider <br/> eg: `api_token = "gsk_1ClHGGJ7Lpn4WGybR7vNWGdyb3FY7zXEw3SCiy0BAVM9lL8CQv"` <br/> 3. Environment variable - use with prefix "env:" <br/> eg:`api_token = "env: GROQ_API_KEY"` | API token to use for the given provider
441441
| **`base_url`** |Optional. Custom API endpoint | If your provider has a custom endpoint
442+
| **`backoff_base_delay`** |Optional. `int` *(default: `2`)* | Seconds to wait before the first retry when the provider throttles a request.
443+
| **`backoff_max_attempts`** |Optional. `int` *(default: `3`)* | Total tries (initial call + retries) before surfacing an error.
444+
| **`backoff_exponential_factor`** |Optional. `int` *(default: `2`)* | Multiplier that increases the wait time for each retry (`delay = base_delay * factor^attempt`).
442445

443446
## 3.2 Example Usage
444447
```python
445-
llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
448+
llm_config = LLMConfig(
449+
provider="openai/gpt-4o-mini",
450+
api_token=os.getenv("OPENAI_API_KEY"),
451+
backoff_base_delay=1, # optional
452+
backoff_max_attempts=5, # optional
453+
backoff_exponential_factor=3, # optional
454+
)
446455
```
447456

448457
## 4. Putting It All Together

docs/md_v2/complete-sdk-reference.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1593,8 +1593,20 @@ The `clone()` method:
15931593
- Environment variable - use with prefix "env:" <br/> eg:`api_token = "env: GROQ_API_KEY"`
15941594
3. **`base_url`**:
15951595
- If your provider has a custom endpoint
1596-
```python
1597-
llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
1596+
1597+
4. **Backoff controls** *(optional)*:
1598+
- `backoff_base_delay` *(default `2` seconds)* – how long to pause before the first retry if the provider rate-limits you.
1599+
- `backoff_max_attempts` *(default `3`)* – total tries for the same prompt (initial call + retries).
1600+
- `backoff_exponential_factor` *(default `2`)* – how quickly the pause grows between retries. A factor of 2 yields waits like 2s → 4s → 8s.
1601+
- Because these plug into Crawl4AI’s retry helper, every LLM strategy automatically follows the pacing you define here.
1602+
```python
1603+
llm_config = LLMConfig(
1604+
provider="openai/gpt-4o-mini",
1605+
api_token=os.getenv("OPENAI_API_KEY"),
1606+
backoff_base_delay=1, # optional
1607+
backoff_max_attempts=5, # optional
1608+
backoff_exponential_factor=3, # optional
1609+
)
15981610
```
15991611
## 4. Putting It All Together
16001612
In a typical scenario, you define **one** `BrowserConfig` for your crawler session, then create **one or more** `CrawlerRunConfig` & `LLMConfig` depending on each call's needs:

docs/md_v2/core/browser-crawler-config.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,20 @@ The `clone()` method:
308308
3. **`base_url`**:
309309
- If your provider has a custom endpoint
310310

311+
4. **Retry/backoff controls** *(optional)*:
312+
- `backoff_base_delay` *(default `2` seconds)* – base delay inserted before the first retry when the provider returns a rate-limit response.
313+
- `backoff_max_attempts` *(default `3`)* – total number of attempts (initial call plus retries) before the request is surfaced as an error.
314+
- `backoff_exponential_factor` *(default `2`)* – growth rate for the retry delay (`delay = base_delay * factor^attempt`).
315+
- These values are forwarded to the shared `perform_completion_with_backoff` helper, ensuring every strategy that consumes your `LLMConfig` honors the same throttling policy.
316+
311317
```python
312-
llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
318+
llm_config = LLMConfig(
319+
provider="openai/gpt-4o-mini",
320+
api_token=os.getenv("OPENAI_API_KEY"),
321+
backoff_base_delay=1, # optional
322+
backoff_max_attempts=5, # optional
323+
backoff_exponential_factor=3, # optional
324+
)
313325
```
314326

315327
## 4. Putting It All Together

0 commit comments

Comments
 (0)