From 5cb71bc0b8af10a4d86501bd00294ac76686176f Mon Sep 17 00:00:00 2001
From: Sihyeon Jang
Date: Wed, 18 Mar 2026 16:31:53 +0900
Subject: [PATCH] Fix LiteLLM concurrent_requests exceeding 100 being capped by httpx default pool limit

Configure litellm's global HTTP client session with connection pool
limits matching the user-specified concurrent_requests value, bypassing
the default httpx max_connections=100 cap.

Fixes #1100

Signed-off-by: Sihyeon Jang
---
 src/lighteval/models/endpoints/litellm_model.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/lighteval/models/endpoints/litellm_model.py b/src/lighteval/models/endpoints/litellm_model.py
index 87332d1d7..619e3a46d 100644
--- a/src/lighteval/models/endpoints/litellm_model.py
+++ b/src/lighteval/models/endpoints/litellm_model.py
@@ -25,6 +25,7 @@
 from concurrent.futures import ThreadPoolExecutor
 from json import JSONDecodeError
 
+import httpx
 import requests
 from tqdm import tqdm
 
@@ -158,6 +159,16 @@ def __init__(self, config: LiteLLMModelConfig) -> None:
         self.pairwise_tokenization = False
         litellm.drop_params = True
         litellm.verbose = config.verbose
+
+        # Configure litellm's global HTTP client to match concurrent_requests,
+        # avoiding the default httpx connection pool limit of 100.
+        litellm.client_session = httpx.Client(
+            limits=httpx.Limits(
+                max_connections=config.concurrent_requests,
+                max_keepalive_connections=config.concurrent_requests,
+            ),
+            timeout=httpx.Timeout(config.timeout) if config.timeout else httpx.Timeout(None),
+        )
         self.prompt_manager = PromptManager(
             use_chat_template=True, tokenizer=self.tokenizer, system_prompt=config.system_prompt
         )