1919
2020from __future__ import annotations
2121
22+ from collections .abc import AsyncIterator
23+ from typing import TYPE_CHECKING , Any
24+
2225import structlog
2326from tenacity import (
27+ RetryError ,
2428 retry ,
2529 retry_if_exception_type ,
2630 stop_after_attempt ,
2731 wait_exponential_jitter ,
28- RetryError ,
2932)
3033
3134from repowise .core .providers .llm .base import (
3740 RateLimitError ,
3841)
3942
40- from typing import TYPE_CHECKING , Any , AsyncIterator
4143from repowise .core .rate_limiter import RateLimiter
4244
4345if TYPE_CHECKING :
@@ -55,9 +57,13 @@ class LiteLLMProvider(BaseProvider):
5557
5658 Args:
5759 model: LiteLLM model string (e.g., "groq/llama-3.1-70b-versatile").
60+ When api_base is set (e.g., a local proxy), pass the bare model name
61+ (e.g., "zai.glm-5"); "openai/" is prepended unless already present.
5862 api_key: API key for the target provider. Some providers read from
5963 environment variables (e.g., GROQ_API_KEY, TOGETHER_API_KEY).
60- api_base: Optional custom API base URL (e.g., for self-hosted deployments).
64+ When api_base is set and no key is given, a dummy key ("sk-dummy") is sent.
65+ api_base: Optional custom API base URL for self-hosted LiteLLM proxy.
66+ When set, the model is treated as OpenAI-compatible.
6167 rate_limiter: Optional RateLimiter instance.
6268 """
6369
@@ -75,6 +81,13 @@ def __init__(
7581 self ._rate_limiter = rate_limiter
7682 self ._cost_tracker = cost_tracker
7783
84+ # When using a custom api_base (proxy), treat model as OpenAI-compatible.
85+ # LiteLLM requires "openai/" prefix to route to custom endpoints.
86+ if api_base and not model .startswith ("openai/" ):
87+ self ._litellm_model = f"openai/{ model } "
88+ else :
89+ self ._litellm_model = model
90+
7891 @property
7992 def provider_name (self ) -> str :
8093 return "litellm"
@@ -130,7 +143,7 @@ async def _generate_with_retry(
130143 litellm .suppress_debug_info = True
131144
132145 call_kwargs : dict [str , object ] = {
133- "model" : self ._model ,
146+ "model" : self ._litellm_model ,
134147 "messages" : [
135148 {"role" : "system" , "content" : system_prompt },
136149 {"role" : "user" , "content" : user_prompt },
@@ -142,6 +155,10 @@ async def _generate_with_retry(
142155 call_kwargs ["api_key" ] = self ._api_key
143156 if self ._api_base :
144157 call_kwargs ["api_base" ] = self ._api_base
158+ # Local proxy without auth: OpenAI SDK still requires a key.
159+ # Use a dummy key if none provided.
160+ if not self ._api_key :
161+ call_kwargs ["api_key" ] = "sk-dummy"
145162
146163 try :
147164 response = await litellm .acompletion (** call_kwargs )
@@ -199,14 +216,15 @@ async def stream_chat(
199216 tool_executor : Any | None = None ,
200217 ) -> AsyncIterator [ChatStreamEvent ]:
201218 import json as _json
219+
202220 import litellm # type: ignore[import-untyped]
203221
204222 litellm .set_verbose = False
205223 litellm .suppress_debug_info = True
206224
207225 full_messages = [{"role" : "system" , "content" : system_prompt }, * messages ]
208226 call_kwargs : dict [str , Any ] = {
209- "model" : self ._model ,
227+ "model" : self ._litellm_model ,
210228 "messages" : full_messages ,
211229 "temperature" : temperature ,
212230 "max_tokens" : max_tokens ,
@@ -218,6 +236,10 @@ async def stream_chat(
218236 call_kwargs ["api_key" ] = self ._api_key
219237 if self ._api_base :
220238 call_kwargs ["api_base" ] = self ._api_base
239+ # Local proxy without auth: OpenAI SDK still requires a key.
240+ # Use a dummy key if none provided.
241+ if not self ._api_key :
242+ call_kwargs ["api_key" ] = "sk-dummy"
221243
222244 try :
223245 stream = await litellm .acompletion (** call_kwargs )
@@ -244,7 +266,11 @@ async def stream_chat(
244266 for tc_delta in delta .tool_calls :
245267 idx = tc_delta .index
246268 if idx not in tool_calls_acc :
247- tool_calls_acc [idx ] = {"id" : getattr (tc_delta , "id" , "" ) or "" , "name" : "" , "arguments" : "" }
269+ tool_calls_acc [idx ] = {
270+ "id" : getattr (tc_delta , "id" , "" ) or "" ,
271+ "name" : "" ,
272+ "arguments" : "" ,
273+ }
248274 acc = tool_calls_acc [idx ]
249275 if getattr (tc_delta , "id" , None ):
250276 acc ["id" ] = tc_delta .id
0 commit comments