77import inspect
88from typing import Any , Callable , Literal , Union , get_args , get_origin
99
10- __version__ = "0.2.1 "
10+ __version__ = "0.2.2 "
1111
1212import requests
1313from packaging import version
@@ -132,9 +132,14 @@ def function_to_openai_tool(func: Callable) -> dict[str, Any]:
132132 }
133133 }
134134
135- # Utility for multiple functions, code by Kimi k2 thinking
136- def functions_to_tools (funcs : list [Callable ]) -> list [dict [str , Any ]]:
137- return [utils .function_to_openai_tool (f ) for f in funcs ]
135+ @staticmethod
136+ def generate_clients (api_key_env :str , base_url :str , ** kwargs ):
137+ def client_builder (api_key :str = None ):
138+ api_key = os .environ .get (api_key_env ) if api_key_env else api_key
139+
140+ return openai .OpenAI (api_key = api_key , base_url = base_url , default_headers = kwargs .get ("default_headers" ))
141+
142+ return client_builder
138143
139144
140145class clients :
@@ -149,68 +154,25 @@ def generic(api_key:str, base_url:str) -> openai.OpenAI:
149154 Use `clients.generic_request` for call
150155 """
151156 return openai .OpenAI (api_key = api_key , base_url = base_url )
152-
153- @staticmethod
154- def veniceai (api_key : str = None ) -> openai .OpenAI :
155- """
156- Use `clients.veniceai_request` for call
157- """
158- if os .environ .get ('VENICE_API_KEY' ) :
159- api_key = os .environ .get ('VENICE_API_KEY' )
160- return openai .OpenAI (api_key = api_key , base_url = "https://api.venice.ai/api/v1" )
161157
162- @staticmethod
163- def deepseek (api_key : str = None ) -> openai .OpenAI :
164- """
165- Use `clients.generic_request` for call
166- """
167- if os .environ .get ('DEEPSEEK_API_KEY' ) :
168- api_key = os .environ .get ('DEEPSEEK_API_KEY' )
169- return openai .OpenAI (api_key = api_key , base_url = "https://api.deepseek.com" )
158+ veniceai = utils .generate_clients ('VENICE_API_KEY' , "https://api.venice.ai/api/v1" )
159+
160+ deepseek = utils .generate_clients ('DEEPSEEK_API_KEY' ,"https://api.deepseek.com" )
170161
171- @staticmethod
172- def xai (api_key : str = None ) -> openai .OpenAI :
173- """
174- Use `clients.generic_request` for call
175- """
176- if os .environ .get ('XAI_API_KEY' ) :
177- api_key = os .environ .get ('XAI_API_KEY' )
178- return openai .OpenAI (api_key = api_key , base_url = "https://api.x.ai/v1" , timeout = 3600 )
162+ xai = utils .generate_clients ('XAI_API_KEY' , "https://api.x.ai/v1" )
179163
180- @staticmethod
181- def groq (api_key : str = None ) -> openai .OpenAI :
182- """
183- Use `clients.generic_request` for call
184- """
185- if os .environ .get ('GROQ_API_KEY' ) :
186- api_key = os .environ .get ('GROQ_API_KEY' )
187- return openai .OpenAI (api_key = api_key , base_url = "https://api.groq.com/openai/v1" )
188-
189- @staticmethod
190- def huggingface (api_key : str = None ) -> openai .OpenAI :
191- """
192- Use `clients.generic_request` for call
193- """
194- if os .environ .get ('HUGGINGFACE_API_KEY' ) :
195- api_key = os .environ .get ('HUGGINGFACE_API_KEY' )
196- return openai .OpenAI (api_key = api_key , base_url = "https://router.huggingface.co/v1" )
164+ groq = utils .generate_clients ('GROQ_API_KEY' , "https://api.groq.com/openai/v1" )
165+
166+ huggingface = utils .generate_clients ('HUGGINGFACE_API_KEY' , "https://router.huggingface.co/v1" )
197167
198- @staticmethod
199- def openrouter (api_key : str = None ) -> openai .OpenAI :
200- """
201- Use `clients.openrouter_request` for call
202- """
203- if os .environ .get ('OPENROUTER_API_KEY' ) :
204- api_key = os .environ .get ('OPENROUTER_API_KEY' )
205- return openai .OpenAI (api_key = api_key , base_url = "https://openrouter.ai/api/v1" )
168+ ollama = utils .generate_clients (None , "http://localhost:11434/v1" )
206169
207- @staticmethod
208- def ollama () -> openai .OpenAI :
209- """
210- Use `clients.generic_request` for call
211- """
212- return openai .OpenAI (api_key = "" , base_url = "http://localhost:11434/v1" )
170+ openrouter = utils .generate_clients ('OPENROUTER_API_KEY' , "https://openrouter.ai/api/v1" )
213171
172+ kimi_code = utils .generate_clients ("KIMI_CODE_API_KEY" , "https://api.kimi.com/coding/v1" ,
173+ default_headers = {"User-Agent" : "RooCode/3.30.3" ,"HTTP-Referer" : "https://github.com/RooVetGit/Roo-Cline" ,"X-Title" : "Roo Code" }
174+ )
175+
214176# ==============================
215177# Customers for calls with their specifications
216178#
@@ -322,18 +284,35 @@ def handle_streaming(stream: openai.Stream):
322284 accumulated_tool_calls = {}
323285 arg_chunks = {} # Per tool_call index: list of argument chunks
324286
287+ is_thinking = False
288+
325289 # Process each chunk
326290 for chunk in stream :
327291 # Skip if no choices
328292 if not chunk .choices :
329293 continue
294+
330295 delta = chunk .choices [0 ].delta
331296 if delta is None :
332297 continue
298+
299+ # Handle reasoning streaming
300+ if getattr (delta , 'reasoning_content' , None ):
301+ if is_thinking :
302+ yield delta .reasoning_content , [], False
303+ else :
304+ yield "<think>\n " , [], False
305+ yield delta .reasoning_content , [], False
306+ is_thinking = True
333307
334308 # Handle content streaming
335309 if delta .content :
336- yield delta .content , [], False
310+ if is_thinking :
311+ yield "</think>\n " , [], False
312+ yield delta .content , [], False
313+ is_thinking = False
314+ else :
315+ yield delta .content , [], False
337316
338317 # Handle tool calls in delta
339318 if delta .tool_calls :
@@ -412,12 +391,17 @@ def handle_tool_call(tool_call:dict) -> tuple[str, str, dict, str] :
412391
413392 return fid , fname , args , ""
414393
# Utility for multiple functions, code by Kimi k2 thinking
@staticmethod
def functions_to_tools(funcs: list[Callable]) -> list[dict[str, Any]]:
    """Convert several callables into OpenAI tool schemas in one pass."""
    return list(map(utils.function_to_openai_tool, funcs))
398+
415399# ==============================
416400# Functions to simplify the message roles
417401# ==============================
418402
419403@staticmethod
420- def create_assistant_response (content :str , tool_calls :list [dict ]= None ) -> dict [str , str ]:
404+ def create_assistant_response (content :str , tool_calls :list [dict ]= None , reasoning_content : str = None ) -> dict [str , str ]:
421405 """
422406 Creates an assistant message, optionally with tool calls.
423407
@@ -428,8 +412,12 @@ def create_assistant_response(content:str, tool_calls:list[dict]=None) -> dict[s
428412 Returns:
429413 dict: Message formatted for the API
430414 """
431- if tool_calls : return {"role" : "assistant" ,"content" : content ,"tool_calls" : tool_calls }
432- return {"role" : "assistant" ,"content" : content }
415+ r = {"role" : "assistant" ,"content" : content }
416+ if tool_calls :
417+ r .update ({"tool_calls" :tool_calls })
418+ if reasoning_content :
419+ r .update ({"reasoning_content" :reasoning_content })
420+ return r
433421
434422@staticmethod
435423def create_function_response (id :str , result :str , name :str ) -> dict [str , str , str ]:
@@ -450,14 +438,16 @@ def create_user_prompt(content:str) -> dict[str, str] :
450438# ==============================
451439
452440class agent_base :
453- def __init__ (self ):
441+ def __init__ (self , is_thinking_enabled : bool = False , yield_thinking : bool = False ):
454442
455443 self ._system_prompt = [create_system_prompt ("" )]
456444 self .messages = []
457445 self .tools = []
458446
459447 self .meta = {
460- "create_stream" :True
448+ "create_stream" :True ,
449+ "is_thinking_enabled" :is_thinking_enabled ,
450+ "yield_thinking" :yield_thinking ,
461451 }
462452
463453 def create_stream (self ):
@@ -524,6 +514,12 @@ def manage_messages_after_reply(self):
524514
525515 def execute_tools (self , fname , args ):
526516 raise NotImplementedError ("Subclasses must implement execute_tools()" )
517+
518+ def manage_token_yield (self , token , is_thinking = None ):
519+ """
520+ # TO IMPLEMENT if needed for custom front !
521+ """
522+ return token
527523
528524 def __call__ (self , prompt ):
529525
@@ -534,19 +530,41 @@ def __call__(self, prompt):
534530 ))
535531
536532 while run :
537-
533+ is_thinking = False # Reset at each request
538534 response = ""
535+ reasoning = ""
539536 for token , tool_calls , run in handle_streaming (self .create_stream ()) :
540537 if token :
541- yield token
542- response += token
543-
544- self .messages .append (create_assistant_response (
545- self .manage_assistant_response (response )
546- ))
538+
539+ if "<think>" in token or is_thinking :
540+ is_thinking = True
541+
542+ if "</think>" in token :
543+ is_thinking = False
544+
545+ if self .meta ["is_thinking_enabled" ] :
546+ reasoning += token
547+ else :
548+ response += token
549+
550+ if self .meta ["yield_thinking" ]:
551+ yield self .manage_token_yield (token , is_thinking )
552+
553+ else :
554+ yield self .manage_token_yield (token , is_thinking )
555+ response += token
547556
548557 if run :
549- self .messages .append (create_assistant_response (response , tool_calls ))
558+
559+ if self .meta ["is_thinking_enabled" ] :
560+ self .messages .append (create_assistant_response (
561+ self .manage_assistant_response (response ), tool_calls ,
562+ reasoning_content = reasoning )
563+ )
564+ else :
565+ self .messages .append (create_assistant_response (
566+ self .manage_assistant_response (response ), tool_calls )
567+ )
550568
551569 for tool_call in tool_calls :
552570 fid , fname , args , _ = handle_tool_call (tool_call )
@@ -558,6 +576,17 @@ def __call__(self, prompt):
558576 ))
559577
560578 self .manage_messages_in_reply ()
579+
580+ reasoning = re .sub (r'<think>\n?|</think>\n' , '' , reasoning ).strip ()
561581
562- self .messages .append (create_assistant_response (response ))
582+ if self .meta ["is_thinking_enabled" ] :
583+ self .messages .append (create_assistant_response (
584+ self .manage_assistant_response (response ), tool_calls ,
585+ reasoning_content = reasoning )
586+ )
587+ else :
588+ self .messages .append (create_assistant_response (
589+ self .manage_assistant_response (response ), tool_calls )
590+ )
591+
563592 self .manage_messages_after_reply ()
0 commit comments