1- import gradio as gr
21import open_taranis as T
3- from time import time
42
53class chat_fn_gradio :
64 def __init__ (self ,
@@ -15,77 +13,55 @@ def __init__(self,
1513 self .model = model
1614 self ._system_prompt = [{"role" :"system" , "content" :_system_prompt }]
1715
18- self .meta = {
19- "defaut_create_stream_used" :False
20- }
21- self .memory = []
22-
2316 def create_stream (self , messages ):
2417 """
2518 TO IMPLEMENT
26-
27- ```
28- if self.meta["defaut_create_stream_used"]==False : # Just used to detect, must be rewritten by the user
29- print("Classic create_stream method used !")
30- self.meta=True
31-
32- return self.request(self.client,messages=messages,model=self.model)
33- ```
34-
3519 """
36- if self .meta ["defaut_create_stream_used" ]== False : # Just used to detect, must be rewritten by the user
37- print ("Classic create_stream method used !" )
38- self .meta = True
3920
40- return self .request (self .client ,messages = messages ,model = self .model )
21+ return self .request (
22+ self .client ,
23+ messages = messages ,
24+ model = self .model
25+ )
4126
4227 def create_fn (self ):
4328
4429 # Gradio chat function
4530 # Gradio sends: message, history
4631 def fn (message , history , * args ):
4732
48- if history == []: # Reset memory
49- self .memory = []
33+ messages = []
34+
35+ for user , assistant in history :
36+ messages .append (T .create_user_prompt (user ))
37+ messages .append (T .create_assistant_response (assistant ))
38+ messages .append (T .create_user_prompt (message ))
5039
51- # Here we use our own internal memory rather than that of the gradio :
52- self .memory .append (T .create_user_prompt (message ))
53- is_thinking = False
5440
55- tempo_time = 0.0
56- time_thinking = 0.0
41+ stream = self .request (
42+ self .client ,
43+ messages = self ._system_prompt + messages ,
44+ model = self .model
45+ )
5746
5847 stream = self .create_stream (
59- self . _system_prompt + self . memory # We make the system prompt adaptable and never at the beginning of the memory
48+ messages = messages
6049 )
6150
6251 partial = ""
63- token_nb = 0
52+ is_thinking = False
6453
6554 for token , _ , _ in T .handle_streaming (stream ):
6655 if token :
6756
68- if "<think>" in token or not is_thinking :
69- tempo_time = time ()
7057 if "<think>" in token or is_thinking :
7158 is_thinking = True
7259
7360 if "</think>" in token :
74- time_thinking = time () - tempo_time
7561 is_thinking = False
7662
7763 else : partial += token
78- token_nb += 1
79-
80- # ====================================
81- yield f"""Tokens : { token_nb }
82- Model : { self .model }
8364
84- ---
85- { f"**Think : { time_thinking :.4f} s**\n \n ---\n \n " if time_thinking != 0.0 else "" }
86- { f"**Thinking for { (time () - tempo_time ):.4f} s....**" if is_thinking else partial } """
87-
88- # ====================================
89- self .memory .append (T .create_assistant_response (partial ))
65+ yield partial
9066 return
9167 return fn
0 commit comments