Skip to content

Commit e742e04

Browse files
Update to v0.1.4
1 parent 331f5c2 commit e742e04

File tree

4 files changed

+31
-54
lines changed

4 files changed

+31
-54
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# open-taranis
22

3-
Minimalist Python framework for AI agents logic-only coding with streaming, tool calls, and multi-LLM provider support.
3+
Python framework for logic-only coding of AI agents, with streaming, tool calls, and multi-LLM provider support.
44

55
## Installation
66

@@ -35,13 +35,14 @@ To create a simple display using gradio as backend :
3535
```python
3636
import open_taranis as T
3737
import open_taranis.web_front as W
38+
import gradio as gr
3839

39-
W.gr.ChatInterface(
40+
gr.ChatInterface(
4041
fn=W.chat_fn_gradio(
41-
client=T.clients.openrouter(api_key="api_key"),
42+
client=T.clients.openrouter(API_KEY),
4243
request=T.clients.openrouter_request,
4344
model="mistralai/mistral-7b-instruct:free",
44-
_system_prompt=""
45+
_system_prompt="You are an agent named **Taranis**"
4546
).create_fn(),
4647
title="web front"
4748
).launch()
@@ -71,6 +72,7 @@ W.gr.ChatInterface(
7172
- **v0.1.1** : Added code to deploy a **gradio frontend** (no complex logic at the moment, e.g. tool_calls)
7273
- **v0.1.2** : Fixed a display bug in the **web_front** and experimentally added **ollama as a backend**
7374
- **v0.1.3** : Fixed the memory reset in the **web_front** and removed the **ollama module** in favor of the **openai front** (works 100 times better)
75+
- **v0.1.4** : Fixed `web_front` for native use on Hugging Face, as well as `handle_streaming`, which had tool-retrieval issues
7476

7577
## Advanced Examples
7678

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "open-taranis"
7-
version = "0.1.3"
7+
version = "0.1.4"
88
description = "Minimalist Python framework for AI agents logic-only coding with streaming, tool calls, and multi-LLM provider support"
99
authors = [{name = "SyntaxError4Life", email = "lilian@zanomega.com"}]
10-
dependencies = ["openai","gradio"]
10+
dependencies = ["openai"]
1111
readme = "README.md"
1212
requires-python = ">=3.8"
1313
license = {text = "GPL-3.0-or-later"}

src/open_taranis/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Any, Callable, Literal, Union, get_args, get_origin
88

99

10-
__version__ = "0.1.3"
10+
__version__ = "0.1.4"
1111

1212
import requests
1313
from packaging import version
@@ -334,11 +334,9 @@ def handle_streaming(stream: openai.Stream):
334334
"arg_chunks": [] # New: list for arguments
335335
}
336336
arg_chunks[index] = []
337+
if tool_call.function:
337338
if tool_call.function.name:
338-
# Ollama sends full name each chunk, OpenAI sends incrementally
339-
if accumulated_tool_calls[index]["function"]["name"] == "":
340-
accumulated_tool_calls[index]["function"]["name"] = tool_call.function.name
341-
# else: skip (already set, don't +=)
339+
accumulated_tool_calls[index]["function"]["name"] += tool_call.function.name
342340
if tool_call.function.arguments:
343341
# Append to list instead of +=
344342
arg_chunks[index].append(tool_call.function.arguments)
@@ -380,6 +378,7 @@ def handle_streaming(stream: openai.Stream):
380378
]
381379
yield "", tool_calls, len(tool_calls) > 0
382380

381+
383382
def handle_tool_call(tool_call:dict) -> tuple[str, str, dict, str] :
384383
"""
385384
Return :

src/open_taranis/web_front.py

Lines changed: 19 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
import gradio as gr
21
import open_taranis as T
3-
from time import time
42

53
class chat_fn_gradio:
64
def __init__(self,
@@ -15,77 +13,55 @@ def __init__(self,
1513
self.model = model
1614
self._system_prompt = [{"role":"system", "content":_system_prompt}]
1715

18-
self.meta = {
19-
"defaut_create_stream_used":False
20-
}
21-
self.memory = []
22-
2316
def create_stream(self, messages):
2417
"""
2518
TO IMPLEMENT
26-
27-
```
28-
if self.meta["defaut_create_stream_used"]==False : # Just used to detect, must be rewritten by the user
29-
print("Classic create_stream method used !")
30-
self.meta=True
31-
32-
return self.request(self.client,messages=messages,model=self.model)
33-
```
34-
3519
"""
36-
if self.meta["defaut_create_stream_used"]==False : # Just used to detect, must be rewritten by the user
37-
print("Classic create_stream method used !")
38-
self.meta=True
3920

40-
return self.request(self.client,messages=messages,model=self.model)
21+
return self.request(
22+
self.client,
23+
messages=messages,
24+
model=self.model
25+
)
4126

4227
def create_fn(self):
4328

4429
# Gradio chat function
4530
# Gradio sends: message, history
4631
def fn(message, history, *args):
4732

48-
if history == []: # Reset memory
49-
self.memory = []
33+
messages=[]
34+
35+
for user, assistant in history :
36+
messages.append(T.create_user_prompt(user))
37+
messages.append(T.create_assistant_response(assistant))
38+
messages.append(T.create_user_prompt(message))
5039

51-
# Here we use our own internal memory rather than that of the gradio :
52-
self.memory.append(T.create_user_prompt(message))
53-
is_thinking = False
5440

55-
tempo_time = 0.0
56-
time_thinking = 0.0
41+
stream = self.request(
42+
self.client,
43+
messages=self._system_prompt+messages,
44+
model=self.model
45+
)
5746

5847
stream = self.create_stream(
59-
self._system_prompt+self.memory # We make the system prompt adaptable and never at the beginning of the memory
48+
messages=messages
6049
)
6150

6251
partial = ""
63-
token_nb = 0
52+
is_thinking = False
6453

6554
for token, _, _ in T.handle_streaming(stream):
6655
if token :
6756

68-
if "<think>" in token or not is_thinking :
69-
tempo_time = time()
7057
if "<think>" in token or is_thinking :
7158
is_thinking = True
7259

7360
if "</think>" in token :
74-
time_thinking = time() - tempo_time
7561
is_thinking = False
7662

7763
else : partial += token
78-
token_nb += 1
79-
80-
# ====================================
81-
yield f"""Tokens : {token_nb}
82-
Model : {self.model}
8364

84-
---
85-
{f"**Think : {time_thinking:.4f}s**\n\n---\n\n" if time_thinking!=0.0 else ""}
86-
{f"**Thinking for {(time() - tempo_time):.4f}s....**" if is_thinking else partial}"""
87-
88-
# ====================================
89-
self.memory.append(T.create_assistant_response(partial))
65+
yield partial
9066
return
9167
return fn

0 commit comments

Comments
 (0)