Skip to content

Commit 592243e

Browse files
author
moo
committed
version bump
1 parent a7a7a8d commit 592243e

File tree

4 files changed

+62
-50
lines changed

4 files changed

+62
-50
lines changed

docs/api/chat.mdx

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -837,34 +837,34 @@ def to_openai(self) -> list[dict[str, t.Any]]:
837837

838838
```python
839839
to_tokens(
840-
tokenizer: str,
840+
tokenizer_id: str,
841841
tokenizer_kwargs: dict[str, Any] | None = None,
842842
*,
843843
apply_chat_template_kwargs: dict[str, Any]
844844
| None = None,
845845
encode_kwargs: dict[str, Any] | None = None,
846846
decode_kwargs: dict[str, Any] | None = None,
847-
) -> list[int]
847+
) -> TokenizedChat
848848
```
849849

850850
Converts the chat messages to a list of tokenized messages.
851851

852852
**Returns:**
853853

854-
* `list[int]`
854+
* `TokenizedChat`
855855
–The tokenized chat with its token list and structured slices.
856856

857857
<Accordion title="Source code in rigging/chat.py" icon="code">
858858
```python
859859
async def to_tokens(
860860
self,
861-
tokenizer: str,
861+
tokenizer_id: str,
862862
tokenizer_kwargs: dict[str, t.Any] | None = None,
863863
*,
864864
apply_chat_template_kwargs: dict[str, t.Any] | None = None,
865865
encode_kwargs: dict[str, t.Any] | None = None,
866866
decode_kwargs: dict[str, t.Any] | None = None,
867-
) -> list[int]:
867+
) -> TokenizedChat:
868868
"""
869869
Converts the chat messages to a list of tokenized messages.
870870
@@ -874,7 +874,10 @@ async def to_tokens(
874874
from rigging.data import chats_to_tokens
875875
from rigging.tokenize import get_tokenizer
876876

877-
tokenizer = get_tokenizer(tokenizer, **tokenizer_kwargs)
877+
if tokenizer_kwargs is None:
878+
tokenizer_kwargs = {}
879+
880+
tokenizer = get_tokenizer(tokenizer_id, **tokenizer_kwargs)
878881

879882
return await chats_to_tokens(
880883
self,
@@ -1085,34 +1088,34 @@ def to_openai(self) -> list[list[dict[str, t.Any]]]:
10851088

10861089
```python
10871090
to_tokens(
1088-
tokenizer: str,
1091+
tokenizer_id: str,
10891092
tokenizer_kwargs: dict[str, Any] | None = None,
10901093
*,
10911094
apply_chat_template_kwargs: dict[str, Any]
10921095
| None = None,
10931096
encode_kwargs: dict[str, Any] | None = None,
10941097
decode_kwargs: dict[str, Any] | None = None,
1095-
) -> list[list[int]]
1098+
) -> list[TokenizedChat]
10961099
```
10971100

10981101
Converts the chat list to a list of tokenized messages.
10991102

11001103
**Returns:**
11011104

1102-
* `list[list[int]]`
1105+
* `list[TokenizedChat]`
11031106
–The serialized chat list as a list of tokenized chats.
11041107

11051108
<Accordion title="Source code in rigging/chat.py" icon="code">
11061109
```python
11071110
async def to_tokens(
11081111
self,
1109-
tokenizer: str,
1112+
tokenizer_id: str,
11101113
tokenizer_kwargs: dict[str, t.Any] | None = None,
11111114
*,
11121115
apply_chat_template_kwargs: dict[str, t.Any] | None = None,
11131116
encode_kwargs: dict[str, t.Any] | None = None,
11141117
decode_kwargs: dict[str, t.Any] | None = None,
1115-
) -> list[list[int]]:
1118+
) -> list[TokenizedChat]:
11161119
"""
11171120
Converts the chat list to a list of tokenized messages.
11181121
@@ -1123,9 +1126,11 @@ async def to_tokens(
11231126
from rigging.data import chats_to_tokens
11241127
from rigging.tokenize import get_tokenizer
11251128

1126-
tokenizer = get_tokenizer(tokenizer, **tokenizer_kwargs)
1129+
if tokenizer_kwargs is None:
1130+
tokenizer_kwargs = {}
1131+
1132+
tokenizer = get_tokenizer(tokenizer_id, **tokenizer_kwargs)
11271133

1128-
# openai_chats = [chat.to_openai() for chat in self]
11291134
return [
11301135
await chats_to_tokens(
11311136
chat,

docs/api/data.mdx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ chats\_to\_tokens
257257

258258
```python
259259
chats_to_tokens(
260-
chat: Chat | Sequence[Chat],
260+
chat: Chat | None,
261261
tokenizer: AutoTokenizer,
262262
*,
263263
apply_chat_template_kwargs: dict[str, Any]
@@ -272,7 +272,7 @@ Transform a chat into a tokenized format with structured slices.
272272
**Parameters:**
273273

274274
* **`chat`**
275-
(`Chat | Sequence[Chat]`)
275+
(`Chat | None`)
276276
–The chat object to tokenize.
277277
* **`tokenizer`**
278278
(`AutoTokenizer`)
@@ -286,7 +286,7 @@ Transform a chat into a tokenized format with structured slices.
286286
<Accordion title="Source code in rigging/data.py" icon="code">
287287
```python
288288
async def chats_to_tokens(
289-
chat: Chat | t.Sequence[Chat],
289+
chat: Chat | None,
290290
tokenizer: AutoTokenizer,
291291
*,
292292
apply_chat_template_kwargs: dict[str, t.Any] | None = None,

docs/api/tokenize.mdx

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ get\_tokenizer
1111

1212
```python
1313
get_tokenizer(
14-
model: str | Any, **tokenizer_kwargs: Any
15-
) -> AutoTokenizer
14+
tokenizer_id: str, **tokenizer_kwargs: Any
15+
) -> AutoTokenizer | None
1616
```
1717

1818
Get a tokenizer from a transformers model identifier.
1919

2020
**Parameters:**
2121

22-
* **`model`**
23-
(`str | Any`)
22+
* **`tokenizer_id`**
23+
(`str`)
2424
–The model identifier used to load the tokenizer.
2525
* **`tokenizer_kwargs`**
2626
(`Any`, default:
@@ -30,28 +30,39 @@ Get the tokenizer from transformers model identifier, or from an already loaded
3030

3131
**Returns:**
3232

33-
* `AutoTokenizer`
33+
* `AutoTokenizer | None`
3434
–An instance of `AutoTokenizer`, or `None` if loading failed.
3535

3636
<Accordion title="Source code in rigging/tokenize/tokenizer.py" icon="code">
3737
```python
3838
def get_tokenizer(
39-
model: str | t.Any,
39+
tokenizer_id: str,
4040
**tokenizer_kwargs: t.Any,
41-
) -> AutoTokenizer:
41+
) -> AutoTokenizer | None:
4242
"""
4343
Get a tokenizer from a transformers model identifier.
4444
4545
Args:
46-
model: The model identifier (string) or an already loaded tokenizer.
46+
tokenizer_id: The model identifier used to load the tokenizer.
4747
tokenizer_kwargs: Additional keyword arguments for the tokenizer initialization.
4848
4949
Returns:
5050
An instance of `AutoTokenizer`, or `None` if loading failed.
5151
"""
52-
if isinstance(model, str):
53-
return AutoTokenizer.from_pretrained(model, **tokenizer_kwargs)
54-
return model
52+
tokenizer: AutoTokenizer | None = None
53+
54+
try:
55+
tokenizer = AutoTokenizer.from_pretrained(
56+
tokenizer_id,
57+
**tokenizer_kwargs,
58+
)
59+
logger.success(f"Loaded tokenizer for model '{tokenizer_id}'")
60+
61+
except Exception as e: # noqa: BLE001
62+
# Catch all exceptions to handle any issues with loading the tokenizer
63+
logger.error(f"Failed to load tokenizer for model '{tokenizer_id}': {e}")
64+
65+
return tokenizer
5566
```
5667

5768

pyproject.toml

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "rigging"
3-
version = "3.0.0"
3+
version = "3.0.3"
44
description = "LLM Interaction Framework"
55
authors = ["Nick Landers <monoxgas@gmail.com>"]
66
license = "MIT"
@@ -17,7 +17,7 @@ pydantic-xml = "^2.11.0"
1717
loguru = "^0.7.2"
1818
litellm = "^1.67.2"
1919
pandas = "^2.2.2"
20-
eval-type-backport = "^0.2.0" # For 3.9 future annotations
20+
eval-type-backport = "^0.2.0" # For 3.9 future annotations
2121
elasticsearch = "^8.13.2"
2222
xmltodict = "^0.13.0"
2323
colorama = "^0.4.6"
@@ -66,7 +66,7 @@ pytest-asyncio = "^1.0.0"
6666
types-colorama = "^0.4.15.20240311"
6767
types-requests = "2.32.4.20250611"
6868
beautifulsoup4 = "^4.13.4"
69-
mkdocstrings = {extras = ["python"], version = "^0.29.1"}
69+
mkdocstrings = { extras = ["python"], version = "^0.29.1" }
7070
markdown = "^3.8"
7171
markdownify = "^1.1.0"
7272

@@ -101,11 +101,7 @@ ignore_no_config = true
101101
# Security
102102

103103
[tool.bandit]
104-
exclude_dirs = [
105-
"examples/*",
106-
".github/*",
107-
".hooks/*",
108-
]
104+
exclude_dirs = ["examples/*", ".github/*", ".hooks/*"]
109105

110106
# Type Checking
111107

@@ -126,22 +122,22 @@ extend-exclude = [
126122
]
127123

128124
[tool.ruff.lint]
129-
select = [ "ALL" ]
125+
select = ["ALL"]
130126
ignore = [
131-
"E501", # line too long (we make best effort)
132-
"TRY003", # long messages in exception classes
133-
"EM", # picky message construction for exceptions
134-
"C90", # mccabe complexity
135-
"A002", # shadowing built-in
136-
"D", # docstrings
137-
"ANN", # annotations (handled by mypy)
138-
"PLR0913", # too many arguments
139-
"ERA001", # commented out code
140-
"FIX002", # contains todo, consider fixing
141-
"TD002", # TODO
142-
"TD003", # TODO
143-
"PLR0911", # too many return statements
144-
"FBT003", # boolean positional in function call
127+
"E501", # line too long (we make best effort)
128+
"TRY003", # long messages in exception classes
129+
"EM", # picky message construction for exceptions
130+
"C90", # mccabe complexity
131+
"A002", # shadowing built-in
132+
"D", # docstrings
133+
"ANN", # annotations (handled by mypy)
134+
"PLR0913", # too many arguments
135+
"ERA001", # commented out code
136+
"FIX002", # contains todo, consider fixing
137+
"TD002", # TODO
138+
"TD003", # TODO
139+
"PLR0911", # too many return statements
140+
"FBT003", # boolean positional in function call
145141
]
146142

147143
[tool.ruff.format]

0 commit comments

Comments
 (0)