Skip to content

Commit 592243e

Browse files
author
moo
committed
version bump
1 parent a7a7a8d commit 592243e

File tree

4 files changed

+62
-50
lines changed

4 files changed

+62
-50
lines changed

docs/api/chat.mdx

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -837,34 +837,34 @@ def to_openai(self) -> list[dict[str, t.Any]]:
837837

838838
```python
839839
to_tokens(
840-
tokenizer: str,
840+
tokenizer_id: str,
841841
tokenizer_kwargs: dict[str, Any] | None = None,
842842
*,
843843
apply_chat_template_kwargs: dict[str, Any]
844844
| None = None,
845845
encode_kwargs: dict[str, Any] | None = None,
846846
decode_kwargs: dict[str, Any] | None = None,
847-
) -> list[int]
847+
) -> TokenizedChat
848848
```
849849

850850
Converts the chat messages to a list of tokenized messages.
851851

852852
**Returns:**
853853

854-
* `list[int]`
854+
* `TokenizedChat`
855855
–The tokenized chat with its token list and structured slices.
856856

857857
<Accordion title="Source code in rigging/chat.py" icon="code">
858858
```python
859859
async def to_tokens(
860860
self,
861-
tokenizer: str,
861+
tokenizer_id: str,
862862
tokenizer_kwargs: dict[str, t.Any] | None = None,
863863
*,
864864
apply_chat_template_kwargs: dict[str, t.Any] | None = None,
865865
encode_kwargs: dict[str, t.Any] | None = None,
866866
decode_kwargs: dict[str, t.Any] | None = None,
867-
) -> list[int]:
867+
) -> TokenizedChat:
868868
"""
869869
Converts the chat messages to a list of tokenized messages.
870870
@@ -874,7 +874,10 @@ async def to_tokens(
874874
from rigging.data import chats_to_tokens
875875
from rigging.tokenize import get_tokenizer
876876

877-
tokenizer = get_tokenizer(tokenizer, **tokenizer_kwargs)
877+
if tokenizer_kwargs is None:
878+
tokenizer_kwargs = {}
879+
880+
tokenizer = get_tokenizer(tokenizer_id, **tokenizer_kwargs)
878881

879882
return await chats_to_tokens(
880883
self,
@@ -1085,34 +1088,34 @@ def to_openai(self) -> list[list[dict[str, t.Any]]]:
10851088

10861089
```python
10871090
to_tokens(
1088-
tokenizer: str,
1091+
tokenizer_id: str,
10891092
tokenizer_kwargs: dict[str, Any] | None = None,
10901093
*,
10911094
apply_chat_template_kwargs: dict[str, Any]
10921095
| None = None,
10931096
encode_kwargs: dict[str, Any] | None = None,
10941097
decode_kwargs: dict[str, Any] | None = None,
1095-
) -> list[list[int]]
1098+
) -> list[TokenizedChat]
10961099
```
10971100

10981101
Converts the chat list to a list of tokenized messages.
10991102

11001103
**Returns:**
11011104

1102-
* `list[list[int]]`
1105+
* `list[TokenizedChat]`
11031106
–The serialized chat list as a list of tokenized chats.
11041107

11051108
<Accordion title="Source code in rigging/chat.py" icon="code">
11061109
```python
11071110
async def to_tokens(
11081111
self,
1109-
tokenizer: str,
1112+
tokenizer_id: str,
11101113
tokenizer_kwargs: dict[str, t.Any] | None = None,
11111114
*,
11121115
apply_chat_template_kwargs: dict[str, t.Any] | None = None,
11131116
encode_kwargs: dict[str, t.Any] | None = None,
11141117
decode_kwargs: dict[str, t.Any] | None = None,
1115-
) -> list[list[int]]:
1118+
) -> list[TokenizedChat]:
11161119
"""
11171120
Converts the chat list to a list of tokenized messages.
11181121
@@ -1123,9 +1126,11 @@ async def to_tokens(
11231126
from rigging.data import chats_to_tokens
11241127
from rigging.tokenize import get_tokenizer
11251128

1126-
tokenizer = get_tokenizer(tokenizer, **tokenizer_kwargs)
1129+
if tokenizer_kwargs is None:
1130+
tokenizer_kwargs = {}
1131+
1132+
tokenizer = get_tokenizer(tokenizer_id, **tokenizer_kwargs)
11271133

1128-
# openai_chats = [chat.to_openai() for chat in self]
11291134
return [
11301135
await chats_to_tokens(
11311136
chat,

docs/api/data.mdx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ chats\_to\_tokens
257257

258258
```python
259259
chats_to_tokens(
260-
chat: Chat | Sequence[Chat],
260+
chat: Chat | None,
261261
tokenizer: AutoTokenizer,
262262
*,
263263
apply_chat_template_kwargs: dict[str, Any]
@@ -272,7 +272,7 @@ Transform a chat into a tokenized format with structured slices.
272272
**Parameters:**
273273

274274
* **`chat`**
275-
(`Chat | Sequence[Chat]`)
275+
(`Chat | None`)
276276
–The chat object to tokenize.
277277
* **`tokenizer`**
278278
(`AutoTokenizer`)
@@ -286,7 +286,7 @@ Transform a chat into a tokenized format with structured slices.
286286
<Accordion title="Source code in rigging/data.py" icon="code">
287287
```python
288288
async def chats_to_tokens(
289-
chat: Chat | t.Sequence[Chat],
289+
chat: Chat | None,
290290
tokenizer: AutoTokenizer,
291291
*,
292292
apply_chat_template_kwargs: dict[str, t.Any] | None = None,

docs/api/tokenize.mdx

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ get\_tokenizer
1111

1212
```python
1313
get_tokenizer(
14-
model: str | Any, **tokenizer_kwargs: Any
15-
) -> AutoTokenizer
14+
tokenizer_id: str, **tokenizer_kwargs: Any
15+
) -> AutoTokenizer | None
1616
```
1717

1818
Get a tokenizer from a transformers model identifier.
1919

2020
**Parameters:**
2121

22-
* **`model`**
23-
(`str | Any`)
22+
* **`tokenizer_id`**
23+
(`str`)
2424
–The model identifier used to load the tokenizer.
2525
* **`tokenizer_kwargs`**
2626
(`Any`, default:
@@ -30,28 +30,39 @@ Get the tokenizer from transformers model identifier, or from an already loaded
3030

3131
**Returns:**
3232

33-
* `AutoTokenizer`
33+
* `AutoTokenizer | None`
3434
–An instance of `AutoTokenizer`, or `None` if loading failed.
3535

3636
<Accordion title="Source code in rigging/tokenize/tokenizer.py" icon="code">
3737
```python
3838
def get_tokenizer(
39-
model: str | t.Any,
39+
tokenizer_id: str,
4040
**tokenizer_kwargs: t.Any,
41-
) -> AutoTokenizer:
41+
) -> AutoTokenizer | None:
4242
"""
4343
Get a tokenizer from a transformers model identifier.
4444
4545
Args:
46-
model: The model identifier (string) or an already loaded tokenizer.
46+
tokenizer_id: The model identifier used to load the tokenizer.
4747
tokenizer_kwargs: Additional keyword arguments for the tokenizer initialization.
4848
4949
Returns:
5050
An instance of `AutoTokenizer`, or `None` if loading failed.
5151
"""
52-
if isinstance(model, str):
53-
return AutoTokenizer.from_pretrained(model, **tokenizer_kwargs)
54-
return model
52+
tokenizer: AutoTokenizer | None = None
53+
54+
try:
55+
tokenizer = AutoTokenizer.from_pretrained(
56+
tokenizer_id,
57+
**tokenizer_kwargs,
58+
)
59+
logger.success(f"Loaded tokenizer for model '{tokenizer_id}'")
60+
61+
except Exception as e: # noqa: BLE001
62+
# Catch all exceptions to handle any issues with loading the tokenizer
63+
logger.error(f"Failed to load tokenizer for model '{tokenizer_id}': {e}")
64+
65+
return tokenizer
5566
```
5667

5768

pyproject.toml

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "rigging"
3-
version = "3.0.0"
3+
version = "3.0.3"
44
description = "LLM Interaction Framework"
55
authors = ["Nick Landers <monoxgas@gmail.com>"]
66
license = "MIT"
@@ -17,7 +17,7 @@ pydantic-xml = "^2.11.0"
1717
loguru = "^0.7.2"
1818
litellm = "^1.67.2"
1919
pandas = "^2.2.2"
20-
eval-type-backport = "^0.2.0" # For 3.9 future annotations
20+
eval-type-backport = "^0.2.0" # For 3.9 future annotations
2121
elasticsearch = "^8.13.2"
2222
xmltodict = "^0.13.0"
2323
colorama = "^0.4.6"
@@ -66,7 +66,7 @@ pytest-asyncio = "^1.0.0"
6666
types-colorama = "^0.4.15.20240311"
6767
types-requests = "2.32.4.20250611"
6868
beautifulsoup4 = "^4.13.4"
69-
mkdocstrings = {extras = ["python"], version = "^0.29.1"}
69+
mkdocstrings = { extras = ["python"], version = "^0.29.1" }
7070
markdown = "^3.8"
7171
markdownify = "^1.1.0"
7272

@@ -101,11 +101,7 @@ ignore_no_config = true
101101
# Security
102102

103103
[tool.bandit]
104-
exclude_dirs = [
105-
"examples/*",
106-
".github/*",
107-
".hooks/*",
108-
]
104+
exclude_dirs = ["examples/*", ".github/*", ".hooks/*"]
109105

110106
# Type Checking
111107

@@ -126,22 +122,22 @@ extend-exclude = [
126122
]
127123

128124
[tool.ruff.lint]
129-
select = [ "ALL" ]
125+
select = ["ALL"]
130126
ignore = [
131-
"E501", # line too long (we make best effort)
132-
"TRY003", # long messages in exception classes
133-
"EM", # picky message construction for exceptions
134-
"C90", # mccabe complexity
135-
"A002", # shadowing built-in
136-
"D", # docstrings
137-
"ANN", # annotations (handled by mypy)
138-
"PLR0913", # too many arguments
139-
"ERA001", # commented out code
140-
"FIX002", # contains todo, consider fixing
141-
"TD002", # TODO
142-
"TD003", # TODO
143-
"PLR0911", # too many return statements
144-
"FBT003", # boolean positional in function call
127+
"E501", # line too long (we make best effort)
128+
"TRY003", # long messages in exception classes
129+
"EM", # picky message construction for exceptions
130+
"C90", # mccabe complexity
131+
"A002", # shadowing built-in
132+
"D", # docstrings
133+
"ANN", # annotations (handled by mypy)
134+
"PLR0913", # too many arguments
135+
"ERA001", # commented out code
136+
"FIX002", # contains todo, consider fixing
137+
"TD002", # TODO
138+
"TD003", # TODO
139+
"PLR0911", # too many return statements
140+
"FBT003", # boolean positional in function call
145141
]
146142

147143
[tool.ruff.format]

0 commit comments

Comments
 (0)