@@ -837,34 +837,34 @@ def to_openai(self) -> list[dict[str, t.Any]]:
837837
838838``` python
839839to_tokens(
840- tokenizer : str ,
840+ tokenizer_id : str ,
841841 tokenizer_kwargs: dict[str , Any] | None = None ,
842842 * ,
843843 apply_chat_template_kwargs: dict[str , Any]
844844 | None = None ,
845845 encode_kwargs: dict[str , Any] | None = None ,
846846 decode_kwargs: dict[str , Any] | None = None ,
847- ) -> list[ int ]
847+ ) -> TokenizedChat
848848```
849849
850850Converts the chat messages to a list of tokenized messages.
851851
852852** Returns:**
853853
854- * ` list[int] `
854+ * ` TokenizedChat `
 855855 –The serialized chat as a tokenized chat.
856856
857857<Accordion title = " Source code in rigging/chat.py" icon = " code" >
858858``` python
859859async def to_tokens (
860860 self ,
861- tokenizer : str ,
861+ tokenizer_id : str ,
862862 tokenizer_kwargs : dict[str , t.Any] | None = None ,
863863 * ,
864864 apply_chat_template_kwargs : dict[str , t.Any] | None = None ,
865865 encode_kwargs : dict[str , t.Any] | None = None ,
866866 decode_kwargs : dict[str , t.Any] | None = None ,
867- ) -> list[ int ] :
867+ ) -> TokenizedChat :
868868 """
869869 Converts the chat messages to a list of tokenized messages.
870870
@@ -874,7 +874,10 @@ async def to_tokens(
874874 from rigging.data import chats_to_tokens
875875 from rigging.tokenize import get_tokenizer
876876
877- tokenizer = get_tokenizer(tokenizer, ** tokenizer_kwargs)
877+ if tokenizer_kwargs is None :
878+ tokenizer_kwargs = {}
879+
880+ tokenizer = get_tokenizer(tokenizer_id, ** tokenizer_kwargs)
878881
879882 return await chats_to_tokens(
880883 self ,
@@ -1085,34 +1088,34 @@ def to_openai(self) -> list[list[dict[str, t.Any]]]:
10851088
10861089``` python
10871090to_tokens(
1088- tokenizer : str ,
1091+ tokenizer_id : str ,
10891092 tokenizer_kwargs: dict[str , Any] | None = None ,
10901093 * ,
10911094 apply_chat_template_kwargs: dict[str , Any]
10921095 | None = None ,
10931096 encode_kwargs: dict[str , Any] | None = None ,
10941097 decode_kwargs: dict[str , Any] | None = None ,
1095- ) -> list[list[ int ] ]
1098+ ) -> list[TokenizedChat ]
10961099```
10971100
10981101Converts the chat list to a list of tokenized messages.
10991102
11001103** Returns:**
11011104
1102- * ` list[list[int] ] `
1105+ * ` list[TokenizedChat ] `
 11031106 –The serialized chat list as a list of tokenized chats.
11041107
11051108<Accordion title = " Source code in rigging/chat.py" icon = " code" >
11061109``` python
11071110async def to_tokens (
11081111 self ,
1109- tokenizer : str ,
1112+ tokenizer_id : str ,
11101113 tokenizer_kwargs : dict[str , t.Any] | None = None ,
11111114 * ,
11121115 apply_chat_template_kwargs : dict[str , t.Any] | None = None ,
11131116 encode_kwargs : dict[str , t.Any] | None = None ,
11141117 decode_kwargs : dict[str , t.Any] | None = None ,
1115- ) -> list[list[ int ] ]:
1118+ ) -> list[TokenizedChat ]:
11161119 """
11171120 Converts the chat list to a list of tokenized messages.
11181121
@@ -1123,9 +1126,11 @@ async def to_tokens(
11231126 from rigging.data import chats_to_tokens
11241127 from rigging.tokenize import get_tokenizer
11251128
1126- tokenizer = get_tokenizer(tokenizer, ** tokenizer_kwargs)
1129+ if tokenizer_kwargs is None :
1130+ tokenizer_kwargs = {}
1131+
1132+ tokenizer = get_tokenizer(tokenizer_id, ** tokenizer_kwargs)
11271133
1128- # openai_chats = [chat.to_openai() for chat in self]
11291134 return [
11301135 await chats_to_tokens(
11311136 chat,
0 commit comments