1- from typing import Callable , Mapping , Optional , Sequence , Union
1+ from typing import Callable , Mapping , Optional , Sequence , Tuple , Union
22
# Key type of the transition mappings returned by `get_trans()`.
# NOTE(review): presumably `str` is a single character (chars mode) and `int`
# is a byte value (bytes mode) -- confirm against the native implementation.
ByteOrChar = Union[str, int]

# Alias for the integer IDs used to refer to trie nodes (returned by
# `TrieNode.get_node_id()` and accepted by `Trie.get_node()`).
TrieNodeID = int

# Alias for the integer IDs used to refer to suffix-automaton nodes (returned
# by `GeneralSAMState.get_node_id()` and accepted by `GeneralSAM.get_state()`).
GeneralSAMNodeID = int
46
class TrieNode:
    # Stub for a single node of a `Trie`; only query methods are declared here.
    # NOTE(review): the per-method notes below are inferred from names and
    # signatures only -- confirm against the native implementation.

    # Presumably true when the owning trie is keyed by characters (`str`).
    def is_in_chars(self) -> bool: ...

    # Presumably true when the owning trie is keyed by bytes.
    def is_in_bytes(self) -> bool: ...

    # ID of this node; the same ID type is accepted by `Trie.get_node()`.
    def get_node_id(self) -> TrieNodeID: ...

    # Presumably true when an inserted sequence ends at this node.
    def is_accepting(self) -> bool: ...

    # Outgoing transitions: maps a character/byte label to a trie node ID.
    def get_trans(self) -> Mapping[ByteOrChar, TrieNodeID]: ...

    # ID of the parent node.
    # NOTE(review): the value returned for the root is not visible here.
    def get_parent(self) -> TrieNodeID: ...
1214
1315class Trie :
1416 @staticmethod
@@ -18,33 +20,33 @@ class Trie:
1820 def is_in_chars (self ) -> bool : ...
1921 def is_in_bytes (self ) -> bool : ...
2022 def num_of_nodes (self ) -> int : ...
21- def insert_chars (self , s : str ) -> int : ...
22- def insert_bytes (self , s : bytes ) -> int : ...
23- def get_bfs_order (self ) -> Sequence [int ]: ...
23+ def insert_chars (self , s : str ) -> TrieNodeID : ...
24+ def insert_bytes (self , s : bytes ) -> TrieNodeID : ...
25+ def get_bfs_order (self ) -> Sequence [TrieNodeID ]: ...
2426 def get_root (self ) -> TrieNode : ...
25- def get_node (self , node_id : int ) -> Optional [TrieNode ]: ...
27+ def get_node (self , node_id : TrieNodeID ) -> Optional [TrieNode ]: ...
2628 def dfs_travel (
2729 self ,
28- in_stack_callback : Callable [[int , Optional [ByteOrChar ]], None ],
29- out_stack_callback : Callable [[int ], None ],
30- root_node_id : Optional [int ] = None ,
30+ in_stack_callback : Callable [[TrieNodeID , Optional [ByteOrChar ]], None ],
31+ out_stack_callback : Callable [[TrieNodeID ], None ],
32+ root_node_id : Optional [TrieNodeID ] = None ,
3133 ) -> TrieNode : ...
3234 def bfs_travel (
3335 self ,
34- in_queue_callback : Callable [[int , Optional [ByteOrChar ]], None ],
35- out_queue_callback : Callable [[int ], None ],
36- root_node_id : Optional [int ] = None ,
36+ in_queue_callback : Callable [[TrieNodeID , Optional [ByteOrChar ]], None ],
37+ out_queue_callback : Callable [[TrieNodeID ], None ],
38+ root_node_id : Optional [TrieNodeID ] = None ,
3739 ) -> TrieNode : ...
3840
3941class GeneralSAMState :
4042 def is_in_str (self ) -> bool : ...
4143 def is_in_bytes (self ) -> bool : ...
42- def get_node_id (self ) -> int : ...
44+ def get_node_id (self ) -> GeneralSAMNodeID : ...
4345 def is_nil (self ) -> bool : ...
4446 def is_root (self ) -> bool : ...
4547 def is_accepting (self ) -> bool : ...
46- def get_trans (self ) -> Mapping [ByteOrChar , int ]: ...
47- def get_suffix_parent_id (self ) -> int : ...
48+ def get_trans (self ) -> Mapping [ByteOrChar , GeneralSAMNodeID ]: ...
49+ def get_suffix_parent_id (self ) -> GeneralSAMNodeID : ...
4850 def copy (self ) -> 'GeneralSAMState' : ...
4951 def goto_suffix_parent (self ) -> None : ...
5052 def goto_char (self , t : str ) -> None : ...
@@ -55,19 +57,19 @@ class GeneralSAMState:
5557 self ,
5658 trie : Trie ,
5759 in_stack_callback : Callable [
58- ['GeneralSAMState' , int , Optional [ByteOrChar ]], None
60+ ['GeneralSAMState' , TrieNodeID , Optional [ByteOrChar ]], None
5961 ],
60- out_stack_callback : Callable [['GeneralSAMState' , int ], None ],
61- trie_node_id : Optional [int ] = None ,
62+ out_stack_callback : Callable [['GeneralSAMState' , TrieNodeID ], None ],
63+ trie_node_id : Optional [TrieNodeID ] = None ,
6264 ) -> TrieNode : ...
6365 def bfs_along (
6466 self ,
6567 trie : Trie ,
6668 in_queue_callback : Callable [
67- ['GeneralSAMState' , int , Optional [ByteOrChar ]], None
69+ ['GeneralSAMState' , TrieNodeID , Optional [ByteOrChar ]], None
6870 ],
69- out_queue_callback : Callable [['GeneralSAMState' , int ], None ],
70- trie_node_id : Optional [int ] = None ,
71+ out_queue_callback : Callable [['GeneralSAMState' , TrieNodeID ], None ],
72+ trie_node_id : Optional [TrieNodeID ] = None ,
7173 ) -> TrieNode : ...
7274
7375class GeneralSAM :
@@ -81,5 +83,11 @@ class GeneralSAM:
8183 def is_in_bytes (self ) -> bool : ...
8284 def num_of_nodes (self ) -> int : ...
8385 def get_root_state (self ) -> GeneralSAMState : ...
84- def get_state (self , node_id : int ) -> GeneralSAMState : ...
86+ def get_state (self , node_id : GeneralSAMNodeID ) -> GeneralSAMState : ...
8587 def get_topo_and_suf_len_sorted_states (self ) -> Sequence [GeneralSAMState ]: ...
88+
class GreedyTokenizer:
    # Stub for a tokenizer constructed from a `GeneralSAM` and a `Trie`.
    # NOTE(review): the matching strategy ("greedy") is implied by the class
    # name only -- confirm against the native implementation.

    # Build a tokenizer from an existing suffix automaton and trie.
    @staticmethod
    def from_sam_and_trie(sam: GeneralSAM, trie: Trie) -> 'GreedyTokenizer': ...

    # Tokenize a `str`; returns a sequence of `(trie node ID, int)` pairs.
    # NOTE(review): the meaning of the second element is not visible in this
    # stub (presumably a token length) -- confirm before relying on it.
    def tokenize_str(self, s: str) -> Sequence[Tuple[TrieNodeID, int]]: ...

    # Same as `tokenize_str`, but for `bytes` input.
    def tokenize_bytes(self, s: bytes) -> Sequence[Tuple[TrieNodeID, int]]: ...
0 commit comments