@@ -22,8 +22,8 @@ pub struct EagerBpeTokenization<T> {
2222 #[ debug( ignore) ]
2323 tokenizer : T ,
2424 nodes : VecDeque < EagerTokenNode > ,
25- frontier : u16 ,
26- num_frontier_bytes : u16 ,
25+ frontier : usize ,
26+ num_frontier_bytes : usize ,
2727 num_roots : u16 ,
2828 ac_state : ACNodeId ,
2929}
@@ -72,21 +72,21 @@ impl<T> EagerBpeTokenization<T> {
7272
7373 #[ inline( always) ]
7474 fn move_forward_frontier ( & mut self ) {
75- debug_assert ! ( self . frontier as usize + 1 < self . nodes. len( ) ) ;
75+ debug_assert ! ( self . frontier + 1 < self . nodes. len( ) ) ;
7676 let mut idx = self . frontier ;
7777 self . frontier += 1 ;
78- self . num_frontier_bytes -= self . nodes [ idx as usize ] . feed_len ;
78+ self . num_frontier_bytes -= self . nodes [ idx] . feed_len as usize ;
7979 loop {
80- let node = & self . nodes [ idx as usize ] ;
81- if node. num_alive_children != 0 || idx < node. skip_len {
80+ let node = & self . nodes [ idx] ;
81+ if node. num_alive_children != 0 || idx < node. skip_len as usize {
8282 if node. num_alive_children == 0 {
8383 debug_assert ! ( self . num_roots > 1 ) ;
8484 self . num_roots -= 1 ;
8585 }
8686 break ;
8787 }
88- idx -= node. skip_len ;
89- self . nodes [ idx as usize ] . num_alive_children -= 1 ;
88+ idx -= node. skip_len as usize ;
89+ self . nodes [ idx] . num_alive_children -= 1 ;
9090 }
9191 }
9292}
@@ -96,9 +96,9 @@ impl<T: Borrow<IncBpeTokenizer>> EagerBpeTokenization<T> {
9696 fn maintain_frontier ( & mut self ) {
9797 let tokenizer: & IncBpeTokenizer = self . tokenizer . borrow ( ) ;
9898 let target_frontier = tokenizer. ac_depths [ self . ac_state ] ;
99- while self . frontier as usize + 1 < self . nodes . len ( )
99+ while self . frontier + 1 < self . nodes . len ( )
100100 && self . num_frontier_bytes
101- > target_frontier + self . nodes [ self . frontier as usize ] . feed_len
101+ > target_frontier as usize + self . nodes [ self . frontier ] . feed_len as usize
102102 {
103103 self . move_forward_frontier ( ) ;
104104 }
@@ -116,7 +116,7 @@ impl<T: Borrow<IncBpeTokenizer>> EagerBpeTokenization<T> {
116116 let parent = self . nodes . len ( ) - skip_len as usize ;
117117 self . nodes [ parent] . num_alive_children += 1 ;
118118 }
119- self . num_frontier_bytes += feed_len;
119+ self . num_frontier_bytes += feed_len as usize ;
120120 self . nodes . push_back ( EagerTokenNode {
121121 forest_id,
122122 token_id,
@@ -160,7 +160,7 @@ impl<T: Borrow<IncBpeTokenizer>> EagerBpeTokenization<T> {
160160 self . pop_prefix_removed_nodes ( ) ;
161161 } else {
162162 self . ac_state = AC_NODE_ROOT ;
163- while self . frontier as usize + 1 < self . nodes . len ( ) {
163+ while self . frontier + 1 < self . nodes . len ( ) {
164164 self . move_forward_frontier ( ) ;
165165 }
166166 self . pop_prefix_removed_nodes ( ) ;
0 commit comments