1- #![ cfg( test) ]
21use crate :: {
32 Dictionary , RuleId , TokenId ,
4- heap:: AdjustableHeap ,
3+ test_utils :: heap:: AdjustableHeap ,
54 typed_vec:: { TypedVec , typed_vec_index} ,
65} ;
76
87typed_vec_index ! ( pub ( super ) InputTextPos , u32 ) ;
98
109type Heap = AdjustableHeap < InputTextPos , RuleId > ;
1110
12- pub fn sentence_piece_impl < const ALLOW_IMPROPER_RULES : bool > (
11+ pub fn bpe_with_heap < const ALLOW_IMPROPER_RULES : bool > (
1312 dict : & Dictionary ,
1413 seq : impl Into < Vec < TokenId > > ,
1514) -> Vec < TokenId > {
@@ -98,7 +97,7 @@ pub fn sentence_piece_impl<const ALLOW_IMPROPER_RULES: bool>(
9897
9998#[ cfg( test) ]
10099mod tests {
101- use crate :: { Dictionary , TokenId , Vocab , sp_impl :: sentence_piece_impl } ;
100+ use crate :: { Dictionary , TokenId , Vocab , test_utils :: bpe :: bpe_with_heap } ;
102101
103102 fn build_dict < T : AsRef < [ u8 ] > , R : IntoIterator < Item = ( T , T ) > > (
104103 vocab : & Vocab ,
@@ -114,7 +113,7 @@ mod tests {
114113 ) {
115114 let tokens: Vec < _ > = tokens. into_iter ( ) . map ( I :: into) . collect ( ) ;
116115 let inputs = dict. split_bytes_to_tokens ( seq. as_ref ( ) , 0usize ) ;
117- assert_eq ! ( sentence_piece_impl :: <true >( dict, inputs) , tokens) ;
116+ assert_eq ! ( bpe_with_heap :: <true >( dict, inputs) , tokens) ;
118117 assert ! ( dict. is_proper_in_bytes( ) . is_ok( ) ) ;
119118 check_properly_in_bytes ( dict, seq, tokens) ;
120119 }
@@ -126,7 +125,7 @@ mod tests {
126125 ) {
127126 let tokens: Vec < _ > = tokens. into_iter ( ) . map ( I :: into) . collect ( ) ;
128127 let inputs = dict. split_utf8_to_tokens ( seq. as_ref ( ) , 0usize ) ;
129- assert_eq ! ( sentence_piece_impl :: <true >( dict, inputs) , tokens) ;
128+ assert_eq ! ( bpe_with_heap :: <true >( dict, inputs) , tokens) ;
130129 assert ! ( dict. is_proper_in_utf8( ) . is_ok( ) ) ;
131130 check_properly_in_utf8 ( dict, seq, tokens) ;
132131 }
@@ -138,7 +137,7 @@ mod tests {
138137 ) {
139138 let tokens: Vec < _ > = tokens. into_iter ( ) . map ( I :: into) . collect ( ) ;
140139 let inputs = dict. split_bytes_to_tokens ( seq. as_ref ( ) , 0usize ) ;
141- assert_eq ! ( sentence_piece_impl :: <false >( dict, inputs) , tokens) ;
140+ assert_eq ! ( bpe_with_heap :: <false >( dict, inputs) , tokens) ;
142141 }
143142
144143 fn check_properly_in_utf8 < S : AsRef < str > , I : Into < TokenId > , T : IntoIterator < Item = I > > (
@@ -148,11 +147,11 @@ mod tests {
148147 ) {
149148 let tokens: Vec < _ > = tokens. into_iter ( ) . map ( I :: into) . collect ( ) ;
150149 let inputs = dict. split_utf8_to_tokens ( seq. as_ref ( ) , 0usize ) ;
151- assert_eq ! ( sentence_piece_impl :: <false >( dict, inputs) , tokens) ;
150+ assert_eq ! ( bpe_with_heap :: <false >( dict, inputs) , tokens) ;
152151 }
153152
154153 #[ test]
155- fn test_sp_impl ( ) {
154+ fn test_bpe_with_heap ( ) {
156155 let vocab = Vocab :: new ( [
157156 b"<unk>" as & [ _ ] ,
158157 b"a" ,
0 commit comments