File tree Expand file tree Collapse file tree
torchTextClassifiers/tokenizers Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -178,17 +178,14 @@ def load(cls, load_path: str):
178178 @classmethod
179179 def load_from_s3 (cls , s3_path : str , filesystem ):
180180 if filesystem .exists (s3_path ) is False :
181- raise FileNotFoundError (
182- f"Tokenizer not found at { s3_path } . Please train it first (see src/train_tokenizers)."
183- )
181+ raise FileNotFoundError (f"Tokenizer not found at { s3_path } ." )
184182
185183 with filesystem .open (s3_path , "rb" ) as f :
186184 json_str = f .read ().decode ("utf-8" )
187185
188186 tokenizer_obj = Tokenizer .from_str (json_str )
189- tokenizer = PreTrainedTokenizerFast (tokenizer_object = tokenizer_obj )
190- instance = cls (vocab_size = len (tokenizer ), trained = True )
191- instance .tokenizer = tokenizer
187+ instance = cls (vocab_size = tokenizer_obj .get_vocab_size (), trained = True )
188+ instance .tokenizer = tokenizer_obj
192189 instance ._post_training ()
193190 return instance
194191
You can’t perform that action at this time.
0 commit comments