@@ -216,7 +216,7 @@ def split_tokens(self, n: int) -> Iterable["Tokens"]:
216216
217217 def extend (self , tokens ) -> None :
218218 """Extend size of Tokens"""
219- self .tokens .extend (tokens )
219+ self .tokens .extend (tokens . tokens )
220220 if not self .metadata :
221221 self .metadata = tokens .metadata
222222 self .metadata ["end_byte" ] = tokens .metadata ["end_byte" ]
@@ -284,7 +284,7 @@ def load(cls, path):
284284 """Load tokens from disk"""
285285 with open (path , "rb" ) as input_file :
286286 tokens = pickle .load (input_file )
287- return cls (tokens ["tokens" ], tokens ["metadata" ])
287+ return Tokens (tokens ["tokens" ], tokens ["metadata" ])
288288
289289
290290def check_for_updates (language ) -> List [str ]:
@@ -451,11 +451,11 @@ def normalize_from_tokens(self, tokens: Tokens) -> Tokens:
451451 new_tokens = []
452452 for token in tokens :
453453 if self .__filter_token (token ) is True :
454- normalized_text = "#DEL# "
454+ normalized_text = ""
455455 else :
456456 normalized_text = self .__normalize_token (token )
457- if not normalized_text :
458- normalized_text = "#DEL# "
457+ if normalized_text == "#DEL#" :
458+ normalized_text = ""
459459 token .text = normalized_text
460460 new_tokens .append (token )
461461 return Tokens (new_tokens , tokens .metadata )
0 commit comments