Skip to content

Commit 21501ad

Browse files
committed
fixes
1 parent fc91d7a commit 21501ad

File tree

2 files changed: +11 -6 lines changed

2 files changed: +11 -6 lines changed

text_preprocessing/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,9 @@
11
from .modernizer import Modernizer
22
from .preprocessor import PreProcessor
33
from .text_loader import text_loader
4-
from .spacy_helpers import load_language_model, PreProcessingPipe, Tokens, PreprocessorToken as Token
4+
from .spacy_helpers import (
5+
load_language_model,
6+
PreProcessingPipe,
7+
Tokens,
8+
PreprocessorToken as Token,
9+
)

text_preprocessing/spacy_helpers.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,7 @@ def split_tokens(self, n: int) -> Iterable["Tokens"]:
216216

217217
def extend(self, tokens) -> None:
218218
"""Extend size of Tokens"""
219-
self.tokens.extend(tokens)
219+
self.tokens.extend(tokens.tokens)
220220
if not self.metadata:
221221
self.metadata = tokens.metadata
222222
self.metadata["end_byte"] = tokens.metadata["end_byte"]
@@ -284,7 +284,7 @@ def load(cls, path):
284284
"""Load tokens from disk"""
285285
with open(path, "rb") as input_file:
286286
tokens = pickle.load(input_file)
287-
return cls(tokens["tokens"], tokens["metadata"])
287+
return Tokens(tokens["tokens"], tokens["metadata"])
288288

289289

290290
def check_for_updates(language) -> List[str]:
@@ -451,11 +451,11 @@ def normalize_from_tokens(self, tokens: Tokens) -> Tokens:
451451
new_tokens = []
452452
for token in tokens:
453453
if self.__filter_token(token) is True:
454-
normalized_text = "#DEL#"
454+
normalized_text = ""
455455
else:
456456
normalized_text = self.__normalize_token(token)
457-
if not normalized_text:
458-
normalized_text = "#DEL#"
457+
if normalized_text == "#DEL#":
458+
normalized_text = ""
459459
token.text = normalized_text
460460
new_tokens.append(token)
461461
return Tokens(new_tokens, tokens.metadata)

0 commit comments

Comments
 (0)