From 5aa6f02a5512dc5a2f856653652ab3e357edebd7 Mon Sep 17 00:00:00 2001 From: space-ape Date: Fri, 21 Jan 2022 19:02:40 -0500 Subject: [PATCH] fix issue 105, 120 --- torchnlp/encoders/text/spacy_encoder.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/torchnlp/encoders/text/spacy_encoder.py b/torchnlp/encoders/text/spacy_encoder.py index 4e2b2a6..682f1ef 100644 --- a/torchnlp/encoders/text/spacy_encoder.py +++ b/torchnlp/encoders/text/spacy_encoder.py @@ -31,7 +31,7 @@ class SpacyEncoder(StaticTokenizerEncoder): """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, language='en_core_web_sm', **kwargs): if 'tokenize' in kwargs: raise TypeError('``SpacyEncoder`` does not take keyword argument ``tokenize``.') @@ -41,12 +41,10 @@ def __init__(self, *args, **kwargs): print("Please install spaCy: " "`pip install spacy`") raise - # Use English as default when no language was specified - language = kwargs.get('language', 'en') # All languages supported by spaCy can be found here: # https://spacy.io/models/#available-models - supported_languages = ['en', 'de', 'es', 'pt', 'fr', 'it', 'nl', 'xx'] + supported_languages = ['en', 'de', 'es', 'pt', 'fr', 'it', 'nl', 'xx', 'en_core_web_sm'] if language in supported_languages: # Load the spaCy language model if it has been installed