@@ -101,8 +101,7 @@ def __init__(
101101 if nlp_model is not None :
102102 self .nlp = nlp_model
103103 else :
104- if language_model is not None :
105- self .nlp , using_gpu = load_language_model (language_model , self .normalize_options )
104+ self .nlp , using_gpu = load_language_model (language_model , self .normalize_options )
106105 self .using_gpu = using_gpu
107106 if workers is None :
108107 cpu_count = os .cpu_count () or 2
@@ -173,7 +172,7 @@ def process_texts(
173172 )
174173 if isinstance (tokens , PreparedDoc ):
175174 spacy_doc = make_spacy_doc (self .nlp , tokens )
176- if spacy_doc ._ .char_num > 100000 : # being conservative to preserve GPU RAM
175+ if spacy_doc ._ .char_num > 100000 and self . using_gpu is True : # being conservative to preserve GPU RAM
177176 split_doc = self .__split_spacy_docs (spacy_doc )
178177 rebuilt_doc = Doc .from_docs (list (self .nlp .pipe (split_doc , batch_size = 128 )))
179178 rebuilt_doc ._ .metadata = spacy_doc ._ .metadata
@@ -260,9 +259,7 @@ def __init__(
260259 else :
261260 cls .modernize = False
262261 cls .strip_tags = strip_tags
263-
264262 cls .is_philo_db = is_philo_db
265-
266263 cls .text_object_type = text_object_type
267264 cls .token_regex = re .compile (rf"({ word_regex } )|([{ '' .join (sentence_boundaries )} ])" )
268265 cls .sentence_boundaries = sentence_boundaries
0 commit comments