From 09673414edcf2c15e677c18d7181dc3a994514b2 Mon Sep 17 00:00:00 2001 From: Andrea Bruttomesso Date: Mon, 17 Feb 2025 13:20:46 +0100 Subject: [PATCH] fix: typo on vectorizer function call --- octis/preprocessing/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/octis/preprocessing/preprocessing.py b/octis/preprocessing/preprocessing.py index 64ede3ee..294a8351 100644 --- a/octis/preprocessing/preprocessing.py +++ b/octis/preprocessing/preprocessing.py @@ -264,7 +264,7 @@ def filter_words(self, docs): self.preprocessing_steps.append('filter words with document frequency lower than ' + str(self.min_df) + ' and higher than ' + str(self.max_df)) self.preprocessing_steps.append('filter words with less than ' + str(self.min_chars) + " character") - vectorizer = TfidfVectorizer(df_max_freq=self.max_df, df_min_freq=self.min_df, vocabulary=self.vocabulary, + vectorizer = TfidfVectorizer(max_df=self.max_df, min_df=self.min_df, vocabulary=self.vocabulary, token_pattern=r"(?u)\b\w{" + str(self.min_chars) + ",}\b", lowercase=self.lowercase, stop_words=self.stopwords)