Merged
Changes from 47 commits
Commits
69 commits
64f38a5
feat: add HF dependencies (as a group)
meilame-tayebjee Oct 22, 2025
3703f48
feat: add WordPiece tokenize
meilame-tayebjee Oct 22, 2025
1266287
chore: rename file to ngram
meilame-tayebjee Oct 27, 2025
d2563ea
feat: improve base tokenizer, add HF abstract
meilame-tayebjee Oct 27, 2025
ae045ab
feat: change inheritance to HFTokenizer
meilame-tayebjee Oct 27, 2025
c6eac58
feat(dataset): init
meilame-tayebjee Oct 27, 2025
c25eb36
fix: add update of vocab size in post training
meilame-tayebjee Oct 27, 2025
d897bef
fix: categorical tensors set to None instead of empty tensors when no…
meilame-tayebjee Oct 27, 2025
51be1d1
feat: add ruff and datasets dep
meilame-tayebjee Oct 31, 2025
b53a10d
feat: first working example for model/module
meilame-tayebjee Oct 31, 2025
6f3417c
chore: fix signature
meilame-tayebjee Oct 31, 2025
c600f18
chore: default value for batch_idx in predict
meilame-tayebjee Nov 3, 2025
85cb8b8
feat!: violently modularize and simplify forward+checking
meilame-tayebjee Nov 3, 2025
dc863ff
chore: remove tokenizer (now it is ngram tokenizer)
meilame-tayebjee Nov 3, 2025
064b73f
feat!(components): first working example with full modularity
meilame-tayebjee Nov 4, 2025
164cccf
fix: avoid bugs with numpy arrays in boolean contexts
meilame-tayebjee Nov 5, 2025
c5b9673
feat: add smooth imports for HF and output_dim field
meilame-tayebjee Nov 5, 2025
a0fe18c
feat!(wrapper class): finalize orchestration tokenizer, dataset, mode…
meilame-tayebjee Nov 5, 2025
ddd7cec
fix: return only optimizer when scheduler is none
meilame-tayebjee Nov 5, 2025
32e6805
feat(test): clean tests (wip)
meilame-tayebjee Nov 5, 2025
8fdaf0c
chore: clean
meilame-tayebjee Nov 5, 2025
a7f71d3
feat: enable to choose context size in tokenizer
meilame-tayebjee Nov 5, 2025
0a9eda5
chore: pin_memory to default False (avoid warning on CPU run)
meilame-tayebjee Nov 7, 2025
6d951fe
feat: add __repr__ for all components
meilame-tayebjee Nov 7, 2025
c31ad43
chore: format
meilame-tayebjee Nov 7, 2025
956b7a3
feat!(HF): enable load from pretrained
meilame-tayebjee Nov 7, 2025
a497697
chore: update description
meilame-tayebjee Nov 7, 2025
2fda9c2
feat: __call__ for tokenizers is tokenize
meilame-tayebjee Nov 7, 2025
13b9de4
feat(tokenizers): clean __call__ and __repr__, add offset return for e…
meilame-tayebjee Nov 7, 2025
f55452b
feat!(explainability): finalize explainability feature at word and ch…
meilame-tayebjee Nov 7, 2025
0262109
chore: remove useless file
meilame-tayebjee Nov 7, 2025
6bdb750
fix: typo in trainer_params max_epochs
meilame-tayebjee Nov 10, 2025
830a45c
feat!(tokenizer): ensure output is consistent across all tokenizers
meilame-tayebjee Nov 10, 2025
c7307f5
fix: move hf-dep to optional dependencies
meilame-tayebjee Nov 10, 2025
a5b3e4d
Merge branch 'main' into hf_tokenizer
meilame-tayebjee Nov 10, 2025
934b041
feat!(attention): enable attention logic
meilame-tayebjee Nov 10, 2025
5e150b2
fix: check if categorical var are present before checking their arrays
meilame-tayebjee Nov 10, 2025
162e296
fix: no persistent_workers if num_workers=0
meilame-tayebjee Nov 10, 2025
1591bd9
fix: closing parenthesis
meilame-tayebjee Nov 12, 2025
1af9e53
fix: truncation=True is needed
meilame-tayebjee Nov 12, 2025
4ca1807
add ipywidgets
meilame-tayebjee Nov 12, 2025
927a5e7
fix: check_Y problem of indexes
meilame-tayebjee Nov 12, 2025
7fdb4e3
fix: truncation=True is needed
meilame-tayebjee Nov 12, 2025
4e36940
remove unnecessary print
meilame-tayebjee Nov 12, 2025
d44d051
progress on doc
meilame-tayebjee Nov 12, 2025
a179c37
fix: load model on cpu to avoid pb after training
meilame-tayebjee Nov 12, 2025
ea26799
progress on docs
meilame-tayebjee Nov 12, 2025
269c76a
fix!(explainability): remove nan words and fix plotting
meilame-tayebjee Nov 12, 2025
89cc8fe
examples : fix basic_classification after refactor
micedre Nov 12, 2025
1b62eee
Fix check for categorical variable
micedre Nov 12, 2025
704fe14
Adapt examples to new package architecture
micedre Nov 13, 2025
be28866
Merge branch 'hf_tokenizer' of https://github.com/InseeFrLab/torchTex…
meilame-tayebjee Nov 13, 2025
be4acf2
chore: first draft of example notebook. WIP
meilame-tayebjee Nov 13, 2025
0f9b4b4
refactor: replace cpu_run with accelerator in TrainingConfig
meilame-tayebjee Nov 17, 2025
5102f82
feat!(tokenizer-ngram): add very fast ngram tokenizer
meilame-tayebjee Nov 18, 2025
ab58e26
doc: clean example notebook
meilame-tayebjee Nov 19, 2025
45ace28
fix: better handling of truncation to avoid warning
meilame-tayebjee Nov 19, 2025
b2e797b
doc: fix readme
meilame-tayebjee Nov 19, 2025
84b118b
fix: allow tokenizer not to have train attribute
meilame-tayebjee Nov 20, 2025
3c0a85a
feat(ngram): add return offsets and word_ids + fix output_dim
meilame-tayebjee Nov 20, 2025
ab70485
fix: update vocab_size after training
meilame-tayebjee Nov 20, 2025
27a11bb
fix: add a flag for return_word_ids
meilame-tayebjee Nov 20, 2025
823467b
fix: add a flag for return_word_ids
meilame-tayebjee Nov 20, 2025
93a6e80
Merge branch 'hf_tokenizer' of https://github.com/InseeFrLab/torchTex…
meilame-tayebjee Nov 20, 2025
4e2ffa5
fix: replace _build_vocab by train
meilame-tayebjee Nov 20, 2025
519a32d
feat(test): add test of all pipeline with different tokenizers
meilame-tayebjee Nov 20, 2025
6017a22
chore: remove old file
meilame-tayebjee Nov 20, 2025
aa70919
fix: right command to install HF dependencies in warning
meilame-tayebjee Nov 20, 2025
41a15f0
chore: change HF opt. dep. group name to huggingface
meilame-tayebjee Nov 20, 2025
2,227 changes: 807 additions & 1,420 deletions notebooks/example.ipynb

Large diffs are not rendered by default.

20 changes: 12 additions & 8 deletions pyproject.toml
@@ -1,11 +1,9 @@
[project]
name = "torchtextclassifiers"
description = "An implementation of the https://github.com/facebookresearch/fastText supervised learning algorithm for text classification using Pytorch."
description = "A text classification toolkit to easily build, train and evaluate deep learning text classifiers using PyTorch."
authors = [
{ name = "Tom Seimandi", email = "tom.seimandi@gmail.com" },
{ name = "Julien Pramil", email = "julien.pramil@insee.fr" },
{ name = "Meilame Tayebjee", email = "meilame.tayebjee@insee.fr" },
{ name = "Cédric Couralet", email = "cedric.couralet@insee.fr" },
{ name = "Meilame Tayebjee", email = "meilame.tayebjee@insee.fr" },
]
readme = "README.md"
repository = "https://github.com/InseeFrLab/torchTextClassifiers"
@@ -31,7 +29,10 @@ dev = [
"nltk",
"unidecode",
"captum",
"pyarrow"
"pyarrow",
"pre-commit>=4.3.0",
"ruff>=0.14.3",
"ipywidgets>=8.1.8",
]
docs = [
"sphinx>=5.0.0",
@@ -46,6 +47,12 @@ docs = [
[project.optional-dependencies]
explainability = ["unidecode", "nltk", "captum"]
preprocess = ["unidecode", "nltk"]
hf-dep = [
"tokenizers>=0.22.1",
"transformers>=4.57.1",
"datasets>=4.3.0",
]


[build-system]
requires = ["uv_build>=0.9.3,<0.10.0"]
@@ -58,6 +65,3 @@
[tool.uv.build-backend]
module-name="torchTextClassifiers"
module-root = ""
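
The `hf-dep` extra above gates the HuggingFace stack behind an optional install. A minimal sketch of the guarded-import pattern the commit log suggests ("feat: add smooth imports for HF", "fix: right command to install HF dependencies in warning") — `HAS_HF` and `require_hf` are illustrative names, not the package's actual API:

```python
# Optional-dependency guard: import lazily, fail with an install hint.
# (Pattern assumed from the commit messages; not the package's real code.)
try:
    import transformers  # noqa: F401

    HAS_HF = True
except ImportError:
    HAS_HF = False


def require_hf() -> None:
    """Raise a helpful error when the HuggingFace extra is missing."""
    if not HAS_HF:
        raise ImportError(
            "HuggingFace dependencies are missing; install them with "
            '`pip install "torchtextclassifiers[hf-dep]"`.'
        )
```

Callers invoke `require_hf()` at the top of any HF-backed code path, so a plain install of the package works until an HF feature is actually used.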



71 changes: 33 additions & 38 deletions tests/conftest.py
@@ -1,19 +1,22 @@
import pytest
from unittest.mock import Mock

import numpy as np
from unittest.mock import Mock, MagicMock
import pytest


@pytest.fixture
def sample_text_data():
"""Sample text data for testing."""
return np.array([
"This is a positive example",
"This is a negative example",
"Another positive case",
"Another negative case",
"Good example here",
"Bad example here"
])
return np.array(
[
"This is a positive example",
"This is a negative example",
"Another positive case",
"Another negative case",
"Good example here",
"Bad example here",
]
)


@pytest.fixture
@@ -25,14 +28,7 @@ def sample_labels():
@pytest.fixture
def sample_categorical_data():
"""Sample categorical data for testing."""
return np.array([
[1, 2],
[2, 1],
[1, 3],
[3, 1],
[2, 2],
[3, 3]
])
return np.array([[1, 2], [2, 1], [1, 3], [3, 1], [2, 2], [3, 3]])


@pytest.fixture
@@ -48,33 +44,32 @@


@pytest.fixture
def fasttext_config():
"""Mock FastText configuration."""
from torchTextClassifiers.classifiers.fasttext.core import FastTextConfig
config = FastTextConfig(
def model_config():
"""Mock model configuration."""
from torchTextClassifiers import ModelConfig

config = ModelConfig(
embedding_dim=10,
sparse=False,
num_tokens=1000,
min_count=1,
min_n=3,
max_n=6,
len_word_ngrams=2,
num_classes=2
categorical_vocabulary_sizes=[4, 5],
categorical_embedding_dims=[3, 4],
num_classes=10,
)
return config


@pytest.fixture
def mock_tokenizer():
"""Mock NGramTokenizer for testing."""
"""Mock BaseTokenizer for testing."""
tokenizer = Mock()
tokenizer.min_count = 1
tokenizer.min_n = 3
tokenizer.max_n = 6
tokenizer.num_tokens = 1000
tokenizer.word_ngrams = 2
tokenizer.padding_index = 999
tokenizer.vocab_size = 1000
tokenizer.padding_idx = 1
tokenizer.tokenize = Mock(
return_value={
"input_ids": np.array([[1, 2, 3], [4, 5, 6]]),
"attention_mask": np.array([[1, 1, 1], [1, 1, 1]]),
}
)
tokenizer.output_dim = 50
return tokenizer


@@ -108,4 +103,4 @@ def mock_dataset():
@pytest.fixture
def mock_dataloader():
"""Mock dataloader for testing."""
return Mock()
return Mock()
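
A test consuming the reworked `mock_tokenizer` fixture might look like the following sketch. The attribute names (`vocab_size`, `padding_idx`, `output_dim`) and the `tokenize` return shape are taken from the diff above; the stand-in is rebuilt inline so the snippet is self-contained, and the assertions are illustrative, not part of the PR:

```python
from unittest.mock import Mock

import numpy as np

# Rebuild a tokenizer stand-in shaped like the `mock_tokenizer` fixture.
tokenizer = Mock()
tokenizer.vocab_size = 1000
tokenizer.padding_idx = 1
tokenizer.output_dim = 50
tokenizer.tokenize = Mock(
    return_value={
        "input_ids": np.array([[1, 2, 3], [4, 5, 6]]),
        "attention_mask": np.array([[1, 1, 1], [1, 1, 1]]),
    }
)

batch = tokenizer.tokenize(["Good example here", "Bad example here"])
# Token ids and attention mask share the same (batch, seq_len) shape.
assert batch["input_ids"].shape == batch["attention_mask"].shape == (2, 3)
tokenizer.tokenize.assert_called_once()
```

Because `Mock` records calls, tests can verify both the tokenized output consumed downstream and that the pipeline invoked `tokenize` the expected number of times.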