InseeFrLab
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_pipeline.py‎
Lines changed: 79 additions & 4 deletions b/‎tests/test_pipeline.py‎
Lines changed: 79 additions & 4 deletions
diff --git a/‎torchTextClassifiers/model/components/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎torchTextClassifiers/model/components/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -183,3 +183,4 @@ example_files/
 _site/
 .quarto/
 **/*.quarto_ipynb
+my_ttc/
@@ -9,6 +9,7 @@
     AttentionConfig,
     CategoricalVariableNet,
     ClassificationHead,
+    LabelAttentionConfig,
     TextEmbedder,
     TextEmbedderConfig,
 )
@@ -51,7 +52,14 @@ def model_params():
     }
 
 
-def run_full_pipeline(tokenizer, sample_text_data, categorical_data, labels, model_params):
+def run_full_pipeline(
+    tokenizer,
+    sample_text_data,
+    categorical_data,
+    labels,
+    model_params,
+    label_attention_enabled: bool = False,
+):
     """Helper function to run the complete pipeline for a given tokenizer."""
     # Create dataset
     dataset = TextClassificationDataset(
@@ -83,6 +91,14 @@ def run_full_pipeline(tokenizer, sample_text_data, categorical_data, labels, mod
         embedding_dim=model_params["embedding_dim"],
         padding_idx=padding_idx,
         attention_config=attention_config,
+        label_attention_config=(
+            LabelAttentionConfig(
+                n_head=attention_config.n_head,
+                num_classes=model_params["num_classes"],
+            )
+            if label_attention_enabled
+            else None
+        ),
     )
 
     text_embedder = TextEmbedder(text_embedder_config=text_embedder_config)
@@ -98,7 +114,7 @@ def run_full_pipeline(tokenizer, sample_text_data, categorical_data, labels, mod
     expected_input_dim = model_params["embedding_dim"] + categorical_var_net.output_dim
     classification_head = ClassificationHead(
         input_dim=expected_input_dim,
-        num_classes=model_params["num_classes"],
+        num_classes=model_params["num_classes"] if not label_attention_enabled else 1,
     )
 
     # Create model
@@ -136,6 +152,14 @@ def run_full_pipeline(tokenizer, sample_text_data, categorical_data, labels, mod
         categorical_embedding_dims=model_params["categorical_embedding_dims"],
         num_classes=model_params["num_classes"],
         attention_config=attention_config,
+        label_attention_config=(
+            LabelAttentionConfig(
+                n_head=attention_config.n_head,
+                num_classes=model_params["num_classes"],
+            )
+            if label_attention_enabled
+            else None
+        ),
     )
 
     # Create training config
@@ -163,13 +187,41 @@ def run_full_pipeline(tokenizer, sample_text_data, categorical_data, labels, mod
 
     # Predict with explanations
     top_k = 5
-    predictions = ttc.predict(X, top_k=top_k, explain=True)
+
+    predictions = ttc.predict(
+        X,
+        top_k=top_k,
+        explain_with_label_attention=label_attention_enabled,
+        explain_with_captum=True,
+    )
+
+    # Test label attention assertions
+    if label_attention_enabled:
+        assert (
+            predictions["label_attention_attributions"] is not None
+        ), "Label attention attributions should not be None when label_attention_enabled is True"
+        label_attention_attributions = predictions["label_attention_attributions"]
+        expected_shape = (
+            len(sample_text_data),  # batch_size
+            model_params["n_head"],  # n_head
+            model_params["num_classes"],  # num_classes
+            tokenizer.output_dim,  # seq_len
+        )
+        assert label_attention_attributions.shape == expected_shape, (
+            f"Label attention attributions shape mismatch. "
+            f"Expected {expected_shape}, got {label_attention_attributions.shape}"
+        )
+    else:
+        # When label attention is not enabled, the attributions should be None
+        assert (
+            predictions.get("label_attention_attributions") is None
+        ), "Label attention attributions should be None when label_attention_enabled is False"
 
     # Test explainability functions
     text_idx = 0
     text = sample_text_data[text_idx]
     offsets = predictions["offset_mapping"][text_idx]
-    attributions = predictions["attributions"][text_idx]
+    attributions = predictions["captum_attributions"][text_idx]
     word_ids = predictions["word_ids"][text_idx]
 
     words, word_attributions = map_attributions_to_word(attributions, text, word_ids, offsets)
@@ -239,3 +291,26 @@ def test_ngram_tokenizer(sample_data, model_params):
 
     # Run full pipeline
     run_full_pipeline(tokenizer, sample_text_data, categorical_data, labels, model_params)
+
+
+def test_label_attention_enabled(sample_data, model_params):
+    """Test the full pipeline with label attention enabled (using WordPieceTokenizer)."""
+    sample_text_data, categorical_data, labels = sample_data
+
+    vocab_size = 100
+    tokenizer = WordPieceTokenizer(vocab_size, output_dim=50)
+    tokenizer.train(sample_text_data)
+
+    # Check tokenizer works
+    result = tokenizer.tokenize(sample_text_data)
+    assert result.input_ids.shape[0] == len(sample_text_data)
+
+    # Run full pipeline with label attention enabled
+    run_full_pipeline(
+        tokenizer,
+        sample_text_data,
+        categorical_data,
+        labels,
+        model_params,
+        label_attention_enabled=True,
+    )
@@ -8,5 +8,6 @@
     CategoricalVariableNet as CategoricalVariableNet,
 )
 from .classification_head import ClassificationHead as ClassificationHead
+from .text_embedder import LabelAttentionConfig as LabelAttentionConfig
 from .text_embedder import TextEmbedder as TextEmbedder
 from .text_embedder import TextEmbedderConfig as TextEmbedderConfig
Original file line number	Diff line number	Diff line change
`@@ -8,5 +8,6 @@`
`8`	`8`	`CategoricalVariableNet as CategoricalVariableNet,`
`9`	`9`	`)`
`10`	`10`	`from .classification_head import ClassificationHead as ClassificationHead`
	`11`	`+from .text_embedder import LabelAttentionConfig as LabelAttentionConfig`
`11`	`12`	`from .text_embedder import TextEmbedder as TextEmbedder`
`12`	`13`	`from .text_embedder import TextEmbedderConfig as TextEmbedderConfig`