BenjaminIsaac0111
diff --git a/config.yaml b/config.yaml
Lines changed: 8 additions & 1 deletion
diff --git a/tests/data/test_augmentation.py b/tests/data/test_augmentation.py
Lines changed: 11 additions & 5 deletions
diff --git a/tests/data/test_gene_vocab.py b/tests/data/test_data_integrity.py
tests/data/test_gene_vocab.py renamed to tests/data/test_data_integrity.py
Lines changed: 1 addition & 5 deletions
diff --git a/tests/data/test_pathways.py b/tests/data/test_pathways.py
Lines changed: 2 additions & 187 deletions
diff --git a/tests/data/test_visualization.py b/tests/data/test_visualization.py
Lines changed: 7 additions & 3 deletions
@@ -11,9 +11,16 @@ training:
   num_genes: 1000
   batch_size: 8
   learning_rate: 0.0001
-  output_dir: "./checkpoints"
+  output_dir: "./runs"
 
 # MSigDB Pathway Settings
 pathways:
   default_collection: "hallmarks"
   cache_dir: ".cache"
+
+# Quality Control Defaults
+qc:
+  min_umis: 500
+  min_genes: 200
+  max_mt: 0.15
+  min_pathways: 25
@@ -1,5 +1,5 @@
 """
-Merged tests: test_spatial_augment.py, test_augmentation_sync.py, test_spatial_alignment.py
+Tests for spatial augmentation logic (dihedral groups) and coordinate alignment.
 """
 
 import torch
@@ -13,7 +13,9 @@
 )
 from spatial_transcript_former.models import SpatialTranscriptFormer
 
-# --- From test_spatial_augment.py ---
+# ---------------------------------------------------------------------------
+# Dihedral Logic
+# ---------------------------------------------------------------------------
 
 
 def test_apply_dihedral_augmentation_torch():
@@ -46,7 +48,9 @@ def test_apply_dihedral_augmentation_numpy():
     assert out.shape == coords.shape
 
 
-# --- From test_augmentation_sync.py ---
+# ---------------------------------------------------------------------------
+# Synchronization
+# ---------------------------------------------------------------------------
 
 
 def test_sync_logic():
@@ -92,7 +96,9 @@ def test_sync_logic():
             assert False, f"Pixel lost in op {op}"
 
 
-# --- From test_spatial_alignment.py ---
+# ---------------------------------------------------------------------------
+# Spatial Alignment
+# ---------------------------------------------------------------------------
 
 
 def test_spatial_mixing_with_large_coordinates():
@@ -104,7 +110,7 @@ def test_spatial_mixing_with_large_coordinates():
     token_dim = 64
 
     model = SpatialTranscriptFormer(
-        num_genes=10, token_dim=token_dim, n_layers=2, use_spatial_pe=True
+        token_dim=token_dim, n_layers=2, use_spatial_pe=True
     )
 
     # Create two patches that are physically adjacent (256px apart) but logically neighbors
 
@@ -1,13 +1,11 @@
 """
-Merged tests: test_data_management.py, test_data_integrity.py
+Tests for biological coverage and spatial data integrity.
 """
 
 import os
-
 import pytest
 import torch
 import numpy as np
-import h5py
 
 from spatial_transcript_former.recipes.hest.io import (
     get_hest_data_dir,
@@ -20,8 +18,6 @@
     MSIGDB_URLS,
 )
 
-# --- From test_data_integrity.py ---
-
 
 @pytest.fixture
 def data_dir():
 
@@ -1,5 +1,5 @@
 """
-Merged tests: test_pathways.py, test_pathways_robust.py, test_pathway_stability.py
+Tests for MSigDB pathway parsing and membership matrix construction.
 """
 
 import pytest
@@ -13,13 +13,6 @@
     MSIGDB_URLS,
 )
 from spatial_transcript_former.data.pathways import build_membership_matrix
-from spatial_transcript_former.models.interaction import SpatialTranscriptFormer
-from spatial_transcript_former.training.losses import (
-    AuxiliaryPathwayLoss,
-    MaskedMSELoss,
-)
-
-# --- From test_pathways.py ---
 
 
 @pytest.fixture(scope="module")
@@ -133,182 +126,4 @@ def test_core_pathways_exist(self, pathway_result):
 
 # ---------------------------------------------------------------------------
 # Pathway ground truth
-# ---------------------------------------------------------------------------
-
-
-class TestPathwayTruth:
-    def test_consistent_across_calls(self, gene_list):
-        """Ground truth from MSigDB membership should be identical across calls."""
-        from spatial_transcript_former.visualization import _compute_pathway_truth
-        from unittest.mock import MagicMock
-
-        args = MagicMock()
-        args.sparsity_lambda = 0.0
-        args.pathways = None
-
-        np.random.seed(42)
-        gene_truth = np.random.rand(200, len(gene_list)).astype(np.float32)
-
-        result1, names1 = _compute_pathway_truth(gene_truth, gene_list, args)
-        result2, names2 = _compute_pathway_truth(gene_truth, gene_list, args)
-
-        np.testing.assert_array_equal(result1, result2)
-        assert names1 == names2
-
-    def test_output_shape(self, gene_list):
-        """Pathway truth should be (N, P) where P=50 (Hallmarks default)."""
-        from spatial_transcript_former.visualization import _compute_pathway_truth
-        from unittest.mock import MagicMock
-
-        args = MagicMock()
-        args.sparsity_lambda = 0.0
-        args.pathways = None
-
-        N = 150
-        gene_truth = np.random.rand(N, len(gene_list)).astype(np.float32)
-        result, names = _compute_pathway_truth(gene_truth, gene_list, args)
-
-        assert result.shape == (N, 50)
-        assert len(names) == 50
-
-    def test_spatial_variation(self, gene_list):
-        """Pathway truth should have spatial variation (non-zero std)."""
-        from spatial_transcript_former.visualization import _compute_pathway_truth
-        from unittest.mock import MagicMock
-
-        args = MagicMock()
-        args.sparsity_lambda = 0.0
-        args.pathways = None
-
-        # Create gene expression with spatial patterns
-        N = 200
-        gene_truth = np.random.rand(N, len(gene_list)).astype(np.float32)
-        # Add spatial structure to first few genes
-        gene_truth[:100, 0] += 5.0
-        gene_truth[100:, 1] += 5.0
-
-        result, _ = _compute_pathway_truth(gene_truth, gene_list, args)
-
-        # At least some pathways should have non-trivial spatial variation
-        stds = np.std(result, axis=0)
-        assert np.any(stds > 0.01), "Pathway truth has no spatial variation"
-
-
-# --- From test_pathways_robust.py ---
-
-
-def test_build_membership_matrix_integrity():
-    """Verify that the membership matrix correctly maps genes to pathways."""
-    pathway_dict = {
-        "PATHWAY_A": ["GENE_1", "GENE_2"],
-        "PATHWAY_B": ["GENE_2", "GENE_3"],
-    }
-    gene_list = ["GENE_1", "GENE_2", "GENE_3", "GENE_4"]
-
-    matrix, names = build_membership_matrix(pathway_dict, gene_list)
-
-    assert names == ["PATHWAY_A", "PATHWAY_B"]
-    assert matrix.shape == (2, 4)
-
-    # Pathway A: GENE_1, GENE_2
-    assert matrix[0, 0] == 1.0
-    assert matrix[0, 1] == 1.0
-    assert matrix[0, 2] == 0.0
-    assert matrix[0, 3] == 0.0
-
-    # Pathway B: GENE_2, GENE_3
-    assert matrix[1, 0] == 0.0
-    assert matrix[1, 1] == 1.0
-    assert matrix[1, 2] == 1.0
-    assert matrix[1, 3] == 0.0
-
-
-def test_build_membership_matrix_empty():
-    """Check behavior with no matches."""
-    pathway_dict = {"EMPTY": ["XYZ"]}
-    gene_list = ["ABC", "DEF"]
-    matrix, names = build_membership_matrix(pathway_dict, gene_list)
-    assert matrix.sum() == 0
-    assert names == ["EMPTY"]
-
-
-# --- From test_pathway_stability.py ---
-
-
-def test_pathway_initialization_stability_and_gradients():
-    """
-    Verifies that initializing the model with a binary pathway matrix:
-    1. Does not cause predictions to exponentially explode (numerical stability).
-    2. Allows gradients to flow properly when using AuxiliaryPathwayLoss.
-    """
-    torch.manual_seed(42)
-    num_pathways = 50
-    num_genes = 100
-
-    # Create a synthetic MSigDB-style binary matrix
-    pathway_matrix = (torch.rand(num_pathways, num_genes) > 0.8).float()
-    # Ensure no empty pathways to avoid division by zero
-    pathway_matrix[:, 0] = 1.0
-
-    # Initialize model with pathway_init
-    model = SpatialTranscriptFormer(
-        num_genes=num_genes,
-        num_pathways=num_pathways,
-        pathway_init=pathway_matrix,
-        use_spatial_pe=False,
-        output_mode="counts",
-        pretrained=False,
-    )
-
-    # Dummy inputs
-    B, S, D = (
-        2,
-        10,
-        2048,
-    )  # Using D=2048 since backbone='resnet50' requires it natively, or provided features
-    feats = torch.randn(B, S, D, requires_grad=True)
-    coords = torch.randn(B, S, 2)
-    target_genes = torch.randn(B, S, num_genes).abs()
-    mask = torch.zeros(B, S, dtype=torch.bool)
-
-    # Forward pass
-    # return_pathways=True is needed to get the intermediate pathway preds for Auxiliary loss
-    gene_preds, pathway_preds = model(
-        feats, rel_coords=coords, return_dense=True, return_pathways=True
-    )
-
-    # 1. Numerical Stability Check
-    # Without L1 normalization and removing temperature, predictions would explode.
-    # With the fix, Softplus should keep outputs reasonably small.
-    max_pred = gene_preds.max().item()
-    print(f"Max prediction value at initialization: {max_pred:.2f}")
-    assert (
-        max_pred < 100.0
-    ), f"Predictions exploded! Max value: {max_pred}. Check L1 normalization."
-    assert not torch.isnan(gene_preds).any(), "Found NaNs in initial predictions."
-
-    # 2. Gradient Flow Check (Compatibility with Training)
-    loss_fn = AuxiliaryPathwayLoss(pathway_matrix, MaskedMSELoss(), lambda_pathway=1.0)
-    loss = loss_fn(gene_preds, target_genes, mask=mask, pathway_preds=pathway_preds)
-
-    assert loss.isfinite(), "Loss is not finite."
-
-    loss.backward()
-
-    # Verify gradients reached the core transformer layers
-    target_layer_grad = model.fusion_engine.layers[0].linear1.weight.grad
-    assert target_layer_grad is not None, "Gradients did not reach the fusion engine."
-    assert target_layer_grad.norm() > 0, "Vanishing gradients in the fusion engine."
-    assert torch.isfinite(
-        target_layer_grad
-    ).all(), "Exploding/NaN gradients in fusion engine."
-
-    # Verify gradients reached the final reconstructor layer
-    recon_grad = model.gene_reconstructor.weight.grad
-    assert recon_grad is not None, "Gradients did not reach the gene reconstructor."
-    assert recon_grad.norm() > 0, "Vanishing gradients in the gene reconstructor."
-    assert torch.isfinite(
-        recon_grad
-    ).all(), "Exploding/NaN gradients in gene reconstructor."
-
-    print("Pathway initialization is fully stable and compatible with NN training.")
+# ---------------------------------------------------------------------------
@@ -1,5 +1,5 @@
 """
-Merged tests: test_visualization.py, test_spatial_stats.py
+Tests for training summary visualization and spatial statistics (Moran's I, Coherence).
 """
 
 import os
@@ -16,7 +16,9 @@
     _build_knn_weights,
 )
 
-# --- From test_visualization.py ---
+# ---------------------------------------------------------------------------
+# Training Summary
+# ---------------------------------------------------------------------------
 
 
 matplotlib.use("Agg")
@@ -172,7 +174,9 @@ def test_constant_input_handled(self):
         assert np.allclose(z, 0.0, atol=1e-4)
 
 
-# --- From test_spatial_stats.py ---
+# ---------------------------------------------------------------------------
+# Spatial Statistics
+# ---------------------------------------------------------------------------
 
 
 def _make_grid(rows=10, cols=10):