import math
from pathlib import Path
from tempfile import TemporaryDirectory

import numpy as np
import pytest
from click.testing import CliRunner
from spatialdata import get_extent, read_zarr
from spatialdata.models import get_table_keys

from spatialdata_io.__main__ import visium_hd_wrapper
from spatialdata_io._constants._constants import VisiumHDKeys
from spatialdata_io.readers.visium_hd import (
    _decompose_projective_matrix,
    _projective_matrix_is_affine,
    visium_hd,
)
from tests._utils import skip_if_below_python_version

# --- UNIT TESTS FOR HELPER FUNCTIONS ---

def test_projective_matrix_is_affine() -> None:
    """Check that ``_projective_matrix_is_affine`` tells affine and projective matrices apart."""
    # An affine matrix has [0, 0, 1] as its last row.
    affine_matrix = np.array([[2, 0.5, 10], [0.5, 2, 20], [0, 0, 1]])
    assert _projective_matrix_is_affine(affine_matrix)

    # Same upper rows, but a different last row: the matrix must be reported as not affine.
    projective_matrix = np.array([[2, 0.5, 10], [0.5, 2, 20], [0.01, 0.02, 1]])
    assert not _projective_matrix_is_affine(projective_matrix)


def test_decompose_projective_matrix() -> None:
    """Check the decomposition of a projective matrix into affine and shift components."""
    projective_matrix = np.array([[1, 2, 3], [4, 5, 6], [0.1, 0.2, 1]])
    affine, shift = _decompose_projective_matrix(projective_matrix)

    # The affine component keeps the first two rows and has [0, 0, 1] as its last row.
    expected_affine = np.array([[1, 2, 3], [4, 5, 6], [0, 0, 1]])
    assert np.allclose(affine, expected_affine)

    # Recomposing the affine and shift matrices should yield the original projective matrix.
    assert np.allclose(affine @ shift, projective_matrix)


# --- END-TO-END TESTS ON EXAMPLE DATA ---

# TODO: Replace with the actual Visium HD test dataset folder name.
# DATASET_FOLDER is the folder the end-to-end tests look for under the './data/' directory.
# See https://github.com/scverse/spatialdata-io/blob/main/.github/workflows/prepare_test_data.yaml
# for instructions on how to download and place the data on disk.
DATASET_FOLDER = "Visium_HD_Mouse_Brain_Chunk"
# Passed as ``dataset_id`` to the reader; the tests expect it as the prefix of element names.
DATASET_ID = "visium_hd_tiny"


@skip_if_below_python_version()
def test_visium_hd_data_extent() -> None:
    """Check the spatial extent of the loaded Visium HD data.

    The test is skipped when the example dataset folder is not present on disk.
    """
    f = Path("./data") / DATASET_FOLDER
    if not f.is_dir():
        pytest.skip(f"Test data not found at '{f}'. Skipping extent test.")

    sdata = visium_hd(f, dataset_id=DATASET_ID)
    extent = get_extent(sdata, exact=False)
    # Round outwards to whole numbers so the expected values can be written as integers.
    extent = {ax: (math.floor(bounds[0]), math.ceil(bounds[1])) for ax, bounds in extent.items()}

    # TODO: Replace with the actual expected extent of your test data
    expected_extent = {"x": (1000, 7000), "y": (2000, 8000)}
    # Compare the mappings themselves rather than their string form, so the result
    # does not depend on key order or on how the numbers are rendered by repr().
    assert extent == expected_extent


@skip_if_below_python_version()
@pytest.mark.parametrize(
    "params",
    [
        # Test case 1: Default binned data loading (squares)
        pytest.param(
            {
                "load_segmentations_only": False,
                "load_nucleus_segmentations": False,
                "bins_as_squares": True,
                "annotate_table_by_labels": False,
                "load_all_images": False,
            },
            id="bins-as-squares",
        ),
        # Test case 2: Binned data as circles
        pytest.param(
            {
                "load_segmentations_only": False,
                "load_nucleus_segmentations": False,
                "bins_as_squares": False,
                "annotate_table_by_labels": False,
                "load_all_images": False,
            },
            id="bins-as-circles",
        ),
        # Test case 3: Binned data with tables annotating labels instead of shapes
        pytest.param(
            {
                "load_segmentations_only": False,
                "load_nucleus_segmentations": False,
                "bins_as_squares": True,
                "annotate_table_by_labels": True,
                "load_all_images": False,
            },
            id="table-annotates-labels",
        ),
        # Test case 4: Load binned data AND all segmentations (cell + nucleus)
        pytest.param(
            {
                "load_segmentations_only": False,
                "load_nucleus_segmentations": True,
                "bins_as_squares": True,
                "annotate_table_by_labels": False,
                "load_all_images": False,
            },
            id="bins-plus-nucleus-segmentations",
        ),
        # Test case 5: Load cell segmentations only
        pytest.param(
            {
                "load_segmentations_only": True,
                "load_nucleus_segmentations": False,
                "bins_as_squares": True,
                "annotate_table_by_labels": False,
                "load_all_images": False,
            },
            id="cell-segmentations-only",
        ),
        # Test case 6: Load all segmentations (cell + nucleus) only
        pytest.param(
            {
                "load_segmentations_only": True,
                "load_nucleus_segmentations": True,
                "bins_as_squares": True,
                "annotate_table_by_labels": False,
                "load_all_images": False,
            },
            id="all-segmentations-only",
        ),
        # Test case 7: Load everything, including auxiliary images like CytAssist
        pytest.param(
            {
                "load_segmentations_only": False,
                "load_nucleus_segmentations": True,
                "bins_as_squares": True,
                "annotate_table_by_labels": False,
                "load_all_images": True,
            },
            id="everything-with-all-images",
        ),
    ],
)
def test_visium_hd_data_integrity(params: dict[str, bool]) -> None:
    """Check the integrity of various components of the loaded SpatialData object.

    ``params`` holds the keyword arguments forwarded to ``visium_hd``; the same flags
    decide which elements the test expects to find. The test is skipped when the example
    dataset folder is not present on disk.
    """
    f = Path("./data") / DATASET_FOLDER
    if not f.is_dir():
        pytest.skip(f"Test data not found at '{f}'. Skipping integrity test.")

    sdata = visium_hd(f, dataset_id=DATASET_ID, **params)

    # --- IMAGE CHECKS ---
    assert f"{DATASET_ID}_full_image" in sdata.images
    assert f"{DATASET_ID}_hires_image" in sdata.images
    assert f"{DATASET_ID}_lowres_image" in sdata.images
    # Every parameter set defines this key, so index it like the other flags.
    if params["load_all_images"]:
        assert f"{DATASET_ID}_cytassist_image" in sdata.images

    # --- SEGMENTATION CHECKS (loaded in all modes if present) ---
    # TODO: Update placeholder values with actual data from your test dataset
    assert VisiumHDKeys.CELL_SEG_KEY_HD in sdata.tables
    assert f"{DATASET_ID}_{VisiumHDKeys.CELL_SEG_KEY_HD}" in sdata.shapes
    cell_table = sdata.tables[VisiumHDKeys.CELL_SEG_KEY_HD]
    assert cell_table.shape == (2485, 36738)  # Example shape (n_obs, n_vars)
    assert "cellid_000000001-1" in cell_table.obs_names  # Example cell ID

    if params["load_nucleus_segmentations"]:
        assert VisiumHDKeys.NUCLEUS_SEG_KEY_HD in sdata.tables
        assert f"{DATASET_ID}_{VisiumHDKeys.NUCLEUS_SEG_KEY_HD}" in sdata.shapes
        nuc_table = sdata.tables[VisiumHDKeys.NUCLEUS_SEG_KEY_HD]
        assert nuc_table.shape == (2485, 36738)  # Example shape
    else:
        assert VisiumHDKeys.NUCLEUS_SEG_KEY_HD not in sdata.tables

    # --- BINNED DATA CHECKS ---
    if params["load_segmentations_only"]:
        # Segmentation-only loading must not produce binned tables. Check the bin size
        # inspected below ("square_008um") as well as "square_002um", so this branch
        # mirrors the positive check instead of testing an unrelated table name.
        for bin_table_name in ("square_002um", "square_008um"):
            assert bin_table_name not in sdata.tables
        return

    assert "square_008um" in sdata.tables
    table = sdata.tables["square_008um"]
    assert table.shape == (39000, 36738)  # Example shape
    assert "AAACCGGGTTTA-1" in table.obs_names  # Example barcode
    assert np.array_equal(table.X.indices[:3], [10, 20, 30])  # Example indices

    shape_name = f"{DATASET_ID}_square_008um"
    labels_name = f"{shape_name}_labels"
    # The first table key is the region, i.e. the name of the element the table annotates.
    region, _, _ = get_table_keys(table)
    if params["annotate_table_by_labels"]:
        assert labels_name in sdata.labels
        assert region == labels_name
    else:
        assert shape_name in sdata.shapes
        assert region == shape_name
        # Check for circles vs. squares: only circles are expected to carry a "radius" column.
        # NOTE(review): kept inside this branch so the shapes element is only accessed
        # after its presence has been asserted — confirm against the intended nesting.
        if params["bins_as_squares"]:
            assert "radius" not in sdata.shapes[shape_name]
        else:
            assert "radius" in sdata.shapes[shape_name]

# --- CLI WRAPPER TEST ---

@skip_if_below_python_version()
def test_cli_visium_hd(runner: CliRunner) -> None:
    """Test the command-line interface for the Visium HD reader.

    Runs the CLI wrapper on the example dataset, writes the result to a temporary
    Zarr store and reads it back. The test is skipped when the example dataset folder
    is not present on disk.
    """
    f = Path("./data") / DATASET_FOLDER
    if not f.is_dir():
        pytest.skip(f"Test data not found at '{f}'. Skipping CLI test.")

    with TemporaryDirectory() as tmpdir:
        output_zarr = Path(tmpdir) / "data.zarr"
        result = runner.invoke(
            visium_hd_wrapper,
            [
                "--path",
                str(f),
                "--output",
                str(output_zarr),
            ],
        )
        assert result.exit_code == 0, result.output
        # Verify the output can be read. This must happen inside the ``with`` block:
        # the temporary directory (and the Zarr store in it) is deleted on exit.
        sdata = read_zarr(output_zarr)

        # A simple check to confirm data was loaded.
        # The default dataset_id is inferred from the feature slice file name.
        # This assert may need adjustment based on your test data's file names.
        # Strip only a trailing "_outs" marker; ``replace`` would also remove it mid-name.
        inferred_dataset_id = DATASET_FOLDER.removesuffix("_outs")  # Example inference
        assert f"{inferred_dataset_id}_full_image" in sdata.images