|
| 1 | +"""Benchmarks for SpatialData IO operations for large images. |
| 2 | +
|
| 3 | +Instructions: |
| 4 | + See benchmark_xenium.py for instructions. |
| 5 | +""" |
| 6 | + |
| 7 | +import logging |
| 8 | +import logging.handlers |
| 9 | +import tempfile |
| 10 | +from pathlib import Path |
| 11 | +from typing import Any |
| 12 | + |
| 13 | +import numpy as np |
| 14 | +import tifffile |
| 15 | +from spatialdata import SpatialData |
| 16 | +from spatialdata._logging import logger |
| 17 | +from xarray import DataArray |
| 18 | + |
| 19 | +from spatialdata_io import image # type: ignore[attr-defined] |
| 20 | + |
# =============================================================================
# CONFIGURATION - Edit these values to match your setup
# =============================================================================
# Image dimensions: (channels, height, width).
# Default is 3 x 30000 x 30000 uint8 pixels, i.e. ~2.7 GB of raw data per file.
IMAGE_SHAPE = (3, 30000, 30000)
# =============================================================================
| 28 | + |
class IOBenchmarkImage:
    """Benchmark IO read operations with different parameter combinations."""

    # asv run configuration.
    timeout = 3600
    repeat = 3
    number = 1
    warmup_time = 0
    processes = 1

    # Parameter grid: scale_factors x (use_tiff_memmap, compressed) x chunks.
    # The memmap/compressed pairs cover: (False, True), (False, False), (True, False);
    # (True, True) is omitted because a compressed TIFF cannot be memory-mapped.
    params = [
        [None, [2, 2]],  # scale_factors
        [(False, True), (False, False), (True, False)],  # (use_tiff_memmap, compressed)
        [(1, 250, 250), (3, 250, 250)],  # chunks
    ]
    param_names = ["scale_factors", "memmap_compressed", "chunks"]

    # Shared temp directory holding the source TIFFs; built lazily on first use
    # and reused by every benchmark run of this class.
    _images_temp_dir: tempfile.TemporaryDirectory[str] | None = None
    _path_read_uncompressed: Path | None = None
    _path_read_compressed: Path | None = None

    @classmethod
    def _setup_images(cls) -> None:
        """Generate the synthetic source TIFFs once; later calls are no-ops."""
        if cls._images_temp_dir is not None:
            return

        cls._images_temp_dir = tempfile.TemporaryDirectory()
        base = Path(cls._images_temp_dir.name)
        cls._path_read_uncompressed = base / "image_uncompressed.tif"
        cls._path_read_compressed = base / "image_compressed.tif"

        # Deterministic fake pixels so every run reads identical data.
        pixels = np.random.default_rng(42).integers(0, 255, size=IMAGE_SHAPE, dtype=np.uint8)

        # The uncompressed variant is memory-mappable; the zlib one is not.
        tifffile.imwrite(cls._path_read_uncompressed, pixels, compression=None)
        tifffile.imwrite(cls._path_read_compressed, pixels, compression="zlib")

    def setup(self, *_: Any) -> None:
        """Prepare input paths and a fresh output location for one benchmark run."""
        self._setup_images()  # shared across runs; only the first call does work
        self.path_read_uncompressed = self._path_read_uncompressed
        self.path_read_compressed = self._path_read_compressed

        # Per-run output directory; teardown removes it so runs stay isolated.
        self._output_temp_dir = tempfile.TemporaryDirectory()
        self.path_write = Path(self._output_temp_dir.name) / "data_benchmark.zarr"

    def teardown(self, *_: Any) -> None:
        """Remove the per-run output directory."""
        if hasattr(self, "_output_temp_dir"):
            self._output_temp_dir.cleanup()

    @staticmethod
    def _assert_chunk_layout(element: DataArray, chunks: tuple[int, ...]) -> None:
        """Assert the first y/x chunk matches the request or spans the whole axis.

        ``chunks`` is ordered (c, y, x); axis ``y`` maps to index 1 and ``x`` to
        index 2 in both ``chunks`` and ``element.shape``.
        """
        for axis, dim in (("y", 1), ("x", 2)):
            first_chunk = element.chunksizes[axis][0]
            assert first_chunk == chunks[dim] or first_chunk == element.shape[dim]

    def _convert_image(
        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
    ) -> SpatialData:
        """Read the source TIFF with the given parameters and sanity-check the result."""
        memmap_requested, is_compressed = memmap_compressed
        # Pick the input file matching the requested compression setting.
        source = self.path_read_compressed if is_compressed else self.path_read_uncompressed
        assert source is not None

        # Buffer WARNING-level records emitted during the read so we can
        # verify the memmap warning behavior afterwards.
        capture = logging.handlers.MemoryHandler(capacity=100)
        capture.setLevel(logging.WARNING)
        logger.addHandler(capture)
        saved_propagate = logger.propagate
        logger.propagate = True

        try:
            element = image(
                input=source,
                data_axes=("c", "y", "x"),
                coordinate_system="global",
                use_tiff_memmap=memmap_requested,
                chunks=chunks,
                scale_factors=scale_factors,
            )
        finally:
            logger.removeHandler(capture)
            logger.propagate = saved_propagate

        # An uncompressed file read with memmap enabled must not warn about
        # the data being non-memory-mappable.
        warned = any(
            "image data is not memory-mappable" in record.getMessage() for record in capture.buffer
        )
        if memmap_requested and not is_compressed:
            assert not warned, (
                "Uncompressed TIFF with memmap=True should not trigger memory-mappable warning"
            )

        sdata = SpatialData.init_from_elements({"image": element})
        # sanity check: chunks is (c, y, x)
        if scale_factors is None:
            # Single-scale: the element is a plain DataArray.
            assert isinstance(sdata["image"], DataArray)
            if chunks is not None:
                self._assert_chunk_layout(sdata["image"], chunks)
        else:
            # Multiscale: one level per scale factor plus the base resolution.
            assert len(sdata["image"].keys()) == len(scale_factors) + 1
            if chunks is not None:
                self._assert_chunk_layout(sdata["image"]["scale0"]["image"], chunks)

        return sdata

    def time_io(
        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
    ) -> None:
        """Walltime for data parsing."""
        sdata = self._convert_image(scale_factors, memmap_compressed, chunks)
        sdata.write(self.path_write)

    def peakmem_io(
        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
    ) -> None:
        """Peak memory for data parsing."""
        sdata = self._convert_image(scale_factors, memmap_compressed, chunks)
        sdata.write(self.path_write)
| 168 | + |
| 169 | +# if __name__ == "__main__": |
| 170 | +# # Run a single test case for quick verification |
| 171 | +# bench = IOBenchmarkImage() |
| 172 | +# |
| 173 | +# bench.setup() |
| 174 | +# bench.time_io(None, (True, False), (1, 5000, 5000)) |
| 175 | +# bench.teardown() |
| 176 | +# |
| 177 | +# # Clean up the shared images temp directory at the end |
| 178 | +# if IOBenchmarkImage._images_temp_dir is not None: |
| 179 | +# IOBenchmarkImage._images_temp_dir.cleanup() |
| 180 | +# IOBenchmarkImage._images_temp_dir = None |