Skip to content

Commit 57005dd

Browse files
committed
Merge branch 'main' into improve_legacy_adata_reader
2 parents 3dd1cd9 + 28d05a2 commit 57005dd

14 files changed

Lines changed: 405 additions & 159 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,4 @@ data
4141
# data folder
4242
data/
4343
tests/data
44+
uv.lock

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ default_stages:
77
minimum_pre_commit_version: 2.16.0
88
repos:
99
- repo: https://github.com/rbubley/mirrors-prettier
10-
rev: v3.6.2
10+
rev: v3.7.4
1111
hooks:
1212
- id: prettier
1313
- repo: https://github.com/astral-sh/ruff-pre-commit
14-
rev: v0.14.1
14+
rev: v0.14.10
1515
hooks:
1616
- id: ruff
1717
args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
1818
- id: ruff-format
1919
- repo: https://github.com/pre-commit/mirrors-mypy
20-
rev: v1.18.2
20+
rev: v1.19.1
2121
hooks:
2222
- id: mypy
2323
additional_dependencies: [numpy, types-PyYAML]

asv.conf.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"version": 1,
3+
"project": "spatialdata-io",
4+
"project_url": "https://github.com/scverse/spatialdata-io",
5+
"repo": ".",
6+
"branches": ["main", "xenium-labels-dask", "xenium-labels-dask-zipstore"],
7+
"dvcs": "git",
8+
"environment_type": "virtualenv",
9+
"pythons": ["3.12"],
10+
"build_command": [],
11+
"install_command": ["python -m pip install {build_dir}[test]"],
12+
"uninstall_command": ["python -m pip uninstall -y {project}"],
13+
"env_dir": ".asv/env",
14+
"results_dir": ".asv/results",
15+
"html_dir": ".asv/html",
16+
"benchmark_dir": "benchmarks",
17+
"hash_length": 8,
18+
"build_cache_size": 2,
19+
"install_timeout": 600,
20+
"repeat": 3,
21+
"processes": 1,
22+
"attribute_selection": ["time_*", "peakmem_*"]
23+
}

benchmarks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# ASV benchmarks for spatialdata-io

benchmarks/bench_xenium.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""Benchmarks for SpatialData IO operations.
2+
3+
Configuration:
4+
Edit SANDBOX_DIR and DATASET below to point to your data.
5+
6+
Setup:
7+
cd <SANDBOX_DIR>/<DATASET>
8+
python download.py # use the same env where spatialdata is installed
9+
10+
Running:
11+
cd /path/to/spatialdata-io
12+
13+
# Quick benchmark (single run, for testing):
14+
asv run --python=same -b IOBenchmark --quick --show-stderr -v
15+
16+
# Full benchmark (multiple runs, for accurate results):
17+
asv run --python=same -b IOBenchmark --show-stderr -v
18+
19+
Comparing branches:
20+
# Run on specific commits:
21+
asv run main^! -b IOBenchmark --show-stderr -v
22+
asv run xenium-labels-dask^! -b IOBenchmark --show-stderr -v
23+
24+
# Or compare two branches directly:
25+
asv continuous main xenium-labels-dask -b IOBenchmark --show-stderr -v
26+
27+
# View comparison:
28+
asv compare main xenium-labels-dask
29+
30+
Results:
31+
- Console output shows timing and memory after each run
32+
- JSON results saved to: .asv/results/
33+
- Generate HTML report: asv publish && asv preview
34+
"""
35+
36+
import inspect
37+
import shutil
38+
from pathlib import Path
39+
from typing import TYPE_CHECKING
40+
41+
from spatialdata import SpatialData
42+
43+
from spatialdata_io import xenium # type: ignore[attr-defined]
44+
45+
# =============================================================================
46+
# CONFIGURATION - Edit these paths to match your setup
47+
# =============================================================================
48+
SANDBOX_DIR = Path(__file__).parent.parent.parent / "spatialdata-sandbox"
49+
DATASET = "xenium_2.0.0_io"
50+
# =============================================================================
51+
52+
53+
def get_paths() -> tuple[Path, Path]:
54+
"""Get paths for benchmark data."""
55+
path = SANDBOX_DIR / DATASET
56+
path_read = path / "data"
57+
path_write = path / "data_benchmark.zarr"
58+
59+
if not path_read.exists():
60+
raise ValueError(f"Data directory not found: {path_read}")
61+
62+
return path_read, path_write
63+
64+
65+
class IOBenchmark:
66+
"""Benchmark IO read operations."""
67+
68+
timeout = 3600
69+
repeat = 3
70+
number = 1
71+
warmup_time = 0
72+
processes = 1
73+
74+
def setup(self) -> None:
75+
"""Set up paths for benchmarking."""
76+
self.path_read, self.path_write = get_paths()
77+
if self.path_write.exists():
78+
shutil.rmtree(self.path_write)
79+
80+
def _read_xenium(self) -> SpatialData:
81+
"""Read xenium data with version-compatible kwargs."""
82+
signature = inspect.signature(xenium)
83+
kwargs = {}
84+
if "cleanup_labels_zarr_tmpdir" in signature.parameters:
85+
kwargs["cleanup_labels_zarr_tmpdir"] = False
86+
87+
return xenium(
88+
path=str(self.path_read),
89+
n_jobs=8,
90+
cell_boundaries=True,
91+
nucleus_boundaries=True,
92+
morphology_focus=True,
93+
cells_as_circles=True,
94+
**kwargs,
95+
)
96+
97+
def time_io(self) -> None:
98+
"""Walltime for data parsing."""
99+
sdata = self._read_xenium()
100+
sdata.write(self.path_write)
101+
102+
def peakmem_io(self) -> None:
103+
"""Peak memory for data parsing."""
104+
sdata = self._read_xenium()
105+
sdata.write(self.path_write)
106+
107+
108+
if __name__ == "__main__":
109+
IOBenchmark().time_io()

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dependencies = [
3636
"readfcs",
3737
"tifffile>=2023.8.12",
3838
"ome-types",
39+
"xmltodict",
3940
]
4041

4142
[project.optional-dependencies]
@@ -59,6 +60,8 @@ doc = [
5960
test = [
6061
"pytest",
6162
"pytest-cov",
63+
# https://github.com/scverse/spatialdata-io/issues/334
64+
"pyarrow!=22"
6265
]
6366
# this will be used by readthedocs and will make pip also look for pre-releases, generally installing the latest available version
6467
# update: readthedocs doesn't seem to try to install pre-releases even when trying to install the pre optional-dependency. For

src/spatialdata_io/_constants/_constants.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ class SeqfishKeys(ModeEnum):
6868
TIFF_FILE = ".tiff"
6969
GEOJSON_FILE = ".geojson"
7070
# file identifiers
71-
ROI = "Roi"
7271
TRANSCRIPT_COORDINATES = "TranscriptList"
7372
DAPI = "DAPI"
7473
COUNTS_FILE = "CellxGene"
@@ -78,6 +77,7 @@ class SeqfishKeys(ModeEnum):
7877
# transcripts
7978
TRANSCRIPTS_X = "x"
8079
TRANSCRIPTS_Y = "y"
80+
TRANSCRIPTS_Z = "z"
8181
FEATURE_KEY = "name"
8282
INSTANCE_KEY_POINTS = "cell"
8383
# cells
@@ -88,8 +88,6 @@ class SeqfishKeys(ModeEnum):
8888
SPATIAL_KEY = "spatial"
8989
REGION_KEY = "region"
9090
INSTANCE_KEY_TABLE = "instance_id"
91-
SCALEFEFACTOR_X = "PhysicalSizeX"
92-
SCALEFEFACTOR_Y = "PhysicalSizeY"
9391

9492

9593
@unique

src/spatialdata_io/readers/_utils/_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
from typing import TYPE_CHECKING, Any, Union
66

7+
from anndata import AnnData
78
from anndata.io import read_text
89
from h5py import File
910
from ome_types import from_tiff

src/spatialdata_io/readers/cosmx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,4 +289,4 @@ def cosmx(
289289
# logg.warning(f"FOV `{str(fov)}` does not exist, skipping it.")
290290
# continue
291291

292-
return SpatialData(images=images, labels=labels, points=points, table=table)
292+
return SpatialData(images=images, labels=labels, points=points, tables={"table": table})

0 commit comments

Comments
 (0)