Skip to content

Commit af90e05

Browse files
Merge pull request #53 from LLehner/seqFISH_reader
Reader for seqFISH data
2 parents d51d307 + ed06295 commit af90e05

6 files changed

Lines changed: 227 additions & 4 deletions

File tree

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
.DS_Store
33
*~
44
*.tmp
5+
temp/
56

67
# Compiled files
78
__pycache__/
@@ -31,9 +32,8 @@ __pycache__/
3132

3233
# other
3334
_version.py
34-
.code-workspace
35-
temp/
3635
node_modules/
36+
.code-workspace
3737

3838
# test datasets (e.g. Xenium ones)
3939
data/

docs/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ I/O for the `spatialdata` project.
2020
dbit
2121
mcmicro
2222
merscope
23+
seqfish
2324
steinbock
2425
stereoseq
2526
visium

src/spatialdata_io/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from spatialdata_io.readers.dbit import dbit
77
from spatialdata_io.readers.mcmicro import mcmicro
88
from spatialdata_io.readers.merscope import merscope
9+
from spatialdata_io.readers.seqfish import seqfish
910
from spatialdata_io.readers.steinbock import steinbock
1011
from spatialdata_io.readers.stereoseq import stereoseq
1112
from spatialdata_io.readers.visium import visium
@@ -18,6 +19,7 @@
1819

1920
__all__ = [
2021
"curio",
22+
"seqfish",
2123
"visium",
2224
"xenium",
2325
"codex",

src/spatialdata_io/_constants/_constants.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,36 @@ class CosmxKeys(ModeEnum):
5959
TARGET_OF_TRANSCRIPT = "target"
6060

6161

62+
@unique
class SeqfishKeys(ModeEnum):
    """Keys for *Spatial Genomics SeqFISH* formatted dataset."""

    # --- file-name suffixes ---
    CSV_FILE = ".csv"
    TIFF_FILE = ".tiff"
    OME_TIFF_FILE = ".ome.tiff"

    # --- tokens embedded in the file names that identify each file ---
    SECTION = "section"
    TRANSCRIPT_COORDINATES = "TranscriptCoordinates"
    DAPI = "DAPI"
    COUNTS_FILE = "CxG"
    CELL_MASK_FILE = "CellMask"
    CELL_COORDINATES = "CellCoordinates"

    # --- transcripts: names used for coordinates, feature and instance keys ---
    TRANSCRIPTS_X = "x"
    TRANSCRIPTS_Y = "y"
    FEATURE_KEY = "name"
    INSTANCE_KEY_POINTS = "cell"

    # --- cells: names used for the cell area and centroid ---
    AREA = "area"
    CELL_X = "center_x"
    CELL_Y = "center_y"

    # --- metadata: keys used when annotating the table ---
    SPATIAL_KEY = "spatial"
    REGION_KEY = "region"
    INSTANCE_KEY_TABLE = "instance_id"
91+
6292
@unique
6393
class XeniumKeys(ModeEnum):
6494
"""Keys for *10X Genomics Xenium* formatted dataset."""
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import re
5+
from collections.abc import Mapping
6+
from pathlib import Path
7+
from types import MappingProxyType
8+
from typing import Any
9+
10+
import anndata as ad
11+
import numpy as np
12+
import pandas as pd
13+
from dask_image.imread import imread
14+
from spatialdata import SpatialData
15+
from spatialdata.models import (
16+
Image2DModel,
17+
Labels2DModel,
18+
PointsModel,
19+
ShapesModel,
20+
TableModel,
21+
)
22+
from spatialdata.transformations import Identity
23+
24+
from spatialdata_io._constants._constants import SeqfishKeys as SK
25+
from spatialdata_io._docs import inject_docs
26+
27+
__all__ = ["seqfish"]
28+
29+
30+
@inject_docs(vx=SK)
def seqfish(
    path: str | Path,
    load_images: bool = True,
    load_labels: bool = True,
    load_points: bool = True,
    sections: list[int] | None = None,
    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> SpatialData:
    """
    Read *seqfish* formatted dataset.

    This function reads the following files:

        - ```{vx.COUNTS_FILE!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Counts and metadata file.
        - ```{vx.CELL_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Cell coordinates file.
        - ```{vx.DAPI!r}{vx.SECTION!r}{vx.OME_TIFF_FILE!r}```: High resolution tiff image.
        - ```{vx.CELL_MASK_FILE!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: Cell mask file.
        - ```{vx.TRANSCRIPT_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Transcript coordinates file.

    Files are expected to be named ``<prefix>_<identifier>_section<N><extension>``. The common prefix
    and the available section numbers are inferred from the cell coordinates files found in ``path``.

    .. seealso::

        - `seqfish output <https://spatialgenomics.com/data/>`_.

    Parameters
    ----------
    path
        Path to the directory containing the data.
    load_images
        Whether to load the images.
    load_labels
        Whether to load the labels.
    load_points
        Whether to load the points.
    sections
        Which sections (specified as integers) to load. By default, all sections are loaded.
    imread_kwargs
        Keyword arguments to pass to :func:`dask_image.imread.imread`.

    Returns
    -------
    :class:`spatialdata.SpatialData`

    Raises
    ------
    ValueError
        If no cell coordinates file is found, if the files found do not share a single prefix,
        if a requested section is not present in the data, or if no section is selected.
    """
    path = Path(path)

    # The cell coordinates files are used to discover the naming scheme: group 1 is the shared
    # prefix, group 2 is the section number exactly as written in the file name.
    count_file_pattern = re.compile(
        rf"(.*?)_{SK.CELL_COORDINATES}_{SK.SECTION}([0-9]+)" + re.escape(SK.CSV_FILE)
    )
    # `fullmatch` rejects look-alikes such as "..._section1.csv.bak"; sorting makes the result
    # independent of the (arbitrary) order returned by `os.listdir`.
    matches = [m for m in map(count_file_pattern.fullmatch, sorted(os.listdir(path))) if m is not None]
    if not matches:
        # no file matching the pattern found
        raise ValueError(
            f"No files matching the pattern {count_file_pattern} were found. Cannot infer the naming scheme."
        )

    prefixes = sorted({m.group(1) for m in matches})
    if len(prefixes) > 1:
        raise ValueError(f"Found files with more than one prefix ({prefixes}). Cannot infer the naming scheme.")
    prefix = prefixes[0]

    # Map each section number to its digits as they appear on disk, so that non-contiguous
    # numbering (e.g. only sections 2 and 5) and zero padding (e.g. "section01") both resolve
    # to the real file names.
    section_ids: dict[int, str] = {int(m.group(2)): m.group(2) for m in matches}

    if sections is None:
        sections = sorted(section_ids)
    else:
        for requested in sections:
            if requested not in section_ids:
                raise ValueError(f"Section {requested} not found in the data.")
    if not sections:
        raise ValueError("No sections were selected.")
    sections_str = [f"{SK.SECTION}{section_ids[x]}" for x in sections]

    def _file_name(identifier: str, section_name: str, extension: str) -> str:
        """Build the name of the file of kind ``identifier`` for the given section."""
        return f"{prefix}_{identifier}_{section_name}{extension}"

    # One AnnData per section, annotated with cell centroids, area, region and instance id.
    adatas: dict[str, ad.AnnData] = {}
    for section_name in sections_str:
        adata = ad.read_csv(path / _file_name(SK.COUNTS_FILE, section_name, SK.CSV_FILE), delimiter=",")
        cell_info = pd.read_csv(path / _file_name(SK.CELL_COORDINATES, section_name, SK.CSV_FILE), delimiter=",")
        adata.obsm[SK.SPATIAL_KEY] = cell_info[[SK.CELL_X, SK.CELL_Y]].to_numpy()
        adata.obs[SK.AREA] = cell_info[SK.AREA].to_numpy()
        # The region name must match the key of the corresponding shapes element built below.
        adata.obs[SK.REGION_KEY] = f"cells_{section_name}"
        adata.obs[SK.INSTANCE_KEY_TABLE] = adata.obs.index.astype(int)
        adatas[section_name] = adata

    scale_factors = [2, 2, 2, 2]

    # Every element of a section is placed in a coordinate system named after that section.
    if load_images:
        images = {
            f"image_{x}": Image2DModel.parse(
                imread(path / _file_name(SK.DAPI, x, SK.OME_TIFF_FILE), **imread_kwargs),
                dims=("c", "y", "x"),
                scale_factors=scale_factors,
                transformations={x: Identity()},
            )
            for x in sections_str
        }
    else:
        images = {}

    if load_labels:
        labels = {
            f"labels_{x}": Labels2DModel.parse(
                imread(path / _file_name(SK.CELL_MASK_FILE, x, SK.TIFF_FILE), **imread_kwargs).squeeze(),
                dims=("y", "x"),
                scale_factors=scale_factors,
                transformations={x: Identity()},
            )
            for x in sections_str
        }
    else:
        labels = {}

    if load_points:
        points = {
            f"transcripts_{x}": PointsModel.parse(
                pd.read_csv(path / _file_name(SK.TRANSCRIPT_COORDINATES, x, SK.CSV_FILE), delimiter=","),
                coordinates={"x": SK.TRANSCRIPTS_X, "y": SK.TRANSCRIPTS_Y},
                feature_key=SK.FEATURE_KEY.value,
                instance_key=SK.INSTANCE_KEY_POINTS.value,
                transformations={x: Identity()},
            )
            for x in sections_str
        }
    else:
        points = {}

    # Merge the per-section tables; the obs index is reset because it is no longer unique
    # once several sections are concatenated.
    adata = ad.concat(adatas.values())
    adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
    adata.obs = adata.obs.reset_index(drop=True)
    table = TableModel.parse(
        adata,
        region=[f"cells_{x}" for x in sections_str],
        region_key=SK.REGION_KEY.value,
        instance_key=SK.INSTANCE_KEY_TABLE.value,
    )

    # Cells are represented as circles centred on the cell centroid, with the radius of a
    # circle having the same area as the cell.
    shapes = {
        f"cells_{x}": ShapesModel.parse(
            section_adata.obsm[SK.SPATIAL_KEY],
            geometry=0,
            radius=np.sqrt(section_adata.obs[SK.AREA].to_numpy() / np.pi),
            index=section_adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
            transformations={x: Identity()},
        )
        for x, section_adata in adatas.items()
    }

    sdata = SpatialData(images=images, labels=labels, points=points, table=table, shapes=shapes)

    return sdata

src/spatialdata_io/readers/xenium.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from shapely import Polygon
3030
from spatial_image import SpatialImage
3131
from spatialdata import SpatialData
32-
from spatialdata._core.query.relational_query import _get_unique_label_values_as_index
32+
from spatialdata._core.query.relational_query import get_element_instances
3333
from spatialdata._types import ArrayLike
3434
from spatialdata.models import (
3535
Image2DModel,
@@ -409,7 +409,7 @@ def _get_labels_and_indices_mapping(
409409

410410
# this information will probably be available in the `label_id` column for version > 2.0.0 (see public
411411
# release notes mentioned above)
412-
real_label_index = _get_unique_label_values_as_index(labels).values
412+
real_label_index = get_element_instances(labels).values
413413

414414
# background removal
415415
if real_label_index[0] == 0:

0 commit comments

Comments
 (0)