Skip to content

Commit e23b664

Browse files
committed
added breast cancer idc ilc
1 parent 0826f43 commit e23b664

2 files changed

Lines changed: 169 additions & 0 deletions

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Conda environment for the Xenium breast cancer (IDC / ILC) conversion script.
channels:
  - conda-forge
dependencies:
  - python=3.11.6
  - scipy=1.11.4
  - pip
  # Packages below are installed through pip rather than conda.
  - pip:
      - spatialdata==0.1.2
      - spatialdata-io==0.1.2
      - pypdl==1.3.2
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
#!/usr/bin/env python
2+
3+
# Made by Paul Kiessling pakiessling@ukaachen.de
4+
5+
import argparse
6+
import json
7+
import os
8+
import shutil
9+
import tempfile
10+
11+
import pandas as pd
12+
import scipy
13+
from pypdl import Downloader
14+
from spatialdata_io import xenium
15+
16+
# Download URL -> expected MD5 hex digest of the downloaded file.
# Entries are downloaded in the order listed here.
LINKS = {
    # Xenium output archives (one per sample).
    "https://s3-us-west-2.amazonaws.com/10x.files/samples/xenium/1.0.2/Xenium_V1_FFPE_Human_Breast_IDC_With_Addon/Xenium_V1_FFPE_Human_Breast_IDC_With_Addon_outs.zip": "7d3374472092b320ee9b876cb56c520b",
    "https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FFPE_Human_Breast_ILC_With_Addon/Xenium_V1_FFPE_Human_Breast_ILC_With_Addon_outs.zip": "cf779754817893dc98ff4311df2db61e",
    # Per-sample annotation tables (CSV).
    "https://zenodo.org/records/15411357/files/idc.csv": "219392e7c41587efaeb315d172fba7b0",
    "https://zenodo.org/records/15411357/files/ilc.csv": "d1b150c706539f0f20d9df5496a08984",
}
22+
23+
24+
def download_links(links, temp_dir):
    """Download every URL in ``links`` into ``temp_dir`` and verify its MD5.

    Args:
        links: Mapping of download URL to the expected MD5 hex digest.
        temp_dir: Path handed to the downloader as the download target.

    Raises:
        RuntimeError: If the downloader returns no result for a URL.
        ValueError: If a downloaded file does not match its expected checksum.
    """
    # Browser-like User-Agent handed to the Downloader for its requests.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0"
    }
    dl = Downloader(headers=headers)
    for link, checksum in links.items():
        print(f"Downloading {link}")
        file = dl.start(
            url=link,
            file_path=temp_dir,
            segments=10,
            display=True,
            multithread=True,
            block=True,
            retries=3,
        )
        # NOTE(review): a blocking start() is documented to return None when
        # the download fails (confirm for the pinned pypdl version). Fail with
        # a clear message instead of an AttributeError on `validate_hash`.
        if file is None:
            raise RuntimeError(f"Download of {link} failed")
        if not file.validate_hash(checksum, "md5"):
            raise ValueError(f"File {file} is corrupted")
42+
43+
44+
def process_files(temp_folder, out_path, annotation_path, sample_name):
    """Read one Xenium output folder, annotate its cells and export them.

    The table is read from ``temp_folder``, the annotation CSV is left-joined
    onto the observations (``cell_id`` against the CSV's ``cell`` column),
    rows without a ``domain`` value are dropped, and the result is handed to
    ``process_adata``.

    Args:
        temp_folder: Folder containing the unpacked Xenium output.
        out_path: Output location forwarded to ``process_adata``.
        annotation_path: Path of the annotation CSV for this sample.
        sample_name: Sample name forwarded to ``process_adata``.
    """
    print(f"Transferring {temp_folder} to {out_path}")

    # Every optional element of the reader is switched off; only the table
    # is used below.
    reader_options = dict(
        cells_boundaries=False,
        nucleus_boundaries=False,
        cells_as_circles=False,
        cells_labels=False,
        nucleus_labels=False,
        transcripts=False,
        morphology_mip=False,
        morphology_focus=False,
    )
    table = xenium(temp_folder, **reader_options)["table"]

    annotation = pd.read_csv(annotation_path)
    # NOTE(review): a column-based merge returns a fresh integer index, so the
    # previous obs index is replaced here - confirm that this is intended.
    merged_obs = table.obs.merge(
        annotation, how="left", left_on="cell_id", right_on="cell"
    )
    table.obs = merged_obs

    # Keep only the cells that received a domain annotation.
    has_domain = table.obs["domain"].notna()
    annotated = table[has_domain].copy()
    process_adata(annotated, out_path, sample_name)
64+
65+
66+
def process_adata(adata, out_path, sample_name):
    """Export an annotated table as TSV / Matrix Market files.

    Files are written to ``<out_path>/<sample_name>`` (created if missing):
    ``observations.tsv``, ``features.tsv``, ``coordinates.tsv``,
    ``counts.mtx`` and ``labels.tsv``.

    Args:
        adata: Object exposing ``obs``, ``var``, ``obsm["spatial"]`` and ``X``.
        out_path: Parent directory of the sample directory.
        sample_name: Name of the sample directory to create.
    """
    complete_path = os.path.join(out_path, sample_name)
    os.makedirs(complete_path, exist_ok=True)

    def _target(filename):
        # Same "<dir>/<file>" layout for every output file.
        return f"{complete_path}/{filename}"

    # Observations, all flagged as selected.
    observations = adata.obs.copy()
    observations["selected"] = "true"
    observations.to_csv(_target("observations.tsv"), sep="\t", index_label="")

    # Features, all flagged as selected.
    features = adata.var.copy()
    features["selected"] = "true"
    features.to_csv(_target("features.tsv"), sep="\t", index_label="")

    # Spatial coordinates, indexed like the observations.
    coordinates = pd.DataFrame(adata.obsm["spatial"], columns=["x", "y"])
    coordinates.index = adata.obs.index
    coordinates.to_csv(_target("coordinates.tsv"), sep="\t", index_label="")

    # Count matrix in Matrix Market format.
    scipy.io.mmwrite(_target("counts.mtx"), adata.X)

    # Domain labels, one per observation.
    domain_labels = adata.obs["domain"]
    domain_labels.to_csv(_target("labels.tsv"), sep="\t", index_label="")
91+
92+
93+
def write_json(out_path):
    """Write ``experiment.json`` with the experiment metadata into ``out_path``.

    Args:
        out_path: Existing directory that receives ``experiment.json``.
    """
    target = os.path.join(out_path, "experiment.json")
    metadata = dict(technology="Xenium", species="human", is_3D=False)
    with open(target, "w") as handle:
        json.dump(metadata, handle)
101+
102+
103+
def write_table(out_path):
    """Write ``samples.tsv`` listing the two samples into ``out_path``.

    Args:
        out_path: Existing directory that receives ``samples.tsv``.
    """
    columns = ["patient", "sample", "position", "replicate", "directory", "n_clusters"]
    rows = [
        ("ILC", "ILC", 0, 0, "ILC", 6),
        ("IDC", "IDC", 0, 0, "IDC", 8),
    ]
    samples = pd.DataFrame(rows, columns=columns)
    # index_label=False leaves the index field out of the header row while the
    # index values are still written on each data row.
    # NOTE(review): the other writers in this file use index_label="" -
    # confirm which header layout consumers of samples.tsv expect.
    samples.to_csv(f"{out_path}/samples.tsv", sep="\t", index_label=False)
114+
115+
116+
def main():
    """Download the Xenium breast cancer samples and convert them.

    Parses ``-o/--out_dir``, downloads everything in ``LINKS`` into a
    temporary directory, unpacks each ``.zip`` archive, converts it together
    with its annotation CSV, and finally writes ``experiment.json`` and
    ``samples.tsv`` into the output directory.

    Raises:
        ValueError: If an unpacked archive is not one of the known samples.
    """
    # Set up command-line argument parser
    parser = argparse.ArgumentParser(
        description="Convert Xenium data to Spacehack format."
    )

    # Add arguments for output folder
    parser.add_argument(
        "-o", "--out_dir", help="Output directory to write files to.", required=True
    )

    # Parse the command-line arguments
    args = parser.parse_args()

    # Download and process
    with tempfile.TemporaryDirectory() as temp_dir:
        download_links(LINKS, temp_dir)
        for file in os.listdir(temp_dir):
            # Annotation CSVs live in the same directory; only archives are
            # unpacked and converted.
            if not file.endswith(".zip"):
                continue

            sample_name = file.split(".")[0]
            sample_path = os.path.join(temp_dir, sample_name)

            shutil.unpack_archive(os.path.join(temp_dir, file), sample_path)

            if sample_name == "Xenium_V1_FFPE_Human_Breast_IDC_With_Addon_outs":
                annotation_path = os.path.join(temp_dir, "idc.csv")
                sample_name_short = "IDC"
            elif sample_name == "Xenium_V1_FFPE_Human_Breast_ILC_With_Addon_outs":
                annotation_path = os.path.join(temp_dir, "ilc.csv")
                sample_name_short = "ILC"
            else:
                raise ValueError("Unknown sample found in downloaded files")

            # process_files() forwards out_path to process_adata(), which joins
            # the sample name onto it. Pass the bare output directory so files
            # land in <out_dir>/<sample>, matching the "directory" column of
            # samples.tsv, rather than in <out_dir>/<sample>/<sample>.
            process_files(
                sample_path, args.out_dir, annotation_path, sample_name_short
            )

    write_json(args.out_dir)
    write_table(args.out_dir)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)