-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomparison.py
More file actions
107 lines (91 loc) · 4.11 KB
/
comparison.py
File metadata and controls
107 lines (91 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
def _load_id_to_location(path_list_path):
id_to_location = {}
with open(path_list_path, "r") as f:
for line in f:
line = line.strip()
if not line or line.startswith("#"):
continue
parts = line.split(",")
if len(parts) >= 5:
id_to_location[parts[0]] = parts[4]
return id_to_location
def avg_by_location(config, path_list_path="path_list.csv"):
    """Average per-cell protein intensities grouped by location and PC bin.

    For every (PC, bin) group listed in
    ``<output_dir>/shapemode/cells_assigned_to_pc_bins.json``, the bin's cells
    are bucketed by their annotated location (looked up via *path_list_path*),
    the per-cell protein data are averaged element-wise, and the result is
    saved to ``<output_dir>/comparison/avg_by_location`` as
    ``{PC}_bin{idx}_{location}.npy`` (spaces and slashes in the location are
    sanitized for the filename).

    Depending on ``config["protparam_mode"]``, per-cell data are read either
    from ``{id}_protein.npy`` arrays (``"rings"`` mode) or ``{id}_warp.png``
    images. Cells without a known location, and missing files, are skipped
    silently; empty groups produce no output file.
    """
    id_to_location = _load_id_to_location(path_list_path)
    assigned_path = os.path.join(
        config["output_dir"], "shapemode", "cells_assigned_to_pc_bins.json"
    )
    with open(assigned_path, "r") as handle:
        cells_assigned = json.load(handle)
    out_dir = os.path.join(config["output_dir"], "comparison", "avg_by_location")
    os.makedirs(out_dir, exist_ok=True)
    protparam_dir = os.path.join(config["output_dir"], "protparam")
    for pc, bins in cells_assigned.items():
        for bin_idx, image_ids in enumerate(bins):
            # Bucket this bin's cells by their annotated location.
            by_location = {}
            for image_id in image_ids:
                location = id_to_location.get(image_id)
                if location is not None:
                    by_location.setdefault(location, []).append(image_id)
            for location, ids in by_location.items():
                if config["protparam_mode"] == "rings":
                    candidates = (
                        os.path.join(protparam_dir, i + "_protein.npy") for i in ids
                    )
                    stack = [np.load(p) for p in candidates if os.path.exists(p)]
                else:
                    candidates = (
                        os.path.join(protparam_dir, i + "_warp.png") for i in ids
                    )
                    stack = [plt.imread(p) for p in candidates if os.path.exists(p)]
                if not stack:
                    continue
                safe_location = location.replace(" ", "_").replace("/", "-")
                np.save(
                    os.path.join(out_dir, f"{pc}_bin{bin_idx}_{safe_location}.npy"),
                    np.mean(stack, axis=0),
                )
def correlation_heatmap(config):
    """Compute Pearson correlations between per-location averages per PC bin.

    Reads the ``.npy`` averages written by ``avg_by_location`` from
    ``<output_dir>/comparison/avg_by_location`` (filenames of the form
    ``{PC}_bin{idx}_{location}.npy``). For every (PC, bin) pair with at least
    two locations, an all-pairs Pearson correlation matrix is written to
    ``<output_dir>/comparison/heatmaps/{PC}_{bin}_pearsonr.csv``; when
    ``config["plot"]`` is truthy, an annotated seaborn heatmap PNG is saved
    alongside it.

    Failed correlations (e.g. constant input, for which scipy either raises
    or returns NaN depending on version) are recorded as 0.0 so the CSV is
    always finite and version-independent.

    Args:
        config: dict with keys ``output_dir`` (str) and ``plot`` (bool-like).
    """
    avg_dir = os.path.join(config["output_dir"], "comparison", "avg_by_location")
    heatmap_dir = os.path.join(config["output_dir"], "comparison", "heatmaps")
    os.makedirs(heatmap_dir, exist_ok=True)
    files = [f for f in os.listdir(avg_dir) if f.endswith(".npy")]
    # Parse filenames: {PC}_bin{idx}_{location}.npy.  maxsplit=2 keeps any
    # underscores inside the location name intact (assumes the PC name itself
    # contains no underscore — TODO confirm against the shapemode writer).
    records = {}
    for fname in files:
        stem = fname[:-4]  # strip ".npy"
        parts = stem.split("_", 2)
        if len(parts) != 3:
            continue
        pc, bin_str, loc = parts
        records.setdefault((pc, bin_str), {})[loc] = np.load(os.path.join(avg_dir, fname))
    for (pc, bin_str), loc_images in sorted(records.items()):
        if len(loc_images) < 2:
            continue  # a correlation matrix needs at least two locations
        locs = sorted(loc_images.keys())
        n = len(locs)
        # Flatten each image once, not 2n times inside the pair loop.
        vectors = [loc_images[l].flatten() for l in locs]
        cor_mat = np.zeros((n, n))
        # Pearson correlation is symmetric: compute the upper triangle
        # (including the diagonal) and mirror it.
        for i in range(n):
            for j in range(i, n):
                try:
                    r, _ = pearsonr(vectors[i], vectors[j])
                except ValueError:
                    # Constant input or mismatched lengths — record 0.0
                    # rather than crashing the whole batch.
                    r = 0.0
                if np.isnan(r):
                    # Newer scipy returns NaN (with a warning) for constant
                    # input instead of raising; normalize to 0.0.
                    r = 0.0
                cor_mat[i, j] = cor_mat[j, i] = r
        df = pd.DataFrame(cor_mat, index=locs, columns=locs)
        df.to_csv(os.path.join(heatmap_dir, f"{pc}_{bin_str}_pearsonr.csv"))
        if config["plot"]:
            fig, ax = plt.subplots(figsize=(max(6, n), max(5, n - 1)))
            sns.heatmap(df, cmap="RdBu", vmin=-1, vmax=1, annot=True, fmt=".2f", ax=ax)
            ax.set_title(f"{pc} {bin_str} — location correlation")
            plt.tight_layout()
            plt.savefig(
                os.path.join(heatmap_dir, f"{pc}_{bin_str}_pearsonr.png"),
                bbox_inches="tight",
            )
            plt.close()