-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathshapemodes.py
More file actions
84 lines (72 loc) · 3.43 KB
/
shapemodes.py
File metadata and controls
84 lines (72 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import numpy as np
from sklearn.decomposition import PCA
import pandas as pd
import json
from lib import plotting
def _select_coefficient_rows(config, df_fft):
    """Return ``{image name: coefficient Series}`` for the rows that pass filtering.

    A row is dismissed (and the reason logged through ``config["log"]``) when
    it contains any NaN, or when its ``cell_area`` / ``nuc_area`` ratio
    exceeds ``config["dismiss_ratio"]``.
    """
    log = config["log"]
    kept = {}
    for _, row in df_fft.iterrows():
        image = row["image"]
        if row.isna().any():
            # An f-string (not "+") so that a missing image name, which is
            # itself a NaN float, cannot make the log call raise TypeError.
            log.info(f"-- Dismissing {image} image due nan values")
            continue
        nuc_area = row["nuc_area"]
        # A zero nucleus area makes the ratio undefined; dismiss the row
        # instead of dividing by zero.
        if nuc_area == 0 or row["cell_area"] / nuc_area > config["dismiss_ratio"]:
            log.info(f"-- Dismissing {image} image due to cell/nuc ratio")
            continue
        # Everything from column position 10 onward is used as coefficients.
        # NOTE(review): presumably the first 10 columns are metadata - confirm
        # against the code that writes the coefficient CSV.
        kept[image] = row.iloc[10:]
    return kept


def _components_needed(cumulative_percent, percent):
    """Return how many leading PCs are needed to reach ``percent`` % variance."""
    return int(np.sum(cumulative_percent < percent)) + 1


def shapemodes(config, fftcoeffs_path) -> None:
    """Fit a PCA on the shape coefficients and write the shape-mode outputs.

    Steps:

    1. Read the coefficient table at ``fftcoeffs_path`` and drop rows that
       contain NaN or whose cell/nucleus area ratio is above
       ``config["dismiss_ratio"]``.
    2. Parse the coefficient strings as complex numbers (``i`` is rewritten
       to ``j`` first) and place their real and imaginary parts side by side
       as the PCA feature matrix.
    3. Fit the PCA, log how many components explain 70-95 % of the variance,
       and keep enough components for 95 % (at least 8, but never more than
       the PCA actually produced). Scores of all other components are set
       to 0.
    4. Write ``transformed_matrix.csv`` and ``cells_assigned_to_pc_bins.json``
       into ``<config["output_dir"]>/shapemode`` and call the ``plotting``
       helpers with paths inside that same directory.

    Args:
        config: Mapping read for the keys ``"log"`` (object with ``.info``),
            ``"dismiss_ratio"``, ``"output_dir"`` and ``"n_coeffs"``.
        fftcoeffs_path: Path of the CSV file passed to ``pandas.read_csv``.
            It must provide the columns ``image``, ``cell_area`` and
            ``nuc_area``.

    Raises:
        ValueError: If no row of the table survives the filtering.
    """
    log = config["log"]
    out_dir = os.path.join(config["output_dir"], "shapemode")
    # to_csv()/open() below do not create missing directories themselves.
    os.makedirs(out_dir, exist_ok=True)

    lines = _select_coefficient_rows(config, pd.read_csv(fftcoeffs_path))
    if not lines:
        raise ValueError(
            f"No usable rows left in {fftcoeffs_path} after dismissing "
            "NaN / cell-nuc-ratio rows; cannot fit PCA"
        )

    df = pd.DataFrame(lines).transpose()
    # Python's complex() expects "j" as the imaginary unit.
    df = df.map(lambda s: complex(s.replace("i", "j")))
    coeffs = df.to_numpy()
    df_ = pd.concat(
        [
            pd.DataFrame(coeffs.real),
            pd.DataFrame(coeffs.imag),
        ],
        axis=1,
    )

    pca = PCA()
    pca.fit(df_)
    plotting.display_scree_plot(pca, save_dir=out_dir)

    cumulative = (pca.explained_variance_ratio_ * 100).cumsum()
    for percent in np.arange(70, 100, 5):
        n_needed = _components_needed(cumulative, percent)
        log.info(f"{n_needed} to explain {percent} % variance")

    # Keep the PCs explaining 95 % of the variance, with a floor of 8, but
    # clamp to what the PCA produced so that pc_keep never names a component
    # that is absent from df_trans.
    n_available = len(pca.components_)
    n_pc = min(max(_components_needed(cumulative, 95), 8), n_available)
    pc_names = [f"PC{c}" for c in range(1, 1 + n_available)]
    pc_keep = pc_names[:n_pc]

    df_trans = pd.DataFrame(pca.transform(df_), columns=pc_names, index=df.index)
    dropped = pc_names[n_pc:]
    if dropped:
        # Zero the scores of the components that are not kept.
        df_trans[dropped] = 0
    df_trans.to_csv(os.path.join(out_dir, "transformed_matrix.csv"))

    # Cell density on major PC
    plotting.plot_pc_density(
        df_trans["PC1"],
        df_trans["PC2"],
        save_path=os.path.join(out_dir, "PC1vsPC2_cell_density.png"),
    )
    plotting.plot_pc_density(
        df_trans["PC2"],
        df_trans["PC3"],
        save_path=os.path.join(out_dir, "PC2vsPC3_cell_density.png"),
    )

    pm = plotting.PlotShapeModes(
        pca,
        df_trans,
        config["n_coeffs"],
        pc_keep,
        scaler=None,
        complex_type=False,
    )
    pm.plot_avg_cell(dark=False, save_dir=out_dir)

    cells_assigned = {}
    for pc in pc_keep:
        pm.plot_shape_variation_gif(pc, dark=False, save_dir=out_dir)
        pm.plot_shape_variation(pc, dark=False, save_dir=out_dir)
        pm.plot_pc_hist(pc, save_dir=out_dir)
        # Only the second element returned by assign_cells is used here.
        _, bin_links = pm.assign_cells(pc)
        cells_assigned[pc] = [list(b) for b in bin_links]
        plotting.plot_example_cells(
            bin_links,
            n_coef=config["n_coeffs"],
            cells_per_bin=5,
            shape_coef_path=fftcoeffs_path,
            save_path=os.path.join(out_dir, pc + "_example_cells.png"),
        )

    with open(os.path.join(out_dir, "cells_assigned_to_pc_bins.json"), "w") as fp:
        json.dump(cells_assigned, fp)