-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess.py
More file actions
162 lines (132 loc) · 7.46 KB
/
process.py
File metadata and controls
162 lines (132 loc) · 7.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import argparse
import datetime
import logging
import os
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import shapespace
import shapemodes
import protparam
import comparison
# Logging setup: every record goes to log.txt (truncated on each run) and is
# also echoed to the console through an extra stream handler on the root logger.
logging.basicConfig(
    filename='log.txt',
    encoding='utf-8',
    format='%(levelname)s: %(message)s',
    filemode='w',
    level=logging.DEBUG,
)
console = logging.StreamHandler()
logging.getLogger().addHandler(console)
logger = logging.getLogger("2dShapeSpacePortable")
# The root level is DEBUG, so cap the chatty third-party loggers at WARNING.
for noisy_logger_name in ("PIL", "matplotlib"):
    logging.getLogger(noisy_logger_name).setLevel(logging.WARNING)
# General configuration dictionary shared by every pipeline step.
# The special key "log" carries the logger so the steps can log through it.
# The remaining entries are the built-in defaults; add new constants here.
config = {
    "log": logger,
    "output_dir": "results",  # directory where all results are compiled
    "fftcoeff_step": True,  # run the FFT coefficient step
    "shapemode_step": True,  # run the shape mode step
    "protparam_step": True,  # run the protein parametrization step
    "n_coeffs": 128,  # number of coefficients to use
    "alignment": "fft_major_axis_polarized",
    "dismiss_ratio": 8,  # cell/nuc ratio threshold used by the shape mode step
    "protparam_mode": "rings",
    "plot": True,  # generate intermediate plots
    "seed": 0,  # forced seed for reproducibility
    "comparison_step": False,  # run the per-location comparison step
}
# Optional configuration file: when config.yaml exists in the working
# directory, its top-level keys override the defaults defined so far.
if os.path.exists("config.yaml"):
    with open("config.yaml", "r") as file:
        file_config = yaml.safe_load(file)
    # safe_load returns None for an empty (or comment-only) document; treat
    # that as "no overrides" instead of failing on `dict | None`.
    if file_config is None:
        file_config = {}
    # Anything other than a mapping cannot be merged into the configuration,
    # so fail with a message that names the actual problem.
    if not isinstance(file_config, dict):
        raise TypeError("config.yaml must contain a top-level mapping of option names to values")
    config = config | file_config
# Command line parameters. Any option given here overrides both the built-in
# defaults and the values loaded from config.yaml; omitted options are ignored.
def _str2bool(value):
    """Convert a command line string into a real boolean.

    ``type=bool`` cannot be used with argparse for this purpose: ``bool()``
    returns True for every non-empty string, so "--plot False" would still
    enable plotting. This converter accepts the usual spellings instead.

    Args:
        value: the raw command line string (a bool is passed through as is).

    Returns:
        True or False.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1", "on"):
        return True
    # The empty string stays False, as it was with bool("").
    if lowered in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value (true/false), got %r" % (value,))


argparser = argparse.ArgumentParser(description="Please input the following parameters")
argparser.add_argument("--output_dir", help="use this directory for compiling all results")
argparser.add_argument("--fftcoeff_step", help="calculate FFT coefficients and cell statistics", type=_str2bool)
argparser.add_argument("--shapemode_step", help="calculate shape modes", type=_str2bool)
argparser.add_argument("--protparam_step", help="calculate protein parametrization", type=_str2bool)
argparser.add_argument("--n_coeffs", help="use this number of coefficients", type=int)
argparser.add_argument("--alignment", help="alignment, choose between [fft_major_axis,fft_major_axis_polarized,fft_centroid]")
# Parsed as a number so it can be compared against a ratio; without a type the
# command line value would be a str while the default is numeric.
argparser.add_argument("--dismiss_ratio", help="cells with cell/nuc ratio bigger than this will be removed for the shape modes calculation", type=float)
argparser.add_argument("--protparam_mode", help="calculate protein parametrization using [rings,warp]")
argparser.add_argument("--plot", help="generate intermediate plots", type=_str2bool)
argparser.add_argument("--seed", help="forced seed for reproducibility", type=int)
argparser.add_argument("--comparison_step", help="compute per-location average intensities and correlation heatmaps", type=_str2bool)
args = argparser.parse_args()
# Options that were not supplied are None and must not clobber existing values.
config = config | {k: v for k, v in vars(args).items() if v is not None}
# Record the start time and the final, merged configuration so that each run
# can be traced back to the exact parameters that produced it.
run_log = config["log"]
start_stamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
run_log.info('Start: ' + start_stamp)
run_log.info('Parameters used:')
run_log.info(config)
run_log.info('----------')
# Make sure the output root exists and point matplotlib's save dialog at it.
output_root = config["output_dir"]
os.makedirs(output_root, exist_ok=True)
plt.rcParams["savefig.directory"] = output_root
# Step 1: compute the FFT coefficients for every image listed in
# path_list.csv and collect them in <output_dir>/shapespace/fft_coeffs.csv.
if config["fftcoeff_step"]:
    config["log"].info("- Calculating fft coeficients")
    os.makedirs(os.path.join(config["output_dir"], "shapespace"), exist_ok=True)
    # Fixed leading columns, followed by n_coeffs columns for each of the four
    # coefficient groups, in the order nuc_x, nuc_y, cell_x, cell_y.
    final_columns = ["image", "nuc_area", "cell_area", "prot_int_sum_nuc", "prot_int_sum_cell", "theta", "centroid_y", "centroid_x", "e_c", "e_n"]
    for suffix in ("_nuc_x", "_nuc_y", "_cell_x", "_cell_y"):
        for i in range(config["n_coeffs"]):
            final_columns.append("fftcoef" + "%03d" % (i,) + suffix)
    df = pd.DataFrame(columns=final_columns)
    # Read the list up front with a context manager so the handle is closed.
    with open("./path_list.csv", 'r') as path_file:
        path_list = path_file.readlines()
    for curr_path in path_list:
        curr_line = curr_path.strip()
        # Skip blank lines and comments (a leading '#', even if indented).
        if curr_line == "" or curr_line.startswith("#"):
            continue
        fields = curr_line.split(",")
        # A short line is reported and skipped, like any other per-image
        # failure, instead of aborting the whole run with an IndexError.
        if len(fields) < 4:
            config["log"].error("-- Skipping malformed line in path_list.csv (expected image id, nucleus path, cell path, protein path): " + curr_line)
            continue
        curr_image_id, curr_nuc_path, curr_cell_path, curr_protein_path = fields[:4]
        try:
            l_result = shapespace.shapespace(config, curr_image_id, curr_nuc_path, curr_cell_path, curr_protein_path)
            # Append the result as the next row of the table.
            df.loc[len(df.index)] = l_result
            config["log"].info("-- Saved fft coeficients results for " + curr_image_id)
        except Exception as e:
            # Best effort: one failing image must not stop the others.
            config["log"].error("-- Error while calculating fft coeficients results for " + curr_image_id + ": " + str(e))
    df.to_csv(os.path.join(config["output_dir"], "shapespace", "fft_coeffs.csv"), index=False)
# Step 2: compute the shape modes from the FFT coefficient table written to
# <output_dir>/shapespace/fft_coeffs.csv.
if config["shapemode_step"]:
    config["log"].info("- Calculating shape modes")
    shapemode_dir = os.path.join(config["output_dir"], "shapemode")
    os.makedirs(shapemode_dir, exist_ok=True)
    fft_coeffs_csv = os.path.join(config["output_dir"], "shapespace", "fft_coeffs.csv")
    try:
        shapemodes.shapemodes(config, fft_coeffs_csv)
        config["log"].info("-- Saved shape modes results")
    except Exception as e:
        # A failure is logged and the remaining steps still run.
        config["log"].error("-- Error while calculating shape modes results: " + str(e))
# Step 3: compute the protein parametrization for every image listed in
# path_list.csv, then average the per-image results.
if config["protparam_step"]:
    config["log"].info("- Calculating protein parametrization")
    # Reads the table that the FFT coefficient step writes; this raises if the
    # file is not present in the output directory.
    df_fft = pd.read_csv(os.path.join(config["output_dir"], "shapespace", "fft_coeffs.csv"))
    os.makedirs(os.path.join(config["output_dir"], "protparam"), exist_ok=True)
    # Read the list up front with a context manager so the handle is closed.
    with open("./path_list.csv", 'r') as path_file:
        path_list = path_file.readlines()
    for curr_path in path_list:
        curr_line = curr_path.strip()
        # Skip blank lines and comments (a leading '#', even if indented).
        if curr_line == "" or curr_line.startswith("#"):
            continue
        fields = curr_line.split(",")
        # This step also needs the fifth field (location). A short line is
        # reported and skipped instead of aborting the run with an IndexError.
        if len(fields) < 5:
            config["log"].error("-- Skipping malformed line in path_list.csv (expected image id, nucleus path, cell path, protein path, location): " + curr_line)
            continue
        curr_image_id, curr_nuc_path, curr_cell_path, curr_protein_path, curr_location = fields[:5]
        try:
            protparam.protparam(config, curr_image_id, curr_nuc_path, curr_cell_path, curr_protein_path, curr_location, df_fft)
            config["log"].info("-- Saved protein parametrization results for " + curr_image_id)
        except Exception as e:
            # Best effort: one failing image must not stop the others.
            config["log"].error("-- Error while calculating protein parametrization results for " + curr_image_id + ": " + str(e))
    config["log"].info("- Averaging protein parametrization results")
    try:
        protparam.avg_protparam(config)
        config["log"].info("-- Saved averaged protein parametrization results")
    except Exception as e:
        config["log"].error("-- Error while averaging protein parametrization results: " + str(e))
# Step 4 (off by default): per-location averages followed by correlation
# heatmaps. The two tasks are attempted independently, so a failure in the
# first is logged and the second still runs.
if config["comparison_step"]:
    config["log"].info("- Calculating location comparison")
    comparison_dir = os.path.join(config["output_dir"], "comparison")
    os.makedirs(comparison_dir, exist_ok=True)
    # (function name in the comparison module, success message, error prefix)
    comparison_tasks = (
        ("avg_by_location", "-- Saved per-location averages", "-- Error while calculating per-location averages: "),
        ("correlation_heatmap", "-- Saved correlation heatmaps", "-- Error while calculating correlation heatmaps: "),
    )
    for task_name, done_message, error_prefix in comparison_tasks:
        try:
            # The attribute lookup stays inside the try so that a missing
            # function is logged like any other failure.
            getattr(comparison, task_name)(config)
            config["log"].info(done_message)
        except Exception as e:
            config["log"].error(error_prefix + str(e))
# Close the run log with a separator and the finishing time.
config["log"].info('----------')
end_stamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
config["log"].info('End: ' + end_stamp)