forked from jensenlab/BacterAI
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsummarize_experiment.py
More file actions
105 lines (88 loc) · 3.37 KB
/
summarize_experiment.py
File metadata and controls
105 lines (88 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import collections
import os
from math import comb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import torch
import net
import utils
def main(folder):
max_round_n = 14
folders = [
os.path.join(folder, i, "results_all.csv")
for i in os.listdir(folder)
if "Round" in i
]
folders = sorted(folders, key=lambda x: (len(x), x))[:max_round_n]
print(folders)
output_path = os.path.join(folder, "summary")
if not os.path.exists(output_path):
os.makedirs(output_path)
all_round_data = []
for i, f in enumerate(folders):
round_data = pd.read_csv(f, index_col=None)
# round_data.append(df)
# type_counts = dict(collections.Counter(df["type"].values.tolist()))
# print(type_counts)
# round_data = pd.concat(round_data, ignore_index=True)
# round_data = pd.concat(round_data, ignore_index=True)
round_data = round_data.drop(
columns=[
"var",
"environment",
"strain",
"parent_plate",
"initial_od",
"final_od",
"bad",
"delta_od",
]
)
all_round_data.append(round_data)
# round_output = os.path.join(output_path, f"Round{i+1}")
# if not os.path.exists(round_output):
# os.makedirs(round_output)
# round_data_grouped = round_data.groupby(by=["type"])
# threshold = 0.25
# for group_type, df in round_data_grouped:
# results = count(df, threshold)
# # print(depth_counts)
# grows = df[df["fitness"] >= threshold]
# grows = grows.sort_values(by=["depth", "fitness"], ascending=[False, False])
# # grows.to_csv(
# # os.path.join(round_output, f"{group_type}_grows.csv"),
# # index=False,
# # )
# results.to_csv(
# os.path.join(round_output, f"summarize_{group_type}_results.csv")
# )
# results_all = count(round_data, threshold)
# results_all.to_csv(os.path.join(round_output, f"summarize_ALL_results.csv"))
all_round_data = pd.concat(all_round_data, ignore_index=True)
all_round_data = all_round_data.iloc[:, :20]
all_round_data["sum"] = all_round_data.iloc[:, :20].sum(axis=1)
sum_counts = dict(collections.Counter(all_round_data["sum"].values.tolist()))
print(all_round_data.shape)
print(sum_counts)
# def collect_data(folder):
# files = [f for f in os.listdir(folder) if "mapped_data" in f]
# dfs = [utils.process_mapped_data(os.path.join(folder, f))[0] for f in files]
# df = pd.concat(dfs, ignore_index=True)
# df = df.replace(
# {"Anaerobic Chamber @ 37 C": "anaerobic", "5% CO2 @ 37 C": "aerobic"}
# )
# df = df[df["environment"] == "aerobic"]
# df = df.drop(
# columns=["strain", "parent_plate", "initial_od", "final_od", "bad", "delta_od"]
# )
# df.to_csv(os.path.join(folder, "SGO CH1 Processed-Aerobic.csv"), index=False)
if __name__ == "__main__":
# f = "experiments/05-31-2021_7"
# f = "experiments/05-31-2021_8"
# f = "experiments/05-31-2021_7 copy"
f = "experiments/07-26-2021_10"
# f = "experiments/07-26-2021_11"
main(f)
# collect_data("data/SGO_data")