Skip to content

Commit 46d8d45

Browse files
committed
Further modifications to generate new figures
1 parent 19ea6dd commit 46d8d45

3 files changed

Lines changed: 267 additions & 94 deletions

File tree

explore_idr0028_data.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Merge the three "*_instances.csv" tables and write a per-treatment summary.
#
# All paths are relative to the current working directory. The input tables
# are expected to provide at least the 'Treatment', 'YAPTAZ_Ratio' and 'Well'
# columns, since those are the only ones referenced below.
import pandas as pd

# Load the three tables in a fixed order (1A, 2A, 2B) so the concatenation
# below stacks them in the same sequence.
df_1A, df_2A, df_2B = (
    pd.read_csv(csv_path)
    for csv_path in (
        './LM2_GEFGAP_ONTARGETPlus_1A_instances.csv',
        './LM2_GEFGAP_ONTARGETPlus_2A_instances.csv',
        './LM2_GEFGAP_ONTARGETPlus_2B_instances.csv',
    )
)

# Stack the rows of all three tables; the original row indices are kept.
combined_df = pd.concat([df_1A, df_2A, df_2B])

# One output row per treatment. The output columns keep the source column
# names, so the 'Well' column of the summary holds a row count (non-null
# 'Well' entries), not a well identifier.
combined_summary_df = combined_df.groupby('Treatment').agg(
    YAPTAZ_Ratio=('YAPTAZ_Ratio', 'mean'),  # average ratio per treatment
    Well=('Well', 'count'),                 # number of instances per treatment
)

# 'Treatment' is the index of the summary and is written as the first column.
combined_summary_df.to_csv('./combined_summary.csv')

notebooks/companion_notebook_idr0028.ipynb

Lines changed: 234 additions & 88 deletions
Large diffs are not rendered by default.

notebooks/utility_functions.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def download_csv(file_path, url):
5656
print("File already exists.")
5757

5858

59-
def prepare_data(nuc_data, cyto_data, image_data, image_indices, treatments, treatments_to_compounds, compounds,
59+
def prepare_data(nuc_data, cyto_data, image_data, image_indices, treatments, plate_number, qc, treatments_to_compounds, compounds,
6060
selected_wells):
6161
# Rename columns
6262
nuc_data = nuc_data.rename(columns=lambda x: 'Nuclear_' + x if 'Intensity' in x else x)
@@ -90,6 +90,20 @@ def prepare_data(nuc_data, cyto_data, image_data, image_indices, treatments, tre
9090
#
9191
# sample_data.to_csv('./sample_data.csv')
9292

93+
# new_df = combined_data.groupby('Well', as_index=False).agg({
94+
# 'YAPTAZ_Ratio': 'mean',
95+
# 'Treatment': 'first'
96+
# })
97+
# new_df['QC'] = new_df['Well'].map(qc)
98+
# new_df = new_df[new_df['QC'] == 'Pass']
99+
#
100+
# summary_df = new_df.groupby('Treatment').agg({
101+
# 'YAPTAZ_Ratio': 'mean', # Calculate the average 'YAPTAZ_Ratio'
102+
# 'Well': 'count' # Count the number of instances
103+
# })
104+
# new_df.to_csv(f'./../{plate_number}_instances.csv')
105+
# summary_df.to_csv(f'./../{plate_number}_summary.csv')
106+
93107
# Filter by selected wells if specified
94108
if selected_wells:
95109
combined_data = combined_data[combined_data['Well'].isin(selected_wells)]
@@ -139,7 +153,7 @@ def generate_swarmplot(plot_order, data, color_dict, treatment_col, variable_of_
139153
# Sample the data if sample_size > 0
140154
if sample_size > 0:
141155
sampled_data = pd.concat([
142-
data[data[treatment_col] == 'ARHGAP40'].sample(n=sample_size, replace=False, random_state=random_seed),
156+
data[data[treatment_col] == 'ARAP2'].sample(n=sample_size, replace=False, random_state=random_seed),
143157
data[data[treatment_col] == 'YAP'].sample(n=sample_size, replace=False, random_state=random_seed),
144158
data[data[treatment_col] == 'MOCK'].sample(n=sample_size, replace=False, random_state=random_seed),
145159
data[data[treatment_col] == 'LATS1'].sample(n=sample_size, replace=False, random_state=random_seed)
@@ -161,7 +175,7 @@ def generate_swarmplot(plot_order, data, color_dict, treatment_col, variable_of_
161175
# Calculate and plot the confidence intervals
162176
for treatment in plot_order:
163177
y_values = sampled_data[sampled_data[treatment_col] == treatment][variable_of_interest]
164-
print(f'Treatment: {treatment}, Mean: {y_values.mean()}')
178+
# print(f'Treatment: {treatment}, Mean: {y_values.mean()}')
165179
lower, upper = ci(y_values, 0.95)
166180
x_pos = plot_order.index(treatment)
167181
ax.errorbar(x_pos, y_values.mean(), yerr=[[y_values.mean() - lower], [upper - y_values.mean()]],
@@ -298,7 +312,7 @@ def plot_effect_size_v_sample_size(sample_sizes, num_iterations, data, treatment
298312
for treatment in treatments:
299313
subsample = data[data[treatment_col] == treatment].sample(n=sample_size, replace=False,
300314
random_state=random_seed)
301-
control_subsample = data[data[treatment_col] == 'Untreated'].sample(n=sample_size, replace=False,
315+
control_subsample = data[data[treatment_col] == 'MOCK'].sample(n=sample_size, replace=False,
302316
random_state=random_seed)
303317
mean = (subsample[variable_of_interest].mean() - control_subsample[variable_of_interest].mean()) / \
304318
control_subsample[variable_of_interest].std()
@@ -432,8 +446,8 @@ def plot_cumulative_histogram_samples(data, variable_of_interest, treatment_col,
432446
plt.title(f'{len(total_samples)} {treatment} Cells')
433447
plt.xlabel(x_label)
434448
plt.ylabel('Frequency (%)')
435-
plt.ylim(bottom=0, top=20)
436-
plt.xlim(left=0, right=1)
449+
plt.ylim(bottom=0, top=40)
450+
plt.xlim(left=0.4, right=0.9)
437451
plt.grid(True)
438452
plt.show()
439453
filecount = filecount + 1

0 commit comments

Comments
 (0)