@@ -56,7 +56,7 @@ def download_csv(file_path, url):
5656 print ("File already exists." )
5757
5858
59- def prepare_data (nuc_data , cyto_data , image_data , image_indices , treatments , treatments_to_compounds , compounds ,
59+ def prepare_data (nuc_data , cyto_data , image_data , image_indices , treatments , plate_number , qc , treatments_to_compounds , compounds ,
6060 selected_wells ):
6161 # Rename columns
6262 nuc_data = nuc_data .rename (columns = lambda x : 'Nuclear_' + x if 'Intensity' in x else x )
@@ -90,6 +90,20 @@ def prepare_data(nuc_data, cyto_data, image_data, image_indices, treatments, tre
9090 #
9191 # sample_data.to_csv('./sample_data.csv')
9292
93+ # new_df = combined_data.groupby('Well', as_index=False).agg({
94+ # 'YAPTAZ_Ratio': 'mean',
95+ # 'Treatment': 'first'
96+ # })
97+ # new_df['QC'] = new_df['Well'].map(qc)
98+ # new_df = new_df[new_df['QC'] == 'Pass']
99+ #
100+ # summary_df = new_df.groupby('Treatment').agg({
101+ # 'YAPTAZ_Ratio': 'mean', # Calculate the average 'YAPTAZ_Ratio'
102+ # 'Well': 'count' # Count the number of instances
103+ # })
104+ # new_df.to_csv(f'./../{plate_number}_instances.csv')
105+ # summary_df.to_csv(f'./../{plate_number}_summary.csv')
106+
93107 # Filter by selected wells if specified
94108 if selected_wells :
95109 combined_data = combined_data [combined_data ['Well' ].isin (selected_wells )]
@@ -139,7 +153,7 @@ def generate_swarmplot(plot_order, data, color_dict, treatment_col, variable_of_
139153 # Sample the data if sample_size > 0
140154 if sample_size > 0 :
141155 sampled_data = pd .concat ([
142- data [data [treatment_col ] == 'ARHGAP40 ' ].sample (n = sample_size , replace = False , random_state = random_seed ),
156+ data [data [treatment_col ] == 'ARAP2 ' ].sample (n = sample_size , replace = False , random_state = random_seed ),
143157 data [data [treatment_col ] == 'YAP' ].sample (n = sample_size , replace = False , random_state = random_seed ),
144158 data [data [treatment_col ] == 'MOCK' ].sample (n = sample_size , replace = False , random_state = random_seed ),
145159 data [data [treatment_col ] == 'LATS1' ].sample (n = sample_size , replace = False , random_state = random_seed )
@@ -161,7 +175,7 @@ def generate_swarmplot(plot_order, data, color_dict, treatment_col, variable_of_
161175 # Calculate and plot the confidence intervals
162176 for treatment in plot_order :
163177 y_values = sampled_data [sampled_data [treatment_col ] == treatment ][variable_of_interest ]
164- print (f'Treatment: { treatment } , Mean: { y_values .mean ()} ' )
178+ # print(f'Treatment: {treatment}, Mean: {y_values.mean()}')
165179 lower , upper = ci (y_values , 0.95 )
166180 x_pos = plot_order .index (treatment )
167181 ax .errorbar (x_pos , y_values .mean (), yerr = [[y_values .mean () - lower ], [upper - y_values .mean ()]],
@@ -298,7 +312,7 @@ def plot_effect_size_v_sample_size(sample_sizes, num_iterations, data, treatment
298312 for treatment in treatments :
299313 subsample = data [data [treatment_col ] == treatment ].sample (n = sample_size , replace = False ,
300314 random_state = random_seed )
301- control_subsample = data [data [treatment_col ] == 'Untreated ' ].sample (n = sample_size , replace = False ,
315+ control_subsample = data [data [treatment_col ] == 'MOCK ' ].sample (n = sample_size , replace = False ,
302316 random_state = random_seed )
303317 mean = (subsample [variable_of_interest ].mean () - control_subsample [variable_of_interest ].mean ()) / \
304318 control_subsample [variable_of_interest ].std ()
@@ -432,8 +446,8 @@ def plot_cumulative_histogram_samples(data, variable_of_interest, treatment_col,
432446 plt .title (f'{ len (total_samples )} { treatment } Cells' )
433447 plt .xlabel (x_label )
434448 plt .ylabel ('Frequency (%)' )
435- plt .ylim (bottom = 0 , top = 20 )
436- plt .xlim (left = 0 , right = 1 )
449+ plt .ylim (bottom = 0 , top = 40 )
450+ plt .xlim (left = 0.4 , right = 0.9 )
437451 plt .grid (True )
438452 plt .show ()
439453 filecount = filecount + 1
0 commit comments