Skip to content

Commit a3ce743

Browse files
ammarcsj and claude committed
Remove dead utility and analysis functions
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent d482eea commit a3ce743

7 files changed

Lines changed: 0 additions & 139 deletions

File tree

alphaquant/diffquant/diffutils.py

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -56,32 +56,6 @@ def get_samples_used_from_samplemap_df(samplemap_df, cond1, cond2):
5656
samples_c2 = samplemap_df[[cond2 == x for x in samplemap_df["condition"]]]["sample"]
5757
return list(samples_c1), list(samples_c2)
5858

59-
def get_all_samples_from_samplemap_df(samplemap_df):
    """Return all entries of the ``sample`` column of ``samplemap_df`` as a list."""
    return list(samplemap_df["sample"])
61-
62-
# Cell
63-
import pandas as pd
64-
65-
def get_samplenames_from_input_df(data):
    """Extract the sample names from the AQ input dataframe.

    The sample names are all column names except ``'protein'`` and the column
    named by the module-level ``QUANT_ID``. ``list.remove`` raises
    ``ValueError`` if either of those two columns is missing.
    """
    names = list(data.columns)
    for non_sample_column in ('protein', QUANT_ID):
        names.remove(non_sample_column)
    return names
71-
72-
# Cell
73-
import numpy as np
74-
def filter_df_to_min_valid_values(quant_df_wideformat, samples_c1, samples_c2, min_valid_values):
    """Keep only the rows that have enough valid values in both sample groups.

    Zeros are treated as missing values. A row is retained when it has at
    least ``min_valid_values`` non-missing entries among the ``samples_c1``
    columns and also among the ``samples_c2`` columns.

    Returns the retained rows (zeros replaced by NaN) after ``reset_index()``,
    i.e. the previous index is moved into a regular column.
    """
    quant_df_wideformat = quant_df_wideformat.replace(0, np.nan)
    rows_valid_c1 = quant_df_wideformat[samples_c1].dropna(thresh=min_valid_values, axis=0).index
    rows_valid_c2 = quant_df_wideformat[samples_c2].dropna(thresh=min_valid_values, axis=0).index
    idxs_both = rows_valid_c1.intersection(rows_valid_c2)
    # The intersection contains index *labels*, so rows must be selected by
    # label. Positional selection only gave the right rows for a default
    # 0..n-1 index and failed or mis-selected otherwise.
    return quant_df_wideformat.loc[idxs_both].reset_index()
83-
84-
8559
# Cell
8660
def get_condpairname(condpair):
    """Build the name of a condition pair as ``<first>_VS_<second>``."""
    first_cond, second_cond = condpair[0], condpair[1]
    return f"{first_cond}_VS_{second_cond}"
@@ -102,39 +76,6 @@ def make_dir_w_existcheck(dir):
10276
if not os.path.exists(dir):
10377
os.makedirs(dir)
10478

105-
# Cell
106-
import os
107-
def get_results_plot_dir_condpair(results_dir, condpair):
    """Return the plot directory for a condition pair, creating it if needed.

    The directory is ``<results_dir>/<condpair>_plots``, where ``condpair`` is
    inserted using its default string formatting. Creation is delegated to
    ``make_dir_w_existcheck``.
    """
    results_dir_plots = f"{results_dir}/{condpair}_plots"
    make_dir_w_existcheck(results_dir_plots)
    return results_dir_plots
111-
112-
# Cell
113-
def get_middle_elem(sorted_list):
    """Return the median of an already sorted sequence.

    For an odd number of elements the middle element is returned unchanged;
    for an even number the mean of the two central elements is returned.
    The input is not sorted here. An empty sequence fails on indexing.
    """
    nvals = len(sorted_list)
    middle_idx = nvals // 2
    if nvals % 2 == 1:
        # Also covers the single-element case (middle_idx == 0).
        return sorted_list[middle_idx]
    return 0.5 * (sorted_list[middle_idx] + sorted_list[middle_idx - 1])
121-
122-
# Cell
123-
import numpy as np
124-
def get_nonna_array(array_w_nas):
    """Drop the NaN entries from every row of a two-level numeric array.

    Returns an array with one entry per input row. When all rows keep the same
    number of values the result is a regular 2D array; otherwise it is a 1D
    object array whose elements are the per-row arrays of non-NaN values.
    """
    values = np.asarray(array_w_nas)
    isnan_arr = np.isnan(values)
    rows = [row[~row_isnan] for row, row_isnan in zip(values, isnan_arr)]

    if len({len(row) for row in rows}) <= 1:
        return np.array(rows)

    # Rows of different lengths cannot form a rectangular array; recent NumPy
    # versions refuse to build one implicitly, so fill an object array by hand.
    ragged = np.empty(len(rows), dtype=object)
    for row_idx, row in enumerate(rows):
        ragged[row_idx] = row
    return ragged
137-
13879
# Cell
13980
import numpy as np
14081
def get_non_nas_from_pd_df(df):
@@ -152,12 +93,6 @@ def get_ionints_from_pd_df(df):
15293
}
15394

15495
# Cell
155-
def invert_dictionary(my_map):
    """Invert a mapping, collecting all keys that share a value.

    Returns a dict that maps every value of ``my_map`` to the list of keys
    that pointed to it, in the iteration order of ``my_map``.
    """
    inv_map = {}
    for key, value in my_map.items():
        # Append in place instead of rebuilding the list for every key.
        inv_map.setdefault(value, []).append(key)
    return inv_map
160-
16196
from collections import defaultdict
16297
def invert_tuple_list_w_nonunique_values(tuple_list):
16398
inverted_dict = defaultdict(list)
@@ -373,20 +308,6 @@ def get_path_to_unformatted_file(input_file_name):
373308

374309

375310

376-
# Cell
377-
378-
# Cell
379-
import os
380-
def check_for_processed_runs_in_results_folder(results_folder):
    """List the result names in a folder for which all output files exist.

    A name ``X`` is reported when the folder contains ``X.results.tsv``,
    ``X.normed.tsv`` and ``X.results.ions.tsv``. Names are returned in the
    order in which ``os.listdir`` yields the ``.results.tsv`` files.
    """
    results_suffix = ".results.tsv"
    folder_files = os.listdir(results_folder)
    available_files = set(folder_files)

    contained_condpairs = []
    for file_name in folder_files:
        # Match the suffix only, so names that merely contain "results.tsv"
        # somewhere in the middle are not treated as result files.
        if not file_name.endswith(results_suffix):
            continue
        res_name = file_name[:-len(results_suffix)]
        has_normed = f"{res_name}.normed.tsv" in available_files
        has_ions = f"{res_name}.results.ions.tsv" in available_files
        if has_normed and has_ions:
            contained_condpairs.append(res_name)
    return contained_condpairs
389-
390311
# Cell
391312
import pandas as pd
392313
import os

alphaquant/norm/normalization.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,6 @@ def determine_anchor_and_shift_sample(sample2counts, i_min, j_min, min_distance)
136136
flip = 1 if anchor_idx == i_min else -1
137137
return anchor_idx, shift_idx, flip*min_distance
138138

139-
# Cell
140-
def shift_samples(samples, sampleidx2anchoridx, sample2shift):
    """Add to every row of ``samples`` its total shift, modifying it in place.

    The shift for row ``i`` is obtained from
    ``get_total_shift(sampleidx2anchoridx, sample2shift, i)``. Nothing is
    returned.
    """
    n_samples = samples.shape[0]
    for sample_idx in range(n_samples):
        total_shift = get_total_shift(sampleidx2anchoridx, sample2shift, sample_idx)
        samples[sample_idx] = samples[sample_idx] + total_shift
143-
144139
# Cell
145140
def get_total_shift(sampleidx2anchoridx, sample2shift,sample_idx):
146141

alphaquant/quant_reader/quant_reader_manager.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,3 @@ def reformat_and_save_input_file(
7272
return outfile_name
7373

7474

75-
def set_quanttable_config_location(quanttable_config_file):
    """Point ``config_dict_loader.INTABLE_CONFIG`` at ``quanttable_config_file``."""
    config_dict_loader.INTABLE_CONFIG = quanttable_config_file

alphaquant/resources/database_loader.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,6 @@ def get_genename2sequence_dict( organism = "human"):
2828

2929
return gene2sequence_dict
3030

31-
def get_swissprot2sequence_dict( organism = "human"):
    """Map the ``Entry`` column to the ``Sequence`` column of the SwissProt table.

    The tab-separated file is located via ``get_swissprot_path(organism)``;
    only the ``Entry`` and ``Sequence`` columns are read, and both are
    converted to strings. For repeated entries the last row wins.
    """
    swissprot_df = pd.read_csv(get_swissprot_path(organism), sep = '\t', usecols=['Entry', 'Sequence'])
    swissprot_ids = swissprot_df['Entry'].astype(str).tolist()
    sequences = swissprot_df['Sequence'].astype(str).tolist()
    return dict(zip(swissprot_ids, sequences))
39-
4031
def get_uniprot2sequence_dict( organism = "human"):
4132
swissprot_file = get_swissprot_path(organism)
4233
swissprot_df = pd.read_csv(swissprot_file, sep = '\t', usecols=['Entry', 'Sequence'])
Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +0,0 @@
1-
import numpy as np
2-
def subset_df_to_n_most_complete_proteins(proteome_df_aq_reformat, proteome_df_original, n = 100, protein_header = "PG.ProteinGroups",
                                        protein_subset_to_use = None, use_only_complete_columns = False):
    """Randomly draw ``n`` protein identifiers from the reformatted table.

    Despite the name, proteins are not ranked by completeness: the result is a
    ``np.random.choice`` draw (with replacement) from the candidate proteins.

    Args:
        proteome_df_aq_reformat: table with ``protein`` and ``quant_id``
            columns; all remaining columns are treated as values.
        proteome_df_original: unused, kept for interface compatibility.
        n: number of identifiers to draw.
        protein_header: unused, kept for interface compatibility.
        protein_subset_to_use: optional set; if given, candidates are limited
            to its intersection with the proteins in the table.
        use_only_complete_columns: if true, rows with any missing value
            (zeros count as missing) are discarded before collecting proteins.

    Returns:
        Array of ``n`` protein identifiers.
    """
    quant_table = proteome_df_aq_reformat.set_index(["protein", "quant_id"]).replace(0, np.nan)
    if use_only_complete_columns:
        quant_table = quant_table.dropna()

    candidates = set(quant_table.reset_index()["protein"].unique())
    if protein_subset_to_use is not None:
        candidates = protein_subset_to_use.intersection(candidates)

    # Set iteration order is not stable between interpreter runs; sort so that
    # a seeded random state always draws from the same ordered population.
    return np.random.choice(sorted(candidates, key=str), n)
Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,2 @@
11
import pandas as pd
22
import numpy as np
3-
4-
5-
6-
def find_non_outlier_indices_ipr(data, threshold=1.5, percentile_lower = 25, percentile_upper = 75):
    """Return the positions of values inside the inter-percentile fence.

    The fence spans from ``lower - threshold * spread`` to
    ``upper + threshold * spread`` (both inclusive), where ``lower`` and
    ``upper`` are the requested percentiles of ``data`` and ``spread`` is
    their difference.

    Returns the indices along the first axis of the values within the fence.
    """
    # Coerce once so that the comparisons below are array operations for any
    # array-like input.
    values = np.asarray(data)
    value_lower, value_upper = np.percentile(values, [percentile_lower, percentile_upper])

    cut_off = (value_upper - value_lower) * threshold
    lowest_tolerated_value = value_lower - cut_off
    highest_tolerated_value = value_upper + cut_off

    within_fence = (values >= lowest_tolerated_value) & (values <= highest_tolerated_value)
    return np.where(within_fence)[0]

alphaquant/utils/utils.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -55,19 +55,6 @@ def cut_trailing_parts_seqstring(seqstring):
5555
def get_condpairname(condpair):
    """Build the name of a condition pair as ``<first>_VS_<second>``."""
    first_cond, second_cond = condpair[0], condpair[1]
    return f"{first_cond}_VS_{second_cond}"
5757

58-
def get_condpair_from_condpairname(condpairname):
    """Split a condition pair name at ``aq_variables.CONDITION_PAIR_SEPARATOR``.

    Returns the list produced by ``str.split``.
    """
    separator = aq_variables.CONDITION_PAIR_SEPARATOR
    return condpairname.split(separator)
60-
61-
62-
def convert_ion_string_to_node_type(ionstring, node_type):
    """Shorten an ion identifier to the specified node level.

    For example, a full quant_id that describes a fragment ion can be
    shortened to the sequence level. The pattern registered for ``node_type``
    in ``NODETYPE2REGEX`` is matched at the start of ``ionstring`` and its
    first capture group is returned.

    Raises:
        ValueError: if the pattern does not match ``ionstring``.
    """
    match = re.match(NODETYPE2REGEX[node_type], ionstring)
    if match is None:
        raise ValueError(f"Could not match {ionstring} to {node_type}. This function only works for the following node types: seq, mod_seq, mod_seq_charge")
    return match.group(1)
69-
70-
7158
def get_progress_folder_filename(input_file, file_ending, remove_extension = True): #file ending needs to include all dots, e.g. ".aq_reformat.tsv"
7259
input_file = os.path.abspath(input_file) #to make sure that the path is absolute
7360
dirname_input_file = os.path.dirname(input_file)

0 commit comments

Comments
 (0)