From f8168afa68ef4e87ee4c463259ce9ddd9de25c40 Mon Sep 17 00:00:00 2001
From: ammarcsj <70114795+ammarcsj@users.noreply.github.com>
Date: Mon, 25 May 2026 10:05:03 +0200
Subject: [PATCH 1/3] Delete unused modules (outlier_scoring,
 diffresults_handling, phospho_inference)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 alphaquant/cluster/outlier_scoring.py        | 397 -------------------
 alphaquant/multicond/diffresults_handling.py | 128 ------
 alphaquant/ptm/phospho_inference.py          |  27 --
 3 files changed, 552 deletions(-)
 delete mode 100644 alphaquant/multicond/diffresults_handling.py
 delete mode 100644 alphaquant/ptm/phospho_inference.py

diff --git a/alphaquant/cluster/outlier_scoring.py b/alphaquant/cluster/outlier_scoring.py
index 7b33caf2..cbc9925b 100644
--- a/alphaquant/cluster/outlier_scoring.py
+++ b/alphaquant/cluster/outlier_scoring.py
@@ -115,400 +115,3 @@ def __remove_peptide_if_necessary__(self, peptide_node):
 
 
 
-class ProtNodeRetriever():
-    @staticmethod
-    def get_protnodes_from_diffclust_list(condpair_tree, diffclust_list):
-        proteins_wanted = {x.protein_name for x in diffclust_list}
-        protnodes = list(filter(lambda x : x.name in proteins_wanted, condpair_tree.children))
-        return protnodes
-
-# Cell
-
-class OutlierPeptideLoader():
-    def __init__(self, condpair_tree):
-        self._condpair_tree = condpair_tree
-        self.outlier_peptides = []
-        self._add_outlier_peptides()
-
-    def _add_outlier_peptides(self):
-        for protnode in self._condpair_tree.children:
-            nodechecker = ProtnodeClusterCheckerPeptideInfos(protnode)
-            self.outlier_peptides += nodechecker.get_outlier_peptide_infos()
-
-
-class ProtnodeClusterCheckerPeptideInfos(ProtnodeClusterChecker):
-    def __init__(self, protnode):
-        super().__init__(protnode)
-        self._outlier_peptide_infos = []
-
-    def get_outlier_peptide_infos(self):
-        diffclusts = self.get_diffclusts()
-        for clusterdiffinfo in diffclusts:
-            self._update_outlier_peptide_infos(clusterdiffinfo)
-        return self._outlier_peptide_infos
-
-    def _update_outlier_peptide_infos(self, clusterdiffinfo):
-        peptide_nodes = self._get_outlier_peptide_nodes(clusterdiffinfo)
-        for peptide_node in peptide_nodes:
-            self._outlier_peptide_infos.append(OutlierPeptideInfo(peptide_node))
-
-    def _get_outlier_peptide_nodes(self, clusterdiffinfo):
-        peptide_names = set(clusterdiffinfo.outlier_peptide_names)
-        return anytree.findall(self._protnode, filter_= lambda x : x.name in peptide_names, maxlevel=2)
-
-
-class ProteinInfo():
-    def __init__(self, peptide_node):
-        self.protein_fc = self._get_protein_fc(peptide_node)
-
-    def _get_protein_fc(self, peptide_node):
-        return aqclustutils.find_node_parent_at_level(peptide_node, "gene").fc
-
-
-class OutlierPeptideInfo(ProteinInfo):
-    def __init__(self, peptide_node):
-        super().__init__(peptide_node)
-        self._peptide_node = peptide_node
-        self.peptide_sequence = peptide_node.name
-        self.fc = peptide_node.fc
-        self.quality_score = self._get_quality_score(peptide_node)
-        self.protnormed_fc = None
-        self.num_mainclust_peptides = self._get_number_mainclust_peptides()
-        self._calc_protnormed_fc()
-
-    def _get_quality_score(self, peptide_node):
-        has_ml_score = hasattr(peptide_node, 'ml_score')
-        if has_ml_score:
-            return abs(peptide_node.ml_score)
-        else:
-            return 1/peptide_node.fraction_consistent
-
-    def _calc_protnormed_fc(self):
-        self.protnormed_fc = self.fc - self.protein_fc
-
-    def _get_number_mainclust_peptides(self):
-        samelevel_nodes = self._peptide_node.parent.children
-        mainclust_nodes = filter(lambda x : x.cluster ==0, samelevel_nodes)
-        return len(list(mainclust_nodes))
-
-
-# Cell
-import anytree
-
-
-class ModifiedPeptideLoader():
-    def __init__(self, condpair_tree, specific_modification = "[Phospho (STY)]"):
-        self.specific_modification = specific_modification
-        self.condpair_tree = condpair_tree
-        self._pepname2modpep = {}
-        self._load_modified_peptides_from_tree()
-
-    def get_modpep_from_sequence(self, peptide_sequence):
-        return self._pepname2modpep.get(peptide_sequence)
-
-    def _load_modified_peptides_from_tree(self):
-        modified_pepnodes = self._get_modified_peptide_nodes()
-        for mod_pep_node in modified_pepnodes:
-            self._update_pepname2modpep(mod_pep_node)
-
-    def _get_modified_peptide_nodes(self):
-        return anytree.search.findall(self.condpair_tree, lambda x : getattr(x,'type',"") == 'mod_seq', maxlevel=4)
-
-    def _update_pepname2modpep(self, mod_pep_node):
-        modified_peptide = PeptideWithSpecificModification(mod_pep_node, self.specific_modification)
-        if modified_peptide.specific_modification_found:
-            self._pepname2modpep[modified_peptide.peptide_sequence] = modified_peptide
-
-
-class PeptideWithSpecificModification(OutlierPeptideInfo):
-    def __init__(self, node_modpeptide, specific_modification= "[Phospho (STY)]"):
-        self.protein_name = self._get_protein_name(node_modpeptide)
-        self.modified_sequence = node_modpeptide.name
-        self.specific_modification_found = self._check_for_specific_modification(specific_modification)
-        if not self.specific_modification_found:
-            return
-        self.peptide_sequence = self._get_peptide_sequence(node_modpeptide)
-        self.fc = node_modpeptide.fc
-        self.quality_score = self._get_quality_score(node_modpeptide)
-
-    def _check_for_specific_modification(self, specific_modification):
-        return specific_modification in self.modified_sequence
-
-    def _get_peptide_sequence(self, node_modpeptide):
-        pepnode = aqclustutils.find_node_parent_at_level(node_modpeptide, level='seq')
-        return pepnode.name
-
-    def _get_protein_name(self, node_modpeptide):
-        pepnode = aqclustutils.find_node_parent_at_level(node_modpeptide, level='gene')
-        return pepnode.name
-
-# Cell
-import numpy as np
-class ComplementedClusterLoader():
-    def __init__(self, outlier_peptide_loader, modified_peptide_loader):
-        self._outlier_peptides = outlier_peptide_loader.outlier_peptides
-        self._modified_peptide_loader = modified_peptide_loader
-        self.complemented_clusters = []
-        self._find_complemented_clusters()
-
-    def _find_complemented_clusters(self):
-        for outlier_peptide in self._outlier_peptides:
-            modified_peptide = self._get_modified_peptide(outlier_peptide)
-            if modified_peptide is not None:
-                self.complemented_clusters.append(ComplementedCluster(outlier_peptide, modified_peptide))
-
-    def _get_modified_peptide(self, outlier_peptide):
-        return self._modified_peptide_loader.get_modpep_from_sequence(outlier_peptide.peptide_sequence)
-
-
-class ComplementedCluster():
-    def __init__(self, outlier_peptide, modified_peptide):
-        self.outlier_peptide = outlier_peptide
-        self.modified_peptide = modified_peptide
-        self._add_normfc_to_modpep()
-
-    def has_opposite_regulation(self):
-        return np.sign(self.outlier_peptide.protnormed_fc) == -np.sign(self.modified_peptide.protnormed_fc)
-
-    def get_quality_score(self):
-        return max(self.outlier_peptide.quality_score, self.modified_peptide.quality_score)
-
-    def get_outlier_quality_score(self):
-        return self.outlier_peptide.quality_score
-
-    def get_modpep_quality_score(self):
-        return self.modified_peptide.quality_score
-
-    def get_min_abs_normfc(self):
-        return min(abs(self.outlier_peptide.protnormed_fc), abs(self.modified_peptide.protnormed_fc))
-
-    def get_max_abs_normfc(self):
-        return max(abs(self.outlier_peptide.protnormed_fc), abs(self.modified_peptide.protnormed_fc))
-
-    def get_outlier_abs_normfc(self):
-        return abs(self.outlier_peptide.protnormed_fc)
-
-    def get_ptm_abs_normfc(self):
-        return abs(self.modified_peptide.protnormed_fc)
-
-    def get_ptm_abs_fc(self):
-        return abs(self.modified_peptide.fc)
-
-    def get_number_mainclust_peptides(self):
-        return self.outlier_peptide.num_mainclust_peptides
-
-    def _add_normfc_to_modpep(self):
-        self.modified_peptide.protein_fc = self.outlier_peptide.protein_fc
-        self.modified_peptide._calc_protnormed_fc()
-
-
-
-# Cell
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-import scipy.stats
-
-
-class ComplementedClusterEvaluator():
-    def __init__(self, complmented_clusters):
-        self._complemented_clusters = complmented_clusters
-        self._fcs_outliers = None
-        self._fcs_modpeps = None
-        self._assign_fold_change_lists()
-
-
-    def compare_regulation_directions(self, ax):
-        opposite_regulation_overview = [int(x.has_opposite_regulation()) for x in self._complemented_clusters]
-        self._plot_regulation_direction_histogram(ax, opposite_regulation_overview)
-
-    def scatter_fold_changes(self,ax):
-
-        num_opposite = sum([np.sign(x[0])==-np.sign(x[1]) for x in zip(self._fcs_outliers, self._fcs_modpeps)])
-        num_same = sum([np.sign(x[0])==np.sign(x[1]) for x in zip(self._fcs_outliers, self._fcs_modpeps)])
-        LOGGER.info(f"{num_same} same, {num_opposite} opposite")
-        sns.scatterplot(x =self._fcs_outliers, y=self._fcs_modpeps,ax=ax)
-        ax.set_xlabel("outliers")
-        ax.set_ylabel("modified_peptides")
-        self._set_axis_limits(ax)
-        self._draw_horizontal_vertical_line(ax)
-
-    def calculate_correlation(self):
-        r, p = scipy.stats.pearsonr(self._fcs_outliers, self._fcs_modpeps)
-        LOGGER.info(f"pval is {p}")
-        return r
-
-
-    def _assign_fold_change_lists(self):
-        self._fcs_outliers = list([x.outlier_peptide.protnormed_fc for x in self._complemented_clusters])
-        self._fcs_modpeps = list([x.modified_peptide.protnormed_fc for x in self._complemented_clusters])
-
-    def _set_axis_limits(self,ax):
-        all_lims = ax.get_xlim() + ax.get_ylim() #returns tuples with the lims
-        most_extreme_val =  max((abs(x) for x in all_lims))
-        ax.set_xlim(-most_extreme_val, most_extreme_val)
-        ax.set_ylim(-most_extreme_val, most_extreme_val)
-
-    def _draw_horizontal_vertical_line(self, ax):
-        ax.hlines(y=0, xmin=ax.get_xlim()[0], xmax=ax.get_xlim()[1], colors='black')
-        ax.vlines(x=0, ymin = ax.get_ylim()[0], ymax = ax.get_ylim()[1], colors='black')
-
-
-
-
-    @staticmethod
-    def _plot_regulation_direction_histogram(ax, opposite_regulation_overview):
-        ax.hist(opposite_regulation_overview)
-
-
-
-
-# Cell
-class QuantileFilterer():
-    def __init__(self, objects_to_filter, filterconfigs):
-        """Filters objects (e.g. ClusterDiffInfos) based on the
-        FilterConfigs defined below. The objects need to have
-        a function with the identical name as in FilterConfig.property_name
-        which returns a scalar. The quantiles are defined with respect to this scalar.
-
-        Args:
-            objects_to_filter (_type_): _description_
-            filterconfigs (_type_): _description_
-        """
-
-        self._objects_to_filter = objects_to_filter
-        self._filterconfigs = filterconfigs
-
-    def get_filtered_list_of_objects(self):
-        if self._filterconfigs is None:
-            return self._objects_to_filter
-        else:
-            return self._filter_objects()
-
-    def _filter_objects(self):
-        individually_filtered = []
-        for filterconf in self._filterconfigs:
-            filtered_cclusts = set(self._filter_to_property_quantile(filterconf))
-            individually_filtered.append(filtered_cclusts)
-        return list(set.intersection(*individually_filtered)) #we filter the quantile of the COMPLETE set for
-        #every condition and intersect in the end, alternatively one could successively filter the quantiles
-
-
-    def _filter_to_property_quantile(self, filterconf):#get the quantiles with the best property scores i.e. quality score
-        property_sorted = self._sort_objects_to_filter_by_score(filterconf.property_name)
-
-        if filterconf.quantile_starts_at_lowest:
-            number_to_retain = int(filterconf.quantile * len(property_sorted))
-            return property_sorted[:number_to_retain]
-        else:
-            number_to_discard = int((1-filterconf.quantile) * len(property_sorted))
-            return property_sorted[number_to_discard:]
-
-    def _sort_objects_to_filter_by_score(self, property_name):
-        try:
-            return self._property_encodes_instance_variable(property_name)
-        except:
-            return self._property_encodes_function_name(property_name)
-
-    def _property_encodes_instance_variable(self, property_name):
-        return sorted(self._objects_to_filter, key = lambda x : getattr(x, property_name))
-
-    def _property_encodes_function_name(self, property_name):
-        return sorted(self._objects_to_filter, key = lambda x : getattr(x, property_name)())
-
-
-
-class FilterConfig():
-    def __init__(self, property_name, quantile, quantile_starts_at_lowest):
-        self.property_name = property_name
-        self.quantile = quantile
-        self.quantile_starts_at_lowest = quantile_starts_at_lowest
-
-
-
-# Cell
-class ComplementedClusterFilterer(QuantileFilterer):
-    def __init__(self, complemented_clusterloader, clusterfilterconfigs):
-        super().__init__(complemented_clusterloader.complemented_clusters, clusterfilterconfigs)
-
-    def get_filtered_complemented_clusters(self):
-        return self.get_filtered_list_of_objects()
-
-
-
-class ComplementedClusterFilterConfigs():
-    def __init__(self, min_abs_normfc_quantile = 1, ptm_abs_normfc_quantile = 1, outlier_abs_normfc_quantile = 1, ptm_absfc_quantile = 1,
-    modpep_quality_quantile = 1, outlier_quality_quantile = 1, number_mainclustpeps_quantile = 1):
-        self.filterconfigs = []
-        self._min_abs_normfc_quantile =min_abs_normfc_quantile
-        self._ptm_abs_normfc_quantile=ptm_abs_normfc_quantile
-        self._number_mainclustpeps_quantile = number_mainclustpeps_quantile
-        self._outlier_abs_normfc_quantile= outlier_abs_normfc_quantile
-        self._ptm_absfc_quantile=ptm_absfc_quantile
-        self._outlier_quality_quantile=outlier_quality_quantile
-        self._modpep_quality_quantile = modpep_quality_quantile
-        self._number_mainclustpeps_quantile = number_mainclustpeps_quantile
-        self._initialize_filter_configs()
-
-    def _initialize_filter_configs(self):
-        self.filterconfigs.append(FilterConfig("get_min_abs_normfc", self._min_abs_normfc_quantile, False))
-        self.filterconfigs.append(FilterConfig("get_ptm_abs_normfc", self._ptm_abs_normfc_quantile, False))
-        self.filterconfigs.append(FilterConfig("get_ptm_abs_fc", self._ptm_absfc_quantile, False))
-        self.filterconfigs.append(FilterConfig("get_outlier_abs_normfc", self._outlier_abs_normfc_quantile, False))
-        self.filterconfigs.append(FilterConfig("get_outlier_quality_score", self._outlier_quality_quantile, True))
-        self.filterconfigs.append(FilterConfig("get_modpep_quality_score", self._modpep_quality_quantile, True))
-        self.filterconfigs.append(FilterConfig("get_number_mainclust_peptides", self._number_mainclustpeps_quantile, False))
-
-
-
-
-# Cell
-
-class DiffClusterFilterer(QuantileFilterer):
-    def __init__(self, diffclust_list, diffclustfilterconfigs):
-        super().__init__(diffclust_list, diffclustfilterconfigs)
-
-    def get_filtered_diffclust_list(self):
-        return self.get_filtered_list_of_objects()
-
-
-class DiffClusterFilterConfig(FilterConfig):
-    def __init__(self, fcdiff_quantile = 1, quality_score_quantile = 1, num_mainclust_peptides_quantile = 1, num_outlierclust_peptides_quantile = 1):
-        self.filterconfigs = []
-        self._fcdiff_quantile = fcdiff_quantile
-        self._quality_score_quantile = quality_score_quantile
-        self._num_mainclust_peptides_quantile = num_mainclust_peptides_quantile
-        self._num_outlierclust_peptides_quantile = num_outlierclust_peptides_quantile
-        self._initialize_filter_configs()
-
-    def _initialize_filter_configs(self):
-        self.filterconfigs.append(FilterConfig("fcdiff", self._fcdiff_quantile,False))
-        self.filterconfigs.append(FilterConfig("quality_score", self._quality_score_quantile, True))
-        self.filterconfigs.append(FilterConfig("get_num_mainclust_peptides", self._num_mainclust_peptides_quantile, False))
-        self.filterconfigs.append(FilterConfig("get_num_outlierclust_peptides", self._num_outlierclust_peptides_quantile, False))
-
-
-
-
-# Cell
-
-class OutlierPeptideFilterer(QuantileFilterer):
-    def __init__(self, outlier_peptide_list, outlierpeptide_filterconfigs):
-        super().__init__(outlier_peptide_list, outlierpeptide_filterconfigs)
-
-    def get_filtered_outlier_peptide_list(self):
-        return self.get_filtered_list_of_objects()
-
-
-class OutlierPeptideFilterConfigs(FilterConfig):
-    def __init__(self, quality_score_quantile = 1, num_mainclust_peptides_quantile = 1, protnormed_fc_quantile = 1):
-        self.filterconfigs = []
-        self._quality_score_quantile = quality_score_quantile
-        self._num_mainclust_peptides_quantile = num_mainclust_peptides_quantile
-        self._protnormed_fc_quantile = protnormed_fc_quantile
-        self._initialize_filter_configs()
-
-    def _initialize_filter_configs(self):
-        self.filterconfigs.append(FilterConfig("quality_score", self._quality_score_quantile, True))
-        self.filterconfigs.append(FilterConfig("num_mainclust_peptides", self._num_mainclust_peptides_quantile, False))
-        self.filterconfigs.append(FilterConfig("protnormed_fc", self._protnormed_fc_quantile, False))
diff --git a/alphaquant/multicond/diffresults_handling.py b/alphaquant/multicond/diffresults_handling.py
deleted file mode 100644
index 43a1c2ef..00000000
--- a/alphaquant/multicond/diffresults_handling.py
+++ /dev/null
@@ -1,128 +0,0 @@
-
-
-class QuantifiedMultiConditionComparison():
-    """
-    class that contains all information to be used for the multi-condition
-    analysis.
-    """
-    def __init__(self):
-        self._proteinname2multicondprots = {}
-
-    def add_quantified_proteins_to_comparison(self, quantified_proteins):
-        for quantified_protein in quantified_proteins:
-            self._initialize_multicond_protein_if_necessary(quantified_protein)
-            self._add_quantified_protein_to_multicond(quantified_protein)
-
-    def get_quantified_protein(self, protein_name, condpair):
-        multicondprot =  self._proteinname2multicondprots.get(protein_name)
-        return multicondprot.condpair2quantified_protein.get(condpair)
-
-
-    def _initialize_multicond_protein_if_necessary(self, quantified_protein):
-        if quantified_protein.name not in self._proteinname2multicondprots:
-            self._proteinname2multicondprots[quantified_protein.name] = QuantifiedProteinMultiCondition(quantified_protein.name)
-
-    def _add_quantified_protein_to_multicond(self, quantified_protein):
-        multicondprot = self._proteinname2multicondprots.get(quantified_protein.name)
-        multicondprot.condpair2quantified_protein[quantified_protein.condpair] = quantified_protein
-
-class QuantifiedProteinMultiCondition():
-    def __init__(self, name):
-        self.name = name
-        self.condpair2quantified_protein = {}
-
-
-class QuantifiedProteinInCondpair():
-    def __init__(self, condpair, name, log2fc, p_value, fdr):
-        self.condpair = condpair
-        self.name = name
-        self.log2fc = log2fc
-        self.p_value = p_value
-        self.fdr = fdr
-
-
-class QuantifiedProteinCanditateInCondpair(QuantifiedProteinInCondpair):
-    """stump for cluster-based protein processing"""
-    def __init__(self, peptide_nodes_of_cluster):
-        self.peptides = None
-
-    def _get_quantprot_properties_from_peptide_nodes(self, peptide_nodes_of_cluster):
-        pass
-
-
-# Cell
-
-class ResultsDirectoryReader():
-    def __init__(self, results_dir, condpairs_selected):
-        self.quantified_multicondition_comparison = QuantifiedMultiConditionComparison() #initialize empty multicomparison object
-
-        self.__condpairs_selected = condpairs_selected
-        self.__localizer = ResultstableLocalizer(results_dir)
-        self._add_all_condpairs_to_multicondition_comparison()
-
-    def _add_all_condpairs_to_multicondition_comparison(self):
-        for condpair in self.__condpairs_selected:
-            file = self.__localizer.condpairname2file.get(condpair)
-            quantified_proteins = ResultsTableReader(condpair, file).quantified_proteins
-            self.quantified_multicondition_comparison.add_quantified_proteins_to_comparison(quantified_proteins)
-
-# Cell
-import pandas as pd
-
-class ResultsTableReader():
-    def __init__(self, condpair, file):
-        self.quantified_proteins = []
-
-        self.__condpair = condpair
-        self.__results_df = self._read_table(file)
-        self.__protnames = self._get_property("protein")
-        self.__log2fcs = self._get_property("log2fc")
-        self.__p_values = self._get_property("pval")
-        self.__fdrs = self._get_property("fdr")
-
-        self._init_quantified_proteins()
-
-    def _read_table(self, file):
-        return pd.read_csv(file, sep = "\t")
-
-    def _get_property(self, property):
-        return list(self.__results_df[property])
-
-    def _init_quantified_proteins(self):
-        for idx in range(len(self.__protnames)):
-            self.quantified_proteins.append(QuantifiedProteinInCondpair(condpair=self.__condpair, name = self.__protnames[idx],
-            log2fc= self.__log2fcs[idx], p_value=self.__p_values[idx], fdr = self.__fdrs[idx]))
-
-
-
-
-
-# Cell
-import re
-class ResultstableLocalizer():
-    def __init__(self, results_dir):
-        self._resultsfilepaths = ResultsFiles(results_dir=results_dir).filepaths
-        self.condpairname2file = {}
-        self._load_condpairname2file()
-
-    def _load_condpairname2file(self):
-        for filepath in self._resultsfilepaths:
-            filename = self._parse_condpairname_from_filepath(filepath)
-            self.condpairname2file.update({filename: filepath})
-
-    @staticmethod
-    def _parse_condpairname_from_filepath(file):
-        pattern = "(.*\/|^)(results.*\/)(.*)(.results.tsv)"
-        matched = re.search(pattern, file)
-        return matched.group(3)
-
-
-import glob
-class ResultsFiles():
-    def __init__(self, results_dir):
-        self._results_dir = results_dir
-        self.filepaths = self.__get_result_filepaths()
-
-    def __get_result_filepaths(self):
-        return glob.glob(f'{self._results_dir}/*.results.tsv')
-
diff --git a/alphaquant/ptm/phospho_inference.py b/alphaquant/ptm/phospho_inference.py
deleted file mode 100644
index 851d2c7f..00000000
--- a/alphaquant/ptm/phospho_inference.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-import pathlib
-import pandas as pd
-import alphaquant.cluster.outlier_scoring as aqoutlier
-import alphaquant.resources.database_loader as aq_resource_dbloader
-
-def get_inferred_phospho_peptides(results_dir, cond1, cond2):
-    outlier_handler = aqoutlier.OutlierHandler(results_dir = results_dir, cond1 = cond1, cond2 = cond2)
-    clusterdiff_list = outlier_handler.get_diffclust_overview_list()
-    predicted_phosphoprone_sequences = aq_resource_dbloader.load_dl_predicted_phosphoprone_sequences()
-    inferred_phospho_peptides = get_regulation_inferred_phosphoprone_peptides(predicted_phosphoprone_sequences, clusterdiff_list)
-    return inferred_phospho_peptides
-
-
-
-
-def get_regulation_inferred_phosphoprone_peptides(phosphoprone_seqs, clusterdiff_list):
-    regulation_inferred_phosphoprone_peptides = []
-    for clusterdiff in clusterdiff_list:
-        cluster_is_phosphoprone = False
-        for seq in clusterdiff.outlier_peptide_names:
-            if seq in phosphoprone_seqs:
-                cluster_is_phosphoprone = True
-                break
-        if cluster_is_phosphoprone:
-            regulation_inferred_phosphoprone_peptides.extend(clusterdiff.outlier_peptide_names)
-    return set(regulation_inferred_phosphoprone_peptides)

From d482eea5f1229fb85b69fcaa9673c1598a715837 Mon Sep 17 00:00:00 2001
From: ammarcsj <70114795+ammarcsj@users.noreply.github.com>
Date: Mon, 25 May 2026 10:05:11 +0200
Subject: [PATCH 2/3] Remove dead plotting functions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 alphaquant/plotting/base_functions.py         | 914 +-----------------
 alphaquant/plotting/classify.py               |  16 -
 alphaquant/plotting/fcviz_medianref.py        |   8 -
 alphaquant/plotting/multicond.py              |  47 -
 alphaquant/plotting/pairwise.py               |  38 -
 alphaquant/ui/dashboad_parts_plots_basic.py   |   7 -
 .../ui/dashboard_parts_plots_proteoforms.py   |  22 -
 7 files changed, 2 insertions(+), 1050 deletions(-)

diff --git a/alphaquant/plotting/base_functions.py b/alphaquant/plotting/base_functions.py
index 579272ef..762a3162 100644
--- a/alphaquant/plotting/base_functions.py
+++ b/alphaquant/plotting/base_functions.py
@@ -86,73 +86,6 @@ def plot_pvals(result_df):
     plt.plot(x, x)
     plt.show()
 
-# Cell
-from scipy.stats import norm
-import matplotlib.pyplot as plt
-
-def plot_bgdist(bgdist):
-    fc2counts_rescaled = tranform_fc2count_to_fc_space(bgdist.fc2counts, bgdist.cumulative[-1],1/100.0)
-
-    plt.bar(list(fc2counts_rescaled.keys()), fc2counts_rescaled.values(),width=0.01,color='g')
-    axes2 = plt.twinx()
-    x = np.linspace(-4, 4, 1000)
-    axes2.plot(x, norm.pdf(x, 0, bgdist.SD)/1.15)
-    axes2.set_ylim(0.0, 0.4)
-    plt.show()
-
-def tranform_fc2count_to_fc_space(fc2counts, num_fcs, rescale_factor):
-    fc2counts_fcscales = {}
-    for fc, count in fc2counts.items():
-        fc2counts_fcscales[fc*rescale_factor] = count/num_fcs
-
-    return fc2counts_fcscales
-
-# Cell
-
-import matplotlib.pyplot as plt
-from scipy import stats
-import itertools
-
-def plot_withincond_fcs(normed_intensity_df, cut_extremes = True):
-    """takes a normalized intensity dataframe and plots the fold change distribution between all samples. Column = sample, row = ion"""
-
-    samplecombs = list(itertools.combinations(normed_intensity_df.columns, 2))
-
-    for spair in samplecombs:#compare all pairs of samples
-        s1 = spair[0]
-        s2 = spair[1]
-        diff_fcs = normed_intensity_df[s1].to_numpy() - normed_intensity_df[s2].to_numpy() #calculate fold changes by subtracting log2 intensities of both samples
-
-        if cut_extremes:
-            cutoff = max(abs(np.nanquantile(diff_fcs,0.025)), abs(np.nanquantile(diff_fcs, 0.975))) #determine 2.5% - 97.5% interval, i.e. remove extremes
-            range = (-cutoff, cutoff)
-        else:
-            range = None
-        plt.hist(diff_fcs,80,density=True, histtype='step',range=range) #set the cutoffs to focus the visualization
-        plt.xlabel("log2 peptide fcs")
-
-    plt.show()
-
-# Cell
-from anytree.exporter import DotExporter
-import anytree
-
-def get_iontree_img(root, protein,saveloc = None):
-    protein_node = anytree.findall_by_attr(root, protein, maxlevel=2)[0]
-    exporter = DotExporter(protein_node, nodenamefunc=lambda n: f"{get_nodename(n)}\ncluster{n.cluster}\n{n.is_included}")
-    exporter.to_picture(saveloc)
-
-def get_nodename(node):
-    parentname = node.parent.name
-    shortened_name = node.name.replace(parentname, "")
-    return shortened_name
-
-# Cell
-
-import matplotlib.pyplot as plt
-from scipy import stats
-
-
 # Cell
 import matplotlib.pyplot as plt
 def scatter_df_columns(merged_df, log_axes = False):
@@ -171,57 +104,6 @@ def scatter_df_columns(merged_df, log_axes = False):
             plt.yscale('log')
         plt.show()
 
-# Cell
-import matplotlib.pyplot as plt
-def plot_cumhist_dfcols(merged_df):
-    col = (0.2, 0.4, 0.6, 0.4)
-    ref_columns = list(filter(lambda x : "_ref" in x, merged_df.columns.to_list())) #filter the reference columns from the merged df
-
-    for ref in ref_columns:
-        compare = ref.replace("_ref", "")
-        plt.hist(merged_df[ref], 100, density=True, histtype='step', label='reference')
-        plt.hist(merged_df[compare], 100, density=True, histtype='step',label='compare')
-        corr = merged_df[ref].corr(merged_df[compare])
-        plt.title(f"{ref} vs. {compare} corr {corr}")
-        plt.show()
-
-# Cell
-
-# Cell
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-def plot_fold_change(df, key1, key2):
-    to_plot = df.copy()
-    to_plot[f'Ratio ({key1}/{key2})'] = np.log2(to_plot[key1] / to_plot[key2])
-    to_plot[f'Inten_{key1}'] = np.log10(to_plot[key1])
-
-    species = 'Human'
-    val = to_plot.loc[to_plot['species']==species, f'Ratio ({key1}/{key2})'].values
-    val = val[~np.isnan(val)&~np.isinf(val)&~np.isneginf(val)]
-    print(f'Species={species}, n={len(val)}, median={np.median(val)}, dev={np.std(val)}')
-    species='Ecoli'
-    val = to_plot.loc[to_plot['species']==species, f'Ratio ({key1}/{key2})'].values
-    val = val[~np.isnan(val)&~np.isinf(val)&~np.isneginf(val)]
-    print(f'species={species}, n={len(val)}, median={np.median(val)}, dev={np.std(val)}')
-
-    plt.figure(figsize=(7,7))
-    ax = sns.scatterplot(x=f'Ratio ({key1}/{key2})', y=f'Inten_{key1}', hue="species", data=to_plot, alpha=0.5)
-    plt.title('Fold Change')
-    plt.xlim([-4.5, 6.5])
-    #plt.ylim([6,11.5])
-    plt.show()
-
-
-# Cell
-
-import matplotlib.pyplot as plt
-
-import numpy as np
-
-
-
-
 # Cell
 import anytree
 import time
@@ -251,263 +133,6 @@ def get_melted_protein_ion_intensity_table(protein, diffresults_df, normed_df, s
     print(f"times melted protein intensities:\n t_melted: {t_melted - t_start} \n t_annotated: {t_annotated - t_melted}")
     return df_melted, diffresults_line
 
-# Cell
-
-import time
-def get_betweencond_fcs_table(melted_df, c1, c2, ion_header = "quant_id"):
-    t_start = time.time()
-    has_clust_info = "is_included" in melted_df.columns
-    melted_df = melted_df.set_index(["condition"])
-    sorted_ions = melted_df[aq_variables.QUANT_ID]
-    c1_df = melted_df.loc[c1]
-    c2_df = melted_df.loc[c2]
-    ions = set(c1_df[ion_header]).intersection(set(c2_df[ion_header]))
-    sorted_ions = [x for x in sorted_ions if x in ions]
-    c1_df = c1_df.set_index([ion_header]).sort_index()
-    c2_df = c2_df.set_index([ion_header]).sort_index()
-    result_ions = []
-    result_fcs = []
-    result_included = []
-    t_localized_index_set = time.time()
-    for ion in sorted_ions:
-        is_included = c1_df.loc[ion]["is_included"][0] if has_clust_info else True
-
-        ions1 = c1_df.loc[[ion]]["intensity"]
-        ions2 = c2_df.loc[[ion]]["intensity"]
-        fcs = [x-y for x,y in itertools.product(ions1, ions2)]
-        result_ions.extend([ion for x in range(len(fcs))])
-        result_fcs.extend(fcs)
-        result_included.extend([is_included for x in range(len(fcs))])
-    t_list_created = time.time()
-    res_df = pd.DataFrame({ion_header: result_ions, "log2fc": result_fcs, "is_included" : result_included})
-    print(f"times betweencond fcs table:\n t_localized_index_set: {t_localized_index_set - t_start} \n t_list_created: {t_list_created - t_localized_index_set}")
-    return res_df
-
-# Cell
-
-def beeswarm_ion_plot(df_melted, diffresults_protein, only_boxplot = False,saveloc = None):
-    """takes pre-formatted long-format dataframe which contains all ion intensities for a given protein.
-      Columns are QUANT_ID, "intensity", "condition". Also takes results of the protein differential analysis as a series
-      to annotate the plot"""
-
-    #get annotations from diffresults
-    fdr = float(diffresults_protein.at["fdr"])
-    log2fc = float(diffresults_protein.at["log2fc"])
-    protein = diffresults_protein.name
-
-    #define greyscale color palette for the two conditions
-    pal2 = [(0.94, 0.94, 0.94),(1.0, 1.0, 1.0)]
-
-    #searborn standard functions
-    if only_boxplot:
-        ax = sns.boxplot(x=aq_variables.QUANT_ID, y="intensity", hue="condition", data=df_melted, palette="Set2", dodge=True)
-    else:
-        ax = sns.boxplot(x=aq_variables.QUANT_ID, y="intensity", hue="condition", data=df_melted, palette=pal2)
-        ax = sns.stripplot(x=aq_variables.QUANT_ID, y="intensity", hue="condition", data=df_melted, palette="Set2", dodge=True)#size = 10/len(protein_df.index)
-
-    #annotate and format
-    handles, labels = ax.get_legend_handles_labels()
-
-    l = plt.legend(handles[2:4], labels[2:4])
-
-    plt.xticks(rotation=90)
-    if "gene" in diffresults_protein.index:
-        gene = diffresults_line.at["gene"]
-        plt.title(f"{gene} ({protein}) FDR: {fdr:.1e}")
-    else:
-        plt.title(f"{protein} FDR: {fdr:.1e} log2FC: {log2fc:.1f}")
-    if saveloc is not None:
-        plt.savefig(saveloc)
-
-    plt.show()
-
-
-# Cell
-
-import seaborn as sns
-import matplotlib.pyplot as plt
-
-def foldchange_ion_plot(df_melted, diffresults_protein, saveloc = None):
-    """takes pre-formatted long-format dataframe which contains all between condition fold changes. All ions of a given protein
-    are visualized, the columns are aq_variables.QUANT_ID and "log2fc".  Also takes results of the protein differential analysis as a series
-      to annotate the plot"""
-    #get annotations from diffresults
-    fdr = float(diffresults_protein.at["fdr"])
-    protein = diffresults_protein.name
-
-    #define greyscale color palette
-    pal2 = [(0.94, 0.94, 0.94),(1.0, 1.0, 1.0)]
-
-    #specify color for included proteins
-    my_pal = {row[aq_variables.QUANT_ID]: "lightblue" if row["is_included"] else "grey" for _,row in df_melted.iterrows()}
-
-    #plot with seaborn standard functions
-    ax = sns.boxplot(x=aq_variables.QUANT_ID, y="log2fc", data=df_melted, color = "white")
-    ax = sns.stripplot(x=aq_variables.QUANT_ID, y="log2fc", data=df_melted, palette= my_pal)
-
-    #annotate and format
-    handles, labels = ax.get_legend_handles_labels()
-    ax.axhline(y = 0, color='black', linewidth=2, alpha=.7, linestyle = "dashed")
-    l = plt.legend(handles[2:4], labels[2:4])
-    plt.xticks(rotation=90)
-    if "gene" in diffresults_protein.index:
-        gene = diffresults_line.at["gene"]
-        plt.title(f"{gene} ({protein}) FDR: {fdr:.1e}")
-    else:
-        plt.title(f"{protein} FDR: {fdr:.1e}")
-    if saveloc is not None:
-        plt.savefig(saveloc)
-
-    plt.show()
-
-
-
-# Cell
-import numpy.ma as ma
-import scipy.cluster.hierarchy as hierarchy
-
-def compare_direction(array1, array2):
-    identical_elements  = array1 == array2
-    num_same_direction = np.sum(identical_elements)
-    return num_same_direction
-
-def compare_correlation(array1, array2):
-    corr = ma.corrcoef(ma.masked_invalid(array1), ma.masked_invalid(array2))[0][1]
-    return corr
-
-def get_condensed_distance_matrix(arrays, compare_function):
-
-    res = np.ones(int(len(arrays) * (len(arrays)-1)/2))
-    count = 0
-    for i in range(len(arrays)):
-        for j in range(i+1, len(arrays)):
-            array1 = arrays[i]
-            array2 = arrays[j]
-            distance = 1/compare_function(array1, array2)
-            res[count] = distance
-            count+=1
-
-    return res
-
-def clustersort_numerical_arrays(arrays, names , cluster_method ='average',compare_function = compare_direction):
-    condensed_distance_matrix = get_condensed_distance_matrix(arrays, compare_function)
-    linkage_matrix = hierarchy.linkage(condensed_distance_matrix, method = cluster_method)
-    sorted_array_idxs = hierarchy.leaves_list(linkage_matrix)
-
-    sorted_array = [arrays[x] for x in sorted_array_idxs]
-    sorted_names = [names[x] for x in sorted_array_idxs]
-    return sorted_array, sorted_names, linkage_matrix
-
-# Cell
-import numpy as np
-def compare_direction(array1, array2):
-    identical_elements  = array1 == array2
-    num_same_direction = np.sum(identical_elements)
-    return num_same_direction
-
-# Cell
-import numpy.ma as ma
-def compare_correlation(array1, array2):
-    corr = ma.corrcoef(ma.masked_invalid(array1), ma.masked_invalid(array2))[0][1]
-    return corr
-
-# Cell
-
-import scipy.cluster.hierarchy as hierarchy
-import scipy.spatial.distance as dist
-def clustersort_numerical_arrays(arrays, names , cluster_method ='average',compare_function = compare_direction):
-    #condensed_distance_matrix = get_condensed_distance_matrix(arrays, compare_function)
-    condensed_distance_matrix = dist.pdist(arrays, lambda u, v: 1/(compare_function(u,v)+1))
-    linkage_matrix = hierarchy.linkage(condensed_distance_matrix, method = cluster_method)
-    sorted_array_idxs = hierarchy.leaves_list(linkage_matrix)
-
-    sorted_array = np.array([arrays[x] for x in sorted_array_idxs])
-    sorted_names = np.array([names[x] for x in sorted_array_idxs])
-    return sorted_array, sorted_names, linkage_matrix
-
-# Cell
-
-import pandas as pd
-import numpy as np
-import os
-def get_clustered_dataframe(overview_df, cluster_method ='average',compare_function = compare_direction, clust_rows = True, clust_columns = True):
-
-    df_numbered = overview_df.select_dtypes(include=np.number)
-    contains_floats = ['float' in str(x) for x in df_numbered.dtypes]
-    type = 'float' if True in contains_floats else 'int'
-    df_numbered = df_numbered.astype(type) #ensure that the df has no mixed types
-
-    rows = df_numbered.to_numpy()
-    rownames = list(df_numbered.index)
-    colnames = list(df_numbered.columns)
-
-    if clust_rows:
-        if(len(rownames)>10000):
-            print(f"large number of rows, skipping cluster step of rows to avoid long runtime.")
-        else:
-            print(f"clustering on {len(rownames)} rows")
-            rows, rownames, _ = clustersort_numerical_arrays(rows, rownames, cluster_method, compare_function)
-    if clust_columns:
-        if(len(colnames)>10000):
-            print(f"large number of columns, skipping cluster step of columns to avoid long runtime")
-        else:
-            print(f"clustering on {len(colnames)} columns")
-            columns, colnames,_ = clustersort_numerical_arrays(rows.T, colnames, cluster_method, compare_function)
-            rows = columns.T
-    print("finished clustering")
-    df_clustered = pd.DataFrame(rows, index= rownames)
-    df_clustered.columns = colnames
-    return df_clustered
-
-# Cell
-import re
-import os
-def get_sample_overview_dataframe(results_folder = os.path.join(".", "results"), regulated_object = "protein",condpairs_to_compare = [], show_fcs = False, name_transformation_function = None):
-    """
-    goes through the results folder and extracts up- and downregulated genes for each (specified) condition comparison
-    """
-
-    if len(condpairs_to_compare) == 0:
-        condpairs_to_compare = [f.replace(".results.tsv", "").split(aq_variables.CONDITION_PAIR_SEPARATOR) for f in os.listdir(results_folder) if re.match(r'.*results.tsv', f)]
-
-    dfs = []
-    count = 0
-    for row in condpairs_to_compare:
-        c1 = row[0]
-        c2 = row[1]
-        results = get_diffresult_dataframe(c1, c2, results_folder)
-        if(type(results) == type(None)):
-            continue
-        site_df = results
-        prot2fc = dict(zip(site_df[regulated_object], site_df["log2fc"]))
-        positive_sites = list(set(site_df[(site_df["fdr"]<0.05) & (site_df["log2fc"]>0.5)][regulated_object]))
-        negative_sites = list(set(site_df[(site_df["fdr"]<0.05) & (site_df["log2fc"]<-0.5)][regulated_object]))
-
-        if show_fcs:
-            df_local = pd.DataFrame([[prot2fc.get(x) for x in positive_sites]+[prot2fc.get(x) for x in negative_sites]],columns=positive_sites+negative_sites)
-        else:
-            df_local = pd.DataFrame([[1 for x in positive_sites]+[-1 for x in negative_sites]],columns=positive_sites+negative_sites)
-        df_local["condpair"] = utils.get_condpairname([c1, c2])
-        #df_loc["num_regulated"] = len(positive_sites) + len(negative_sites)
-        dfs.append(df_local)
-        #print(count)
-        count+=1
-
-    result_df = pd.concat(dfs)
-    result_df = result_df.replace(np.nan, 0).set_index("condpair")
-    if name_transformation_function is not None:
-        result_df = name_transformation_function(result_df)
-
-    return result_df
-
-def reformat_dataframe_header(df):
-    header_genes = [x.split("_")[0] for x in df.columns]
-    header_proteins = [x.split("_")[1] for x in df.columns]
-    header_sitenums = [x.split("_")[2] for x in df.columns]
-
-    header_df = pd.DataFrame.from_dict(data= {'gene_name': header_genes, 'protein' : header_proteins, 'site_id': header_sitenums}, columns=df.columns, orient='index')
-    return pd.concat([header_df, df])
-
 # Cell
 import pandas as pd
 import os
@@ -532,31 +157,6 @@ def get_diffresult_dataframe(cond1, cond2, results_folder = os.path.join(".", "r
 
     return diffprots
 
-# Cell
-import alphaquant.utils.utils as aqutils
-import alphaquant.diffquant.diffutils as aqdiffutils
-import numpy as np
-
-def get_diffresult_dict_ckg_format(cond1, cond2, results_folder = os.path.join(".", "results")):
-    """
-    ckg wrapper, reads the results dataframe for a given condpair and reformats it to the ckg volcano plot input format
-    """
-    result_df = get_diffresult_dataframe(cond1, cond2, results_folder)
-    log2fcs = result_df["log2fc"].to_numpy()
-    logged_fdrs = result_df["-log10fdr"].to_numpy()
-    min_fdr = np.min(logged_fdrs)
-    result_ckg_dict = {}
-    result_ckg_dict[("volcano", aqutils.get_condpairname([cond1, cond2]))] = {"x": log2fcs, "y" : np.array([1.2,3.3,1,3,4,5,6]), "pvalue" : min_fdr, "text" : "testext", "color" : "grey",'is_samr':False, 'annotations' : []}
-    return result_ckg_dict
-
-# Cell
-import pandas as pd
-def subset_normed_peptides_df_to_condition(cond, sample2cond_df, normed_df):
-    columns_to_keep = set(sample2cond_df[sample2cond_df["condition"]==cond]["sample"]).intersection(set(normed_df.columns))
-    columns_to_drop = set(normed_df.columns) - columns_to_keep
-    subset_df = normed_df.drop(columns = columns_to_drop)
-    return subset_df
-
 # Cell
 import pandas as pd
 import os
@@ -712,421 +312,10 @@ def plot_volcano_plotly(
 
     return fig
 
-# Cell
-import pandas as pd
-import numpy as np
-import plotly.graph_objects as go
-import matplotlib.pyplot as plt
-import itertools
-
-def plot_withincond_fcs_plotly(
-    normed_intensity_df,
-    title,
-    cut_extremes = True
-):
-    """takes a normalized intensity dataframe and plots the fold change distribution between all samples. Column = sample, row = ion"""
-    fig = go.Figure()
-    samplecombs = list(itertools.combinations(normed_intensity_df.columns, 2))
-    cutoff_common = 0
-    for spair in samplecombs:#compare all pairs of samples
-        s1 = spair[0]
-        s2 = spair[1]
-        diff_fcs = normed_intensity_df[s1].to_numpy() - normed_intensity_df[s2].to_numpy() #calculate fold changes by subtracting log2 intensities of both samples
-
-        if cut_extremes:
-            cutoff_cur = max(abs(np.nanquantile(diff_fcs,0.025)), abs(np.nanquantile(diff_fcs, 0.975))) #determine 2.5% - 97.5% interval, i.e. remove extremes
-            if cutoff_cur and cutoff_cur > cutoff_common:
-                cutoff_common = cutoff_cur
-
-        n, bins, _ = plt.hist(diff_fcs, 250, density=True, histtype='step')
-        fig.add_trace(
-            go.Scatter(
-                x=bins,
-                y=n,
-                mode='lines',
-                line=dict(
-                    shape='hvh',
-                    width = 0.5
-                )
-            )
-        )
-    fig.update_layout(
-        xaxis = dict(
-            title="log2 peptide fcs",
-            range=(-cutoff_common, cutoff_common)
-        ),
-        barmode='stack',
-        template='plotly_white',
-        showlegend=False,
-        title = dict(
-                text=title,
-                font=dict(size=16, color='black', family='Arial, sans-serif'),
-                y=0.91,
-                x=0.5,
-                xanchor='center',
-                yanchor='middle',
-            ),
-        height=400,
-        width=870,
-    )
-    return fig
-
-# Cell
-import pandas as pd
-import numpy as np
-import plotly.graph_objects as go
-import matplotlib.pyplot as plt
-
-def plot_betweencond_fcs_plotly(
-    df_c1_normed,
-    df_c2_normed,
-    title,
-    merge_samples = True
-):
-    """takes normalized intensity dataframes of each condition and plots the distribution of direct peptide fold changes between conditions"""
-    fig = go.Figure()
-    if merge_samples: #samples can be merged to median intensity
-        df_c1_normed = df_c1_normed.median(axis = 1, skipna = True).to_frame()
-        df_c2_normed = df_c2_normed.median(axis = 1, skipna = True).to_frame()
-
-    both_idx = df_c1_normed.index.intersection(df_c2_normed.index)
-    df1 = df_c1_normed.loc[both_idx]
-    df2 = df_c2_normed.loc[both_idx]
-    cutoff_common = 0
-
-    fig.add_vline(
-        x=0,
-        line_width=1,
-        line_dash="dash",
-        line_color="red"
-    ) #the data is normalized around 0, draw in helper line
-
-    for col1 in df1.columns:
-        for col2 in df2.columns:
-            diff_fcs = df1[col1].to_numpy() - df2[col2].to_numpy() #calculate fold changes by subtracting log2 intensities of both conditions
-            cutoff_cur = max(abs(np.nanquantile(diff_fcs,0.025)), abs(np.nanquantile(diff_fcs, 0.975))) #determine 2.5% - 97.5% interval, i.e. remove extremes
-            if cutoff_cur and cutoff_cur > cutoff_common:
-                cutoff_common = cutoff_cur
-
-            n, bins, _ = plt.hist(diff_fcs, 100, density=True, histtype='step')
-            fig.add_trace(
-                go.Scatter(
-                    x=bins,
-                    y=n,
-                    mode='lines',
-                    line=dict(
-                        shape='hvh'
-                    )
-                )
-            )
-
-    fig.update_layout(
-        xaxis = dict(
-            title="log2(fc)",
-            range=(-cutoff_common, cutoff_common)
-        ),
-        barmode='stack',
-        template='plotly_white',
-        showlegend=False,
-        title = dict(
-            text=title,
-            y=0.90,
-            x=0.5,
-            xanchor='center',
-            yanchor='middle',
-        ),
-        height=400,
-        width=870,
-    )
-    return fig
-
-# Cell
-def beeswarm_ion_plot_plotly(
-    df_melted,
-    diffresults_protein,
-):
-    """takes pre-formatted long-format dataframe which contains all ion intensities for a given protein.
-      Columns are QUANT_ID, "intensity", "condition". Also takes results of the protein differential analysis as a series
-      to annotate the plot"""
-
-    fig = go.Figure()
-
-    #get annotations from diffresults
-    fdr = float(diffresults_protein.at["fdr"])
-    fc = float(diffresults_protein.at["log2fc"])
-    protein = diffresults_protein.name
-
-    for cond, color, line_color in zip(df_melted.condition.unique(), ['#FF7F0E', '#2CA02C'], ['#808080', '#a6a6a6']):
-        data = df_melted[df_melted.condition == cond]
-
-        fig.add_trace(
-            go.Box(
-                x=data.ion,
-                y=data.intensity,
-                boxpoints='all',
-                name=cond,
-                line=dict(
-                    color=line_color
-                ),
-                marker=dict(
-                    color=color,
-                    opacity=0.3
-                ),
-                pointpos=0
-            )
-        )
-
-
-    if "gene" in diffresults_protein.index:
-        gene = diffresults_line.at["gene"]
-        title = f"{gene} ({protein}) FDR: {fdr:.1e} log2FC: {fc:.2f}"
-    else:
-        title = f"{protein} FDR: {fdr:.1e} log2FC: {fc:.2f}"
-
-    fig.update_layout(
-        xaxis_title='ion',
-        yaxis_title='intensity',
-        boxmode='group', # group together boxes of the different traces for each value of x
-        template='plotly_white',
-        title = dict(
-            text=title,
-            #y=0.85,
-            x=0.5,
-            xanchor='center',
-            yanchor='middle',
-        ),
-        titlefont=dict(
-            size=14,
-            color='black',
-            family='Arial, sans-serif'
-        ),
-        legend=dict(
-            title='conditions:',
-            orientation="h",
-            yanchor="bottom",
-            y=1.03,
-            xanchor="right",
-            x=1
-        ),
-        height=300,
-    )
-
-    return fig
-
-# Cell
-from matplotlib.pyplot import cm
-import anytree
-
-def foldchange_ion_plot_plotly(
-    df_melted,
-    diffresults_protein,
-    protein_node = None,
-    level = 'seq'
-):
-    """takes pre-formatted long-format dataframe which contains all between condition fold changes. All ions of a given protein
-    are visualized, the columns are QUANT_ID and "log2fc".  Also takes results of the protein differential analysis as a series
-      to annotate the plot"""
-
-    fig = go.Figure()
-
-    #get annotations from diffresults
-    fdr = float(diffresults_protein.at["fdr"])
-    fc = float(diffresults_protein.at["log2fc"])
-    protein = diffresults_protein.name
-
-    fig.add_hline(
-        y=0,
-        line_width=2,
-        opacity=.7,
-        line_dash="dash",
-        line_color="black"
-    )
-
-    if protein_node is not None:
-        clust2col, clust2ions = get_color2ions(protein_node, level)
-    else:
-        clust2col = {-1 : 'lightblue'}
-        clust2ions = {-1 : [x for x in df_melted[aq_variables.QUANT_ID].drop_duplicates()]}
-
-
-
-    for clust in sorted(clust2ions.keys()):
-        ions = clust2ions.get(clust)
-        color = clust2col.get(clust)
-
-        df_subset = df_melted[[x in ions for x in df_melted[aq_variables.QUANT_ID]]]
-
-        fig.add_trace(
-            go.Box(
-                x=df_subset.ion,
-                y=df_subset.log2fc,
-
-                boxpoints='all',
-                line=dict(
-                    color=color
-                ),
-                marker=dict(
-                    color=color,
-                    opacity=0.7
-                ),
-                pointpos=0,
-                name = clust
-            )
-        )
-
-    if "gene" in diffresults_protein.index:
-        gene = diffresults_line.at["gene"]
-        title = f"{gene} ({protein}) FDR: {fdr:.1e} log2FC: {fc:.2f}"
-    else:
-        title = f"{protein} FDR: {fdr:.1e} log2FC: {fc:.2f}"
-
-    fig.update_layout(
-        xaxis_title='ion',
-        yaxis_title='log2fc',
-        template='plotly_white',
-        title = dict(
-            text=title,
-            y=0.85,
-            x=0.5,
-            xanchor='center',
-            yanchor='middle',
-        ),
-        titlefont=dict(
-            size=14,
-            color='black',
-            family='Arial, sans-serif'
-        ),
-        height=300,
-    )
-
-    return fig
-
-def get_color2ions(protein_node, level):
-    clust2ions = {}
-
-    relevant_subnodes = anytree.findall(protein_node,filter_ = lambda x : x.type == level)
-    excluded_leaves = []
-
-    for subnode in relevant_subnodes:
-        clust = subnode.cluster
-        clust2ions[clust] = clust2ions.get(clust, []) + [x.name for x in subnode.leaves if subnode.type in x.inclusion_levels]
-        excluded_leaves.extend([x.name for x in subnode.leaves if subnode.type not in x.inclusion_levels])
-
-    colors= cm.rainbow(np.linspace(0, 1, len(clust2ions.keys())))
-    clust2col =  {clust:convert_to_plotly_color_format(col) for clust, col in zip(sorted(clust2ions.keys()), colors)}
-    clust2ions[-1] = excluded_leaves
-    clust2col[-1] = 'lightgrey'
-    #color2ions = {convert_to_plotly_color_format(clust2col.get(x)) : clust2ions.get(x) for x in clust2ions.keys()}
-
-    return clust2col, clust2ions
-
-def convert_to_plotly_color_format(color_array):
-    return f"rgb({int(color_array[0]*250)},{int(color_array[1]*250)},{int(color_array[2]*250)})"
-
-
-# Cell
-
-import seaborn as sns
-def make_mz_fc_boxplot(merged_df, ion_nodes):
-    merged_df = merged_df.copy()
-    ion2fc = {node.name:node.fc for node in ion_nodes}
-    merged_df["log2fc"] = [ion2fc.get(ion) for ion in merged_df[aq_variables.QUANT_ID]]
-    #merged_df = merged_df[merged_df["log2fc"] >0]
-    #merged_df = merged_df[merged_df["EG.ApexRT"] <80]
-    transf_fc = list(2**merged_df["log2fc"])
-    merged_df["fc"]  = transf_fc
-    merged_df['mz_cathegory'] = [assign_mz_cathegory(mz) for mz in merged_df["FG.PrecMz"]]
-    sns.boxplot(x = 'mz_cathegory', y = "log2fc",data = merged_df)
-    plt.ylim(-2, 2)
-    plt.show()
-
-
-def assign_mz_cathegory(mz_val):
-    if mz_val<400:
-        return 400
-    if mz_val < 500:
-        return 500
-    if mz_val < 600:
-        return 600
-    if mz_val < 700:
-        return 700
-    if mz_val < 800:
-        return 800
-    if mz_val < 900:
-        return 900
-    if mz_val < 1000:
-        return 1000
-    if mz_val < 1100:
-        return 1100
-    else:
-        return 1200
-
-# Cell
-import matplotlib.pyplot as plt
-
-def plot_fc_histogram_ml_filtered(y_test, y_pred, fc_cutoff, show_filtered):
-    if show_filtered:
-        fcs = [y_test[i] for i in range(len(y_test)) if y_pred[i]>fc_cutoff]
-        if len(fcs)==0:
-            return
-    else:
-        fcs = [y_test[i] for i in range(len(y_test)) if y_pred[i]<fc_cutoff]
-
-    #is_logged = min(fcs)<0
-    #if not is_logged:
-     #   fcs = np.log2(np.array(fcs))
-    percent_survived = round(len(fcs)/len(y_test), 2)
-    print(f"{percent_survived} of fcs retained of {len(fcs)} total")
-    plt.hist(fcs, 60, density=True, histtype='step',cumulative=False)
-    plt.xlim(-1.8, 1.8)
-
-def plot_log_loss_score(loss_score, y_test, loss_score_cutoff = 0):
-    plt.hist(loss_score, 60, density=False, histtype='step',cumulative=False)
-    plt.show()
-    if loss_score_cutoff > 0:
-        fcs = [y_test[i] for i in range(len(y_test)) if loss_score[i]>loss_score_cutoff]
-        print(f"{len(fcs)} retained of {len(y_test)}")
-        plt.hist(fcs, 60, density=True, histtype='step',cumulative=False)
-        plt.show()
-    plt.scatter(y_test, loss_score, color='blue', alpha=0.3)
-    plt.show()
-
 # Cell
 from sklearn.metrics import mean_squared_error, r2_score
 import seaborn as sns
 
-def scatter_ml_regression_perturbation_aware(y_test, y_pred, ionnames, nodes, results_dir = None):
-    y_test, y_pred = [np.array(y_test), np.array(y_pred)]
-    perturbed_ions = {x.name  for x in nodes if hasattr(x, "perturbation_added")}
-    is_unperturbed_vec = [x for x in range(len(ionnames)) if ionnames[x] not in perturbed_ions]
-    y_unpert_test = y_test[is_unperturbed_vec]
-    y_unpert_pred = y_pred[is_unperturbed_vec]
-
-    fig_perturb, ax_perturb = plt.subplots()
-
-
-    sns.regplot(x = y_unpert_test, y = y_unpert_pred, scatter_kws=dict(alpha=0.1), ax = ax_perturb)
-
-    if len(perturbed_ions)>0:
-        is_perturbed_vec = [x for x in range(len(ionnames)) if ionnames[x] in perturbed_ions]
-        y_pert_test = y_test[is_perturbed_vec]
-        y_pert_pred = y_pred[is_perturbed_vec]
-        sns.scatterplot(x = y_pert_test, y = y_pert_pred, alpha=0.1, label="perturbed", color = sns.color_palette()[1], ax = ax_perturb)
-        plot_perturbed_unperturbed_fcs(fcs_perturbed=y_pert_test,fcs_unperturbed=y_unpert_test, results_dir = results_dir)
-        plot_perturbed_unperturbed_fcs(fcs_perturbed=[x.fc for x in nodes if hasattr(x, "perturbation_added")], fcs_unperturbed=[x.fc for x in nodes if not hasattr(x, "perturbation_added")])
-        plot_perturbation_histogram([x for x in nodes if hasattr(x, "perturbation_added")], results_dir)
-
-
-    err = mean_squared_error(y_test, y_pred)
-    r2 = r2_score(y_test, y_pred)
-
-    ax_perturb.set_title(f"MSE: {err:.2f}, R2: {r2:.2f}")
-
-    if results_dir is not None:
-        fig_perturb.savefig(f"{results_dir}/ml_regression.pdf")
-    plt.show()
-
-
 def scatter_ml_regression(y_test, y_pred, results_dir = None):
     fig, ax = plt.subplots()
 
@@ -1142,13 +331,6 @@ def scatter_ml_regression(y_test, y_pred, results_dir = None):
 
 
 
-def plot_perturbation_histogram(perturbed_nodes, results_dir):
-    fig, ax = plt.subplots()
-    perturbations = [x.perturbation_added for x in perturbed_nodes]
-    ax.hist(perturbations, 60, density=True, histtype='step',cumulative=True)
-    if results_dir is not None:
-        ax.figure.savefig(f"{results_dir}/perturbation_histogram.pdf")
-
 def plot_perturbed_unperturbed_fcs(fcs_perturbed, fcs_unperturbed, results_dir = None):
     fig, ax = plt.subplots()
     ax.hist(bins = 60, x=fcs_unperturbed, label= 'unperturbed',density=True, histtype='step')
@@ -1157,17 +339,6 @@ def plot_perturbed_unperturbed_fcs(fcs_perturbed, fcs_unperturbed, results_dir =
         ax.figure.savefig(f"{results_dir}/compare_pertubed_unperturbed.pdf")
 
 
-def plot_ml_fc_histograms(y_test, y_pred, cutoff, results_dir = None):
-
-    plot_fc_histogram_ml_filtered(y_test, y_pred, cutoff, show_filtered=False)
-    if results_dir is not None:
-        plt.savefig(f"{results_dir}/observed_offsets_ml_filtered.pdf")
-    plt.show()
-    plot_fc_histogram_ml_filtered(y_test, y_pred, cutoff, show_filtered=True)
-    if results_dir is not None:
-        plt.savefig(f"{results_dir}/observed_offsets_nofilt.pdf")
-    plt.show()
-
 # Cell
 import seaborn as sns
 import numpy as np
@@ -1246,89 +417,6 @@ def filter_sort_top_n(imp, names, top_n):
     names = [x[1] for x in tuplelist]
     return imp, names
 
-# Cell
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-def visualize_gaussian_mixture_fit(gmm, y_pred):
-
-    data = np.array(y_pred) ##loading univariate data.
-
-    plt.figure()
-    plt.hist(data, bins=50, histtype='stepfilled', density=True, alpha=0.5)
-    plt.xlim(min(y_pred), max(y_pred))
-    f_axis = data.copy().ravel()
-    f_axis.sort()
-    a = []
-    weight_mean_cov = list(zip(gmm.weights_, gmm.means_, gmm.covariances_))
-    weight_mean_cov.sort(key = lambda x : x[0], reverse=True)
-    for weight, mean, covar in weight_mean_cov:
-        a.append(weight*norm.pdf(f_axis, mean, np.sqrt(covar)).ravel())
-        plt.plot(f_axis, a[-1])
-    plt.plot(f_axis, np.array(a).sum(axis =0), 'k-')
-    plt.xlabel('Variable')
-    plt.ylabel('PDF')
-    plt.tight_layout()
-    plt.show()
-
-# Cell
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-def visualize_gaussian_nomix_subfit(mean, var, y_pred, y_subset):
-
-    data = np.array(y_pred) ##loading univariate data.
-
-    plt.figure()
-    plt.hist(data, bins=50, histtype='stepfilled', density=True, alpha=0.5)
-    plt.xlim(min(y_pred), max(y_pred))
-    f_axis = data.copy().ravel()
-    f_axis.sort()
-    a = []
-    weight = len(y_subset)/len(y_pred)
-
-    a.append(weight*norm.pdf(f_axis, mean, np.sqrt(var)).ravel())
-    plt.plot(f_axis, a[-1])
-    plt.plot(f_axis, np.array(a).sum(axis =0), 'k-')
-    plt.xlabel('Variable')
-    plt.ylabel('PDF')
-    plt.tight_layout()
-    plt.show()
-
-# Cell
-import matplotlib.pyplot as plt
-
-def visualize_filtered_non_filtered_precursors(all_precursors, ml_score = None):
-
-    fcs_unfilt = [x.fc for x in all_precursors]
-    if ml_score is not None:
-        fcs_filt = [x.fc for x in all_precursors if abs(x.ml_score)<ml_score]
-    else:
-        fcs_filt = [x.fc for x in all_precursors if not x.ml_excluded]
-    plt.hist([x.ml_score for x in all_precursors],  histtype='step', bins=60)
-    plt.show()
-    plt.hist(fcs_unfilt, histtype='step', bins=60, label=f"unfilt ({len(fcs_unfilt)})", density=True)
-    plt.hist(fcs_filt, histtype='step', bins=60, label = f"filt ({len(fcs_filt)})", density=True)
-    #plt.xlim(-2, 2)
-    plt.legend()
-    plt.show()
-
-# Cell
-import seaborn as sns
-import matplotlib.pyplot as plt
-def plot_fcs_node(nodes, percentile, node_filterfunction = None):
-    if node_filterfunction is not None:
-        nodes = [x for x in nodes if node_filterfunction(x)]
-    nodes_sorted = sorted(nodes,key= lambda x : abs(x.ml_score))
-    nodes_sorted = nodes_sorted[:int(len(nodes_sorted)*percentile)]
-    sns.stripplot(data = [x.fc for x in nodes_sorted])
-   # plt.show()
-
-
-
-
 # Cell
 
 import matplotlib.pyplot as plt
@@ -1336,6 +424,7 @@ def plot_fcs_node(nodes, percentile, node_filterfunction = None):
 from sklearn import metrics
 import random
 import seaborn as sns
+import alphaquant.diffquant.diffutils as aqdiffutils
 
 
 
@@ -1450,6 +539,7 @@ def plot_precision_recall_curve(true_falses, scores, name, ax):
 
 # Cell
 import alphaquant.plotting.base_functions as aqviz
+import alphaquant.utils.utils as aqutils
 
 import anytree
 import matplotlib.pyplot as plt
diff --git a/alphaquant/plotting/classify.py b/alphaquant/plotting/classify.py
index 57905bd2..be6ce96b 100644
--- a/alphaquant/plotting/classify.py
+++ b/alphaquant/plotting/classify.py
@@ -13,22 +13,6 @@ def plot_value_histogram(y, results_dir = None):
     plt.show()
 
 
-def scatter_ml_regression_combined(y_test, y_pred, results_dir = None):
-    fig, ax = plt.subplots()
-
-    sns.regplot(x = abs(y_test), y = y_pred, scatter_kws=dict(alpha=0.1), ax = ax)
-    err = sklearn.metrics.mean_squared_error(y_test, y_pred)
-    r2 = sklearn.metrics.r2_score(y_test, y_pred)
-
-    ax.set_xlabel("true offset")
-    ax.set_ylabel("predicted offset")
-
-    ax.set_title(f"MSE: {err:.2f}, R2: {r2:.2f}")
-
-    if results_dir is not None:
-        fig.savefig(f"{results_dir}/ml_regression.pdf")
-    plt.show()
-
 def scatter_ml_regression_testsets(test_set_predictions, results_dir = None):
 
     fig, axes = plt.subplots(ncols=len(test_set_predictions), figsize=(2.5 * len(test_set_predictions), 4))
diff --git a/alphaquant/plotting/fcviz_medianref.py b/alphaquant/plotting/fcviz_medianref.py
index 5f104757..3a96cae0 100644
--- a/alphaquant/plotting/fcviz_medianref.py
+++ b/alphaquant/plotting/fcviz_medianref.py
@@ -94,14 +94,6 @@ def _plot_individual_figures(self, protein_of_interest, valid_pairs, fig, axes):
                 LOGGER.error(f"Failed to plot protein {protein_of_interest} for condition pair {condpair}: {e}")
         return fig, axes
 
-    def _get_available_proteins(self):
-        """Get list of all available proteins across all condition pairs."""
-        all_proteins = set()
-        for protein_dict in self.protein2nodes:
-            all_proteins.update(protein_dict.keys())
-        return all_proteins
-
-
 class MedianRefConditionPairGetter():
     def __init__(self, results_directory):
         self.condition_pairs = self._get_condition_pairs(results_directory)
diff --git a/alphaquant/plotting/multicond.py b/alphaquant/plotting/multicond.py
index 5549e4b5..e69de29b 100644
--- a/alphaquant/plotting/multicond.py
+++ b/alphaquant/plotting/multicond.py
@@ -1,47 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-import numpy as np
-
-def plot_proteoform_intensity_profiles(proteoform_df : pd.DataFrame):
-    """creates a panel of proteoform intensity profiles for each protein in the dataframe
-
-    Args:
-        proteoform_df (pd.DataFrame): proteoform dataframe loaded from the AlphaQuant output file `medianref_proteoforms.tsv`
-
-    Returns:
-        fig, axes: matplotlib figure and axes objects
-    """
-    fixed_columns = ['proteoform_id', 'peptides', 'number_of_peptides', 'protein', 'corr_to_ref', 'is_reference']
-    conditions = proteoform_df.columns.difference(fixed_columns)
-
-    # Group by protein
-    grouped = proteoform_df.groupby('protein')
-    n_plots = len(grouped)
-
-    # Create a figure with a grid of subplots
-    n_cols = int(np.ceil(np.sqrt(n_plots)))
-    n_rows = int(np.ceil(n_plots / n_cols))
-
-    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows))
-
-    # Flatten the axes array for easy indexing
-    axes = axes.flatten()
-
-    # Loop through each group and create a plot
-    for ax, (protein, sub_df) in zip(axes, grouped):
-        for idx, row in sub_df.iterrows():
-            style = '-' if row['is_reference'] else '--'
-            ax.plot(conditions, row[conditions], style, label=row['proteoform_id'])
-
-        ax.set_title(f'{protein}')
-        ax.set_xlabel('Tissue')
-        ax.set_ylabel('Expression Level')
-        ax.legend(title='Proteoform ID')
-
-    # Turn off any unused subplots
-    for i in range(n_plots, len(axes)):
-        axes[i].axis('off')
-    
-    fig.tight_layout()
-    plt.show()
-    return fig, axes
\ No newline at end of file
diff --git a/alphaquant/plotting/pairwise.py b/alphaquant/plotting/pairwise.py
index 2a21dc03..4fc97817 100644
--- a/alphaquant/plotting/pairwise.py
+++ b/alphaquant/plotting/pairwise.py
@@ -11,18 +11,6 @@
 LOGGER = logging.getLogger(__name__)
 
 
-def plot_normalization_overview(normed_df, samplemap_df):
-    normed_df, sample2cond = aq_diff_utils.prepare_loaded_tables(normed_df, samplemap_df)
-    sample2cond = dict(zip(samplemap_df["sample"], samplemap_df["condition"]))
-    conditions = list(set([sample2cond.get(x) for x in normed_df.columns]))
-    conditions = [x for x in conditions if x is not None]
-    df_c1 = normed_df[[x for x in normed_df.columns if sample2cond.get(x) == conditions[0]]]
-    df_c2 = normed_df[[x for x in normed_df.columns if sample2cond.get(x) == conditions[1]]]
-
-    plot_betweencond_fcs(df_c1, df_c2, merge_samples=True)
-    plot_sample_vs_median_fcs(df_c1, df_c2)
-
-
 def plot_withincond_normalization(df_c1, df_c2):
     LOGGER.info("without missingvals (if applicable)")
     plot_betweencond_fcs(aqnorm.drop_nas_if_possible(df_c1), aqnorm.drop_nas_if_possible(df_c2), True)
@@ -57,32 +45,6 @@ def plot_betweencond_fcs(df_c1_normed, df_c2_normed, merge_samples=True, cumulat
     return fig, axes
 
 
-def plot_sample_vs_median_fcs(df_c1_normed, df_c2_normed):
-    """Plots the distribution of fold changes between each sample and the median across all samples."""
-
-    # Calculate the median across all samples from both conditions
-    combined_median = pd.concat([df_c1_normed, df_c2_normed], axis=1).median(axis=1, skipna=True)
-
-    fig, axes = plt.subplots()  # Create a new figure and axes
-
-    # Compare each sample against the combined median and plot
-    for df in [df_c1_normed, df_c2_normed]:
-        for col in df.columns:
-            diff_fcs = df[col].subtract(combined_median)
-
-            axes.axvline(0, color='red', linestyle="dashed")  # helper line at 0
-            cutoff = max(abs(np.nanquantile(diff_fcs, 0.025)), abs(np.nanquantile(diff_fcs, 0.975)))  # determine 2.5% - 97.5% interval
-
-            axes.hist(diff_fcs, 80, density=True, histtype='step', range=(-cutoff, cutoff), label=col)  # set the cutoffs to focus the visualization
-
-    axes.set_xlabel("log2(fc)")
-    axes.legend()
-    #place legend outside of plot
-    axes.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
-    return fig, axes
-
-
-
 def volcano_plot(results_df, fc_header="log2fc", fdr_header="fdr", fdr_cutoff=0.05,
                  log2fc_cutoff=0.5, xlim=None, ylim = None,
                  organism_column=None, organism2color_dict=None,
diff --git a/alphaquant/ui/dashboad_parts_plots_basic.py b/alphaquant/ui/dashboad_parts_plots_basic.py
index 4dc47e42..0cac0952 100644
--- a/alphaquant/ui/dashboad_parts_plots_basic.py
+++ b/alphaquant/ui/dashboad_parts_plots_basic.py
@@ -326,13 +326,6 @@ def _update_fc_visualizer(self):
             except Exception as e:
                 self.fc_visualizer = None
 
-    def _on_tree_level_changed(self, event):
-        """Handle tree level changes.
-        !the method name has to follow the naming pattern on_<param>_changed in order to be recognized by the state manager"""
-        self._update_fc_visualizer()
-        if self.protein_input.value:
-            self._update_protein_plot(self.protein_input.value)
-
     def _on_show_plots_clicked(self, event):
         """Handle show plots button click."""
         if self.cond1 and self.cond2:
diff --git a/alphaquant/ui/dashboard_parts_plots_proteoforms.py b/alphaquant/ui/dashboard_parts_plots_proteoforms.py
index 2ed1a9dd..68542ba0 100644
--- a/alphaquant/ui/dashboard_parts_plots_proteoforms.py
+++ b/alphaquant/ui/dashboard_parts_plots_proteoforms.py
@@ -2,7 +2,6 @@
 import panel as pn
 import pandas as pd
 import os
-import itertools
 import glob
 import re
 
@@ -293,27 +292,6 @@ def _on_protein_selected(self, event):
             self.proteoform_plot_pane.clear()
             self.proteoform_plot_pane.append(pn.pane.Markdown("### Click 'Plot Protein' to visualize the data"))
 
-    def _update_condition_pairs_from_df(self, df):
-        """Update condition pairs based on the samplemap DataFrame."""
-        if 'condition' in df.columns:
-            unique_conditions = df['condition'].dropna().unique()
-            pairs = [(c1, c2) for c1, c2 in itertools.permutations(unique_conditions, 2)]
-            pairs_str = [f"{c1}{aq_variables.CONDITION_PAIR_SEPARATOR}{c2}" for c1, c2 in pairs]
-            self.condpairname_select.options = ["No conditions"] + pairs_str
-
-    def _load_protein_identifiers(self, results_file):
-        """Load protein identifiers from results file and update the protein input widget."""
-        try:
-            proteoforms_df = pd.read_csv(results_file, sep='\t')
-            protein_ids = sorted(proteoforms_df['gene_symbol'].unique().tolist())
-            self.protein_input.options = protein_ids
-            self.protein_input.disabled = False
-            return protein_ids
-        except Exception as e:
-            self.protein_input.disabled = True
-            self.protein_input.options = []
-            raise Exception(f"Failed to load protein identifiers: {str(e)}")
-
     def _on_proteoform_selected(self, event):
         """Handle proteoform selection from table."""
         print(f"_on_proteoform_selected called with event: {event}")

From a3ce7436b6c22d55d4687a05b47646aab602cfa6 Mon Sep 17 00:00:00 2001
From: ammarcsj <70114795+ammarcsj@users.noreply.github.com>
Date: Mon, 25 May 2026 10:05:19 +0200
Subject: [PATCH 3/3] Remove dead utility and analysis functions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 alphaquant/diffquant/diffutils.py             | 79 -------------------
 alphaquant/norm/normalization.py              |  5 --
 .../quant_reader/quant_reader_manager.py      |  2 -
 alphaquant/resources/database_loader.py       |  9 ---
 alphaquant/utils/benchmark_utils.py           | 14 ----
 alphaquant/utils/diffquant_utils.py           | 17 ----
 alphaquant/utils/utils.py                     | 13 ---
 7 files changed, 139 deletions(-)

diff --git a/alphaquant/diffquant/diffutils.py b/alphaquant/diffquant/diffutils.py
index df5d02cf..59ea900e 100644
--- a/alphaquant/diffquant/diffutils.py
+++ b/alphaquant/diffquant/diffutils.py
@@ -56,32 +56,6 @@ def get_samples_used_from_samplemap_df(samplemap_df, cond1, cond2):
     samples_c2 = samplemap_df[[cond2 == x for x in samplemap_df["condition"]]]["sample"]
     return list(samples_c1), list(samples_c2)
 
-def get_all_samples_from_samplemap_df(samplemap_df):
-    return list(samplemap_df["sample"])
-
-# Cell
-import pandas as pd
-
-def get_samplenames_from_input_df(data):
-    """extracts the names of the samples of the AQ input dataframe"""
-    names = list(data.columns)
-    names.remove('protein')
-    names.remove(QUANT_ID)
-    return names
-
-# Cell
-import numpy as np
-def filter_df_to_min_valid_values(quant_df_wideformat, samples_c1, samples_c2, min_valid_values):
-    """filters dataframe in alphaquant format such that each column has a minimum number of replicates
-    """
-    quant_df_wideformat = quant_df_wideformat.replace(0, np.nan)
-    df_c1_min_valid_values = quant_df_wideformat[samples_c1].dropna(thresh = min_valid_values, axis = 0)
-    df_c2_min_valid_values = quant_df_wideformat[samples_c2].dropna(thresh = min_valid_values, axis = 0)
-    idxs_both = df_c1_min_valid_values.index.intersection(df_c2_min_valid_values.index)
-    quant_df_reduced = quant_df_wideformat.iloc[idxs_both].reset_index()
-    return quant_df_reduced
-
-
 # Cell
 def get_condpairname(condpair):
     return f"{condpair[0]}_VS_{condpair[1]}"
@@ -102,39 +76,6 @@ def make_dir_w_existcheck(dir):
     if not os.path.exists(dir):
         os.makedirs(dir)
 
-# Cell
-import os
-def get_results_plot_dir_condpair(results_dir, condpair):
-    results_dir_plots = f"{results_dir}/{condpair}_plots"
-    make_dir_w_existcheck(results_dir_plots)
-    return results_dir_plots
-
-# Cell
-def get_middle_elem(sorted_list):
-    nvals = len(sorted_list)
-    if nvals==1:
-        return sorted_list[0]
-    middle_idx = nvals//2
-    if nvals%2==1:
-        return sorted_list[middle_idx]
-    return 0.5* (sorted_list[middle_idx] + sorted_list[middle_idx-1])
-
-# Cell
-import numpy as np
-def get_nonna_array(array_w_nas):
-    res = []
-    isnan_arr = np.isnan(array_w_nas)
-
-    for idx in range(len(array_w_nas)):
-        sub_res = []
-        sub_array = array_w_nas[idx]
-        na_array = isnan_arr[idx]
-        for idx2 in range(len(sub_array)):
-            if not na_array[idx2]:
-               sub_res.append(sub_array[idx2])
-        res.append(np.array(sub_res))
-    return np.array(res)
-
 # Cell
 import numpy as np
 def get_non_nas_from_pd_df(df):
@@ -152,12 +93,6 @@ def get_ionints_from_pd_df(df):
     }
 
 # Cell
-def invert_dictionary(my_map):
-    inv_map = {}
-    for k, v in my_map.items():
-        inv_map[v] = inv_map.get(v, []) + [k]
-    return inv_map
-
 from collections import defaultdict
 def invert_tuple_list_w_nonunique_values(tuple_list):
     inverted_dict = defaultdict(list)
@@ -373,20 +308,6 @@ def get_path_to_unformatted_file(input_file_name):
 
 
 
-# Cell
-
-# Cell
-import os
-def check_for_processed_runs_in_results_folder(results_folder):
-    contained_condpairs = []
-    folder_files = os.listdir(results_folder)
-    result_files = list(filter(lambda x: "results.tsv" in x ,folder_files))
-    for result_file in result_files:
-        res_name = result_file.replace(".results.tsv", "")
-        if ((f"{res_name}.normed.tsv" in folder_files) and (f"{res_name}.results.ions.tsv" in folder_files)):
-            contained_condpairs.append(res_name)
-    return contained_condpairs
-
 # Cell
 import pandas as pd
 import os
diff --git a/alphaquant/norm/normalization.py b/alphaquant/norm/normalization.py
index 1f80bb70..4a7dd726 100644
--- a/alphaquant/norm/normalization.py
+++ b/alphaquant/norm/normalization.py
@@ -136,11 +136,6 @@ def determine_anchor_and_shift_sample(sample2counts, i_min, j_min, min_distance)
     flip = 1 if anchor_idx == i_min else -1
     return anchor_idx, shift_idx, flip*min_distance
 
-# Cell
-def shift_samples(samples, sampleidx2anchoridx, sample2shift):
-    for sample_idx in range(samples.shape[0]):
-        samples[sample_idx] = samples[sample_idx]+get_total_shift(sampleidx2anchoridx, sample2shift, sample_idx)
-
 # Cell
 def get_total_shift(sampleidx2anchoridx, sample2shift,sample_idx):
 
diff --git a/alphaquant/quant_reader/quant_reader_manager.py b/alphaquant/quant_reader/quant_reader_manager.py
index 2f5dd4b3..c9865c36 100644
--- a/alphaquant/quant_reader/quant_reader_manager.py
+++ b/alphaquant/quant_reader/quant_reader_manager.py
@@ -72,5 +72,3 @@ def reformat_and_save_input_file(
     return outfile_name
 
 
-def set_quanttable_config_location(quanttable_config_file):
-    config_dict_loader.INTABLE_CONFIG = quanttable_config_file
diff --git a/alphaquant/resources/database_loader.py b/alphaquant/resources/database_loader.py
index efdeee39..d0eb13f3 100644
--- a/alphaquant/resources/database_loader.py
+++ b/alphaquant/resources/database_loader.py
@@ -28,15 +28,6 @@ def get_genename2sequence_dict( organism = "human"):
 
     return gene2sequence_dict
 
-def get_swissprot2sequence_dict( organism = "human"):
-    swissprot_file = get_swissprot_path(organism)
-    swissprot_df = pd.read_csv(swissprot_file, sep = '\t', usecols=['Entry', 'Sequence'])
-    swissprot_ids = swissprot_df['Entry'].astype(str).tolist()
-    sequences = swissprot_df['Sequence'].astype(str).tolist()
-
-    swissprot2sequence_dict = dict(zip(swissprot_ids, sequences))
-    return swissprot2sequence_dict
-
 def get_uniprot2sequence_dict( organism = "human"):
     swissprot_file = get_swissprot_path(organism)
     swissprot_df = pd.read_csv(swissprot_file, sep = '\t', usecols=['Entry', 'Sequence'])
diff --git a/alphaquant/utils/benchmark_utils.py b/alphaquant/utils/benchmark_utils.py
index bb6e29c9..e69de29b 100644
--- a/alphaquant/utils/benchmark_utils.py
+++ b/alphaquant/utils/benchmark_utils.py
@@ -1,14 +0,0 @@
-import numpy as np
-def subset_df_to_n_most_complete_proteins(proteome_df_aq_reformat, proteome_df_original, n = 100, protein_header = "PG.ProteinGroups", 
-                                          protein_subset_to_use = None, use_only_complete_columns = False):
-    proteome_df_aq_reformat = proteome_df_aq_reformat.set_index(["protein", "quant_id"]).replace(0, np.nan)
-    if use_only_complete_columns:
-        proteome_df_aq_reformat = proteome_df_aq_reformat.dropna()
-    
-    proteome_df_aq_reformat = proteome_df_aq_reformat.reset_index()
-
-    set_of_proteins = set(proteome_df_aq_reformat["protein"].unique())
-    if protein_subset_to_use is not None:
-        set_of_proteins = protein_subset_to_use.intersection(set_of_proteins)
-    
-    return np.random.choice(list(set_of_proteins), n)
\ No newline at end of file
diff --git a/alphaquant/utils/diffquant_utils.py b/alphaquant/utils/diffquant_utils.py
index 0d4f65ba..5a49abe3 100644
--- a/alphaquant/utils/diffquant_utils.py
+++ b/alphaquant/utils/diffquant_utils.py
@@ -1,19 +1,2 @@
 import pandas as pd
 import numpy as np
-
-
-
-def find_non_outlier_indices_ipr(data, threshold=1.5, percentile_lower = 25, percentile_upper = 75):
-    
-    value_lower, value_upper = np.percentile(data, [percentile_lower, percentile_upper])
-    iqr = value_upper - value_lower
-
-    # Calculate the bounds for non-outliers
-    cut_off = iqr * threshold
-    lowest_tolerated_value = value_lower - cut_off
-    highest_tolerated_value = value_upper + cut_off
-
-    # Identify non-outlier indices
-    non_outlier_indices = np.where((data >= lowest_tolerated_value) & (data <= highest_tolerated_value))[0]
-
-    return non_outlier_indices
\ No newline at end of file
diff --git a/alphaquant/utils/utils.py b/alphaquant/utils/utils.py
index dacc3e62..feada3e7 100644
--- a/alphaquant/utils/utils.py
+++ b/alphaquant/utils/utils.py
@@ -55,19 +55,6 @@ def cut_trailing_parts_seqstring(seqstring):
 def get_condpairname(condpair):
     return f"{condpair[0]}_VS_{condpair[1]}"
 
-def get_condpair_from_condpairname(condpairname):
-    return condpairname.split(aq_variables.CONDITION_PAIR_SEPARATOR)
-
-
-def convert_ion_string_to_node_type(ionstring, node_type): #for example I have a full quant_id that describes a fragment ion, I want to shorten it to the specified leve, e.g. sequence
-    regex = NODETYPE2REGEX[node_type]
-    match = re.match(regex, ionstring)
-    if match:
-        return match.group(1)
-    else:
-        raise ValueError(f"Could not match {ionstring} to {node_type}. This function only works for the following node types: seq, mod_seq, mod_seq_charge")
-
-
 def get_progress_folder_filename(input_file, file_ending, remove_extension = True): #file ending needs to include all dots, e.g. ".aq_reformat.tsv"
     input_file = os.path.abspath(input_file) #to make sure that the path is absolute
     dirname_input_file = os.path.dirname(input_file)