elementary-mode-based-FBA-rationale/ECM_analysis_ecoli_iJR.py at main · SystemsBioinformatics/elementary-mode-based-FBA-rationale · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import itertools

from helpers_EFM_FBA import *

"""CONSTANTS"""
model_name = "iJR904"

# Define directories for storing: 1) newly made models, 2) results such as figures, and 3) for finding models
sbml_dir = os.path.join(os.getcwd(), "data", model_name + "_2", 'models', 'sbml')
result_dir = os.path.join(os.getcwd(), "data", model_name + "_2", "EFM_yield_analysis")
model_dir = os.path.join("models")
LOAD_IF_AVAILABLE = False  # If this is true, newly created models will be loaded if the program runs a second time
N_KOS = 0  # number of knockout models that will be created
# adjust DROP_MODELS based on this.
# The complete model is sometimes to large to calculate ECMs. This is therefore dropped manually
DROP_MODEL_TAGS = ['full', 'active', 'fva', 'active_hidden', 'ko']  # ['full','active','fva','hidden','active_hidden' ,'ko']
USE_EXTERNAL_COMPARTMENT = None
ONLY_RAYS = False  # True or False
SAVE_result = False  # saves list_model_dict after ECM enumeration and calculation of ECM activities in FBA solution
VERBOSE = True

"""START ANALYSIS"""
"""Create folders for storing the results"""
if not os.path.exists(sbml_dir):
    os.makedirs(sbml_dir)
if not os.path.exists(result_dir):
    os.makedirs(result_dir)

"""Load main model and clone it to have a model that we can adapt"""
try:
    cmod = cbm.readSBML3FBC(os.path.join(model_dir, model_name + ".xml"))
except:  # use version 2
    cmod = cbm.readSBML2FBA(os.path.join(model_dir, model_name + ".xml"))

cbm.doFBAMinSum(cmod)
intermediate_cmod = cmod.clone()

"""Adapt model to your liking, and update the FBA"""
oxygen_reaction = intermediate_cmod.getReaction('R_EX_o2_e')

################ Adapt oxygen uptake reaction here #####################
oxygen_reaction.setLowerBound(-10.)  # -15, -10, -5-> give two active constraints; -1000 -> give 1 active constraint
########################################################################

atp_reaction = intermediate_cmod.getReaction('R_ATPM')
atp_reaction.setLowerBound(0.)
intermediate_cmod.setReactionLowerBound("R_EX_co2_e", 0.)
intermediate_cmod.setReactionLowerBound("R_EX_fe2_e", 0.)
intermediate_cmod.setReactionLowerBound("R_EX_h2o_e", 0.)
intermediate_cmod.setReactionLowerBound("R_EX_h_e", 0.)
intermediate_cmod.setReactionLowerBound("R_EX_k_e", 0.)
intermediate_cmod.setReactionLowerBound("R_EX_na1_e", 0.)

# Find FBA objective value
original_FBA_val = cbm.doFBA(intermediate_cmod)
cbm.doFBAMinSum(intermediate_cmod)

"""Create list with a dictionary in which all info will be stored for each model"""
list_model_dicts = []

# Create dictionary for original model
list_model_dicts.append(
    {'model': intermediate_cmod, 'model_name': 'original_network', 'calc_efms': False, 'get_activities': True,
     'get_relevant_efms': False, 'drop_tag': 'full'})

if ('active' not in DROP_MODEL_TAGS) or ('active_hidden' not in DROP_MODEL_TAGS):
    # Deletes all non active reactions (determined by seeing if the flux is lower than a tolerance)
    sub_model_name = 'active_subnetwork'
    active_model_path = os.path.join(sbml_dir, sub_model_name + ".xml")
    if os.path.exists(active_model_path) and LOAD_IF_AVAILABLE:  # Checks if this model was already made
        try:
            intermediate_cmod_active = cbm.readSBML3FBC(active_model_path)
        except:  # use version 2
            intermediate_cmod_active = cbm.readSBML2FBA(active_model_path)
    else:
        intermediate_cmod_active = delete_non_active_network(intermediate_cmod, which_zeros='flux_zero', zero_tol=1e-15,
                                                             opt_tol=1e-8)  # 15, 8

    # Create dictionary for active subnetwork
    list_model_dicts.append(
        {'model': intermediate_cmod_active, 'model_name': sub_model_name, 'calc_efms': False, 'get_activities': True,
         'get_relevant_efms': False, 'drop_tag': 'active'})

# Delete only reactions that are never active (based on FVA)
if 'fva' not in DROP_MODEL_TAGS:
    sub_model_name = 'active_subnetwork_FVA'
    model_path = os.path.join(sbml_dir, sub_model_name + ".xml")
    if os.path.exists(model_path) and LOAD_IF_AVAILABLE:  # Checks if this model was already made
        try:
            intermediate_cmod_FVA_active = cbm.readSBML3FBC(model_path)
            intermediate_cmod_FVA_active.setNotes(
                intermediate_cmod_FVA_active.getNotes().encode(encoding='ascii', errors='ignore'))
        except:  # use version 2
            intermediate_cmod_FVA_active = cbm.readSBML2FBA(model_path)
    else:
        intermediate_cmod_FVA_active = delete_non_active_network(intermediate_cmod, which_zeros='FVA_zero')

    # Create dictionary for active subnetwork based on FVA
    list_model_dicts.append(
        {'model': intermediate_cmod_FVA_active, 'model_name': sub_model_name, 'calc_efms': False,
         'get_activities': True, 'get_relevant_efms': False,  'drop_tag': 'fva'})

# Optional: Create list of models with reactions that are active when certain knockouts are created
# This is a way to create more models to try-out stuff. You can use this option by setting n_KOs > 0
list_KO_models = create_KO_models(intermediate_cmod, n_KOs=N_KOS)
for model_ind, model in enumerate(list_KO_models):
    list_model_dicts.append({'model': model, 'model_name': 'active_subnetwork_KO_%d' % model_ind, 'calc_efms': True,
                             'get_activities': True, 'get_relevant_efms': True,  'drop_tag': 'ko'}) #was False

"""Rebuild stoichiometric matrix for reduced models"""
for model_dict in list_model_dicts:
    model_dict['model'].buildStoichMatrix()
    cbm.doFBAMinSum(model_dict['model'])

"""Store the created models in the result- and in the xml-directory"""
for model_dict in list_model_dicts:
    model_id = model_dict['model_name']
    model = model_dict['model']
    model_path = os.path.join(sbml_dir, model_id + ".xml")
    model_dict['model_path'] = model_path
    if not os.path.exists(model_path) or not LOAD_IF_AVAILABLE:
        cbm.writeSBML3FBCV2(model, model_path, add_cbmpy_annot=False)
        cbm.writeSBML3FBCV2(model, os.path.join(result_dir, model_id + ".xml"), add_cbmpy_annot=False)

full_model_path = list_model_dicts[0]['model_path']

"""Find the relevant, i.e., growth-limiting, constraints for the original model"""
# Do non-zero reduced cost analysis
cbm.analyzeModel(intermediate_cmod)
nzrc_dictionaries, n_objectives = get_nzrc(intermediate_cmod)
cbm.doFBAMinSum(intermediate_cmod)

"""Check which metabolite is coupled to the constrained reactions. If no metab is coupled, tag the reaction
with a virtual metabolite"""
nzrc_dictionaries, reactions_to_tag = findConstraintMetabolites(nzrc_dictionaries, intermediate_cmod)

"""Determine active objective function. Split the non-zero reduced costs in objectives and constraints"""
infos_obj, infos_cons = get_info_objectives_constraints(nzrc_dictionaries, intermediate_cmod)

if 'hidden' not in DROP_MODEL_TAGS:
    """Get relevant metabolites for the cost-calculations, find indices of external metabs that can be ignored"""
    # We only have to focus on conversions of metabolites that are somehow active in a constraint
    relevant_metabs = [info_dict['ext_metab'] for info_dict in infos_obj + infos_cons]

    # The following finds all indices of external metabolites that can be ignored in the conversion analysis
    hide_indices = find_hide_indices(full_model_path, to_be_tagged=reactions_to_tag, focus_metabs=relevant_metabs,
                                     use_external_compartment=USE_EXTERNAL_COMPARTMENT)
# By default no metabolites are hidden
for model_dict in list_model_dicts:
    model_dict['hide_metabolites'] = []

if 'hidden' not in DROP_MODEL_TAGS:
    """Create another dictionary for a model in which some metabolites are hidden"""
    list_model_dicts.append(
        {'model': intermediate_cmod, 'model_name': 'original_with_hidden_metabolites', 'calc_efms': False,
         'get_activities': True, 'hide_metabolites': hide_indices, 'get_relevant_efms': True,
         'model_path': os.path.join(sbml_dir, "original_network.xml"), 'drop_tag': 'hidden'})

if 'active_hidden' not in DROP_MODEL_TAGS:
    """Create another dictionary for the active model with some metabolites hidden"""
    active_hide_indices = find_hide_indices(active_model_path, to_be_tagged=reactions_to_tag, focus_metabs=relevant_metabs,
                                            use_external_compartment=USE_EXTERNAL_COMPARTMENT)
    list_model_dicts.append(
        {'model': intermediate_cmod_active, 'model_name': 'active_network_with_hidden_metabolites', 'calc_efms': False, #False
         'get_activities': True, 'hide_metabolites': active_hide_indices, 'get_relevant_efms': True,
         'model_path': active_model_path,  'drop_tag': 'active_hidden'})

"""Calculate ECMs and/or EFMs"""
# For genome-scale models we cannot yet calculate ECMs, then we should calculate them for only the active subnetwork,
# or only for the model with external metabolites that are ignored.

# We remove some models defined at top of the file
list_model_dicts_remember = list_model_dicts.copy()
list_model_dicts = [model_dict for model_dict in list_model_dicts if model_dict['drop_tag'] not in DROP_MODEL_TAGS]

# Load ECMs file E.coli iJR
ecms_df_pre = pd.read_csv(os.path.join(result_dir, 'iJR_hideallexceptglco2biomass.csv')) # not rounded
ecms_df_pre = ecms_df_pre.transpose()

# and update model_dict
ecms_from_efms = None
efms_df = None
#full_network_ecm = None
for model_dict in list_model_dicts:
    full_network_ecm = get_full_network(file_path=model_dict['model_path'],
                                        reactions_to_tag=reactions_to_tag,
                                        print_results=False,
                                        hide_metabs=model_dict['hide_metabolites'],
                                        use_external_compartment=USE_EXTERNAL_COMPARTMENT,
                                        only_rays=ONLY_RAYS
                                        )
    # Find all metabolite_ids in the order used by ECMtool
    mets = [met.id for met in full_network_ecm.metabolites]

    ecms_df_pre_norm = pd.DataFrame(np.zeros((len(mets), len(ecms_df_pre.columns))), index=mets, columns=ecms_df_pre.columns)
    for met in ecms_df_pre.index:
        ecms_df_pre_norm.loc[met] = ecms_df_pre.loc[met]

    # Normalize the ECMs. We first normalize all ECMs that produce the objective to produce one objective, after that
    # we normalize to different metabolite productions, such that the ECMs are maximally comparable
    ecms_matrix = normalize_ECMS_objective_first(ecms_df_pre_norm.to_numpy(), full_network_ecm, infos_obj)

    # Create dataframe with the ecms as columns and the metabolites as index
    ecms_df = pd.DataFrame(ecms_matrix, index=mets)

    model_dict.update(
        {'ecms_df': ecms_df, 'network': full_network_ecm, 'efms_df': efms_df, 'ecms_from_efms': ecms_from_efms})

"""Select only the relevant part of the ECM information. So, only the consumption/production flux of metabolites that
    are associated to a flux bound."""
for model_dict in list_model_dicts:
    model_dict['infos_cons'] = infos_cons
    model_dict['infos_obj'] = infos_obj
    table_cons_df, table_obj_df = get_relevant_parts_ECMs(model_dict['ecms_df'], model_dict['infos_cons'], model_dict['infos_obj'])

    # Then calculate the activities of the ECMs in the FBA solution and add it to the dataframes.
    if model_dict['get_activities']:
        print('Calculating activities for ' + model_dict['model_name'])
        activities = get_activity_ECMs(model_dict['ecms_df'], model_dict['model'],
                                       model_dict['network'], hide_indices=model_dict['hide_metabolites'])
        # Store the activities of the ECMs in the tables where we had the relevant info
        table_cons_df['active'] = activities
        table_obj_df['active'] = activities

    model_dict['table_cons_df'] = table_cons_df
    model_dict['table_obj_df'] = table_obj_df

"""Plot these costs"""
# We need the objective value to scale the axes of our plots
objective_val = intermediate_cmod.getObjFuncValue()
# We make 2-D cost plots only (3-D is too confusing). Since each constrained metabolite brings costs, each metabolite
# needs to be plotted on some axes. We thus make ceil(n_cons/2) figures
constrained_ext_metabs = [cons_dict['ext_metab'] for cons_dict in list_model_dicts[0]['infos_cons']]
if len(constrained_ext_metabs) > 1:
    if model_name == "e_coli_core":
        # all possible combinations of constraints
        cons_ID_combi_list = list(itertools.combinations(constrained_ext_metabs, 2))
    else:
        cons_ID_combi_list = get_cons_ID_combinations(constrained_ext_metabs, list_model_dicts[0])
else:
    cons_ID_combi_list = None

if cons_ID_combi_list:
    for cons_ID_combi in cons_ID_combi_list:
        # Plot the results of the different models (with different (sub)networks) in one plot
        plot_different_approaches_one_figure(list_model_dicts, infos_obj,  # list_model_dicts_remember[0]['infos_obj'],
                                             infos_cons,  # list_model_dicts_remember[0]['infos_cons'],
                                             obj_val=objective_val,
                                             result_dir=result_dir, cons_IDs=cons_ID_combi)
else:
    # Select constraints that will be plotted in this figure
    cons_for_fig = constrained_ext_metabs
    # Plot the results of the different models (with different (sub)networks) in one plot
    plot_different_approaches_one_figure(list_model_dicts, infos_obj,  # list_model_dicts_remember[0]['infos_obj'],
                                             infos_cons,  # list_model_dicts_remember[0]['infos_cons'],
                                             obj_val=objective_val,
                                             result_dir=result_dir, cons_IDs=cons_for_fig)

# Now make for each approach for which we calculated the activities of the ECMs a different plot in which the usage of
# the ECMs is shown with vectors.
if cons_ID_combi_list:
    for model_dict in list_model_dicts:
        if model_dict['get_activities']:
            print('Plotting the cost vectors including usage for model %s' % model_dict['model_name'])
            for cons_ID_combi in cons_ID_combi_list:
                print(cons_ID_combi)
                plot_costs_flux(model_dict, infos_obj, infos_cons,  # model_dict['infos_obj'], model_dict['infos_cons'],
                           cons_IDs=cons_ID_combi, obj_val=objective_val,
                           show_active=True, result_dir=result_dir, squared_plot=False)
                plot_costs_ECMs(model_dict, infos_cons, cons_IDs=cons_ID_combi, result_dir=result_dir)
else:
    for model_dict in list_model_dicts:
        if model_dict['get_activities']:
            print('Plotting the cost vectors including usage for model %s' % model_dict['model_name'])
            plot_costs(model_dict, infos_obj, infos_cons,  # model_dict['infos_obj'], model_dict['infos_cons'],
                       cons_IDs=constrained_ext_metabs, obj_val=objective_val,
                       show_active=True, result_dir=result_dir)
            plot_costs_ECMs(model_dict, infos_cons, cons_IDs=constrained_ext_metabs, result_dir=result_dir)

"""Find corresponding EFM(s) for active ECM(s)"""
# Find some EFM that corresponds to each of the active ECMs in the hidden-network
for model_dict in list_model_dicts:
    if model_dict['get_relevant_efms'] and model_dict['get_activities']:
        relevant_efms_df, full_relevant_ecms_df = find_associated_efms(model_dict['model'], model_dict['table_cons_df'],
                                                                       model_dict['ecms_df'],
                                                                       infos_obj + infos_cons, model_dict['model_path'],
                                                                       use_external_compartment=USE_EXTERNAL_COMPARTMENT,
                                                                       only_relevant_ECMs=False)
        relevant_efms_df.to_csv(
            os.path.join(
                result_dir, "efms_corresponding_to_hide_ecms" + model_dict['model_name'] + ".csv")) #, index=False)
        full_relevant_ecms_df.to_csv(
            os.path.join(
                result_dir, "full_ecms_corresponding_to_hide_ecms" + model_dict['model_name'] + ".csv")) #, index=False)

        if VERBOSE:
            printable_ecms = full_relevant_ecms_df.sort_values('M_glc__D_e')/2
            print_ecms_direct(np.transpose(printable_ecms.values), full_relevant_ecms_df.columns)


# # Todo: find differences/similarities between ECMs and EFMs
# a = full_relevant_ecms_df.iloc[0]-full_relevant_ecms_df.iloc[1]
# a.to_numpy().nonzero()
# full_relevant_ecms_df.iloc[:,a.to_numpy().nonzero()[0]]
#
# b = relevant_efms_df.iloc[0] - relevant_efms_df.iloc[1]
# b.to_numpy().nonzero()
# relevant_efms_df.iloc[:,b.to_numpy().nonzero()[0]]
#
# print(relevant_efms_df.iloc[:,b.to_numpy()>1.].transpose())
#
# for rid in relevant_efms_df.iloc[:,b.to_numpy()>1.].transpose().index:
#     reaction = intermediate_cmod.getReaction(rid)
#     print(reaction.getName())
#     print(relevant_efms_df[rid])
#     print(reaction.getEquation())