Skip to content

Commit 4bc0d6e

Browse files
committed
Add the model-manipulation layer (add, remove, transport, merge, etc.)
1 parent 50bea40 commit 4bc0d6e

24 files changed

Lines changed: 3475 additions & 0 deletions
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""Generic cobra.Model structural transforms that cobrapy does not cover cleanly:
2+
reaction building from equations, batch GPR / bound changes, irreversibility splitting,
3+
isozyme expansion, compartment merge / copy, and model merging by name."""
4+
from .add import add_reactions_from_equations
5+
from .change import change_gene_reaction_rules, change_reaction_equations
6+
from .expand import expand_model
7+
from .irreversible import convert_to_irreversible
8+
from .merge import merge_models
9+
from .parameters import set_variance_bounds
10+
from .remove import remove_genes, remove_metabolites
11+
from .simplify import (
12+
constrain_reversible_reactions,
13+
group_linear_reactions,
14+
remove_dead_end_reactions,
15+
remove_duplicate_reactions,
16+
)
17+
from .transfer import add_reactions_from_model
18+
from .transport import add_transport_reactions
19+
20+
__all__ = [
21+
"add_reactions_from_equations",
22+
"add_reactions_from_model",
23+
"add_transport_reactions",
24+
"change_gene_reaction_rules",
25+
"change_reaction_equations",
26+
"constrain_reversible_reactions",
27+
"convert_to_irreversible",
28+
"expand_model",
29+
"group_linear_reactions",
30+
"merge_models",
31+
"remove_dead_end_reactions",
32+
"remove_duplicate_reactions",
33+
"remove_genes",
34+
"remove_metabolites",
35+
"set_variance_bounds",
36+
]
Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
"""Add reactions to a model from equation strings.
2+
3+
Most of the equivalent MATLAB code is struct-of-arrays bookkeeping (padding parallel
4+
``rxnNames`` / ``lb`` / ``ub`` / ``grRules`` / ... fields) that does not exist in
5+
cobra, where each ``Reaction`` carries its own attributes. cobra also already
6+
covers a large part of the *behaviour*:
7+
8+
* ``Reaction.build_reaction_from_string`` parses equation strings, coefficients,
9+
and reversibility arrows (``<=>``, ``-->``, ``=>``) and creates unknown
10+
metabolites — but only matching metabolites **by ID**, and it leaves new
11+
metabolites with ``compartment=None``.
12+
* assigning ``reaction.gene_reaction_rule`` auto-creates ``Gene`` objects.
13+
14+
So this port keeps only the parts cobra lacks:
15+
16+
* **name-based matching** — interpret equation tokens as metabolite *names*
17+
(RAVEN eqnType 2) or as ``name[comp]`` (eqnType 3), not just IDs;
18+
* **correct compartment** assignment for newly created metabolites;
19+
* **strict policies** — optionally *error* (rather than silently create) on
20+
unknown metabolites or genes, and always error on a duplicate reaction ID
21+
(cobra silently ignores those).
22+
23+
Instead of RAVEN's ``eqnType`` integer (1/2/3) the matching mode is a readable
24+
keyword: ``mets_by="id"`` or ``mets_by="name"``, with ``name[comp]`` recognised
25+
automatically. See IMPROVEMENTS.md (A-series) for the rationale.
26+
"""
27+
from __future__ import annotations
28+
29+
import re
30+
import warnings
31+
from collections import OrderedDict
32+
from collections.abc import Mapping, Sequence
33+
34+
import cobra
35+
from cobra import Metabolite, Reaction
36+
from cobra.core.gene import GPR
37+
38+
from raven_python.utils.parse import parse_name_comp
39+
40+
# Reversibility arrows. ``<=>`` must be tried before ``=>`` (it contains it).
41+
_REVERSIBLE_ARROWS = ("<=>",)
42+
_FORWARD_ARROWS = ("-->", "->", "=>")
43+
44+
45+
def _split_equation(equation: str) -> tuple[str, str, bool]:
46+
"""Split an equation into (lhs, rhs, reversible) on its arrow."""
47+
for arrow in _REVERSIBLE_ARROWS:
48+
if arrow in equation:
49+
lhs, rhs = equation.split(arrow, 1)
50+
return lhs, rhs, True
51+
for arrow in _FORWARD_ARROWS:
52+
if arrow in equation:
53+
lhs, rhs = equation.split(arrow, 1)
54+
return lhs, rhs, False
55+
raise ValueError(f"No reaction arrow (<=>, -->, =>) found in equation: {equation!r}")
56+
57+
58+
def _parse_side(side: str) -> list[tuple[float, str, str | None]]:
59+
"""Parse one side of an equation into ``[(coefficient, token, fallback), ...]``.
60+
61+
The ``fallback`` slot is for the ambiguous ``"<number> <rest>"`` shape: when
62+
matching by name, ``"2 oxoglutarate"`` could be either ``coeff=2, name="oxoglutarate"``
63+
or ``coeff=1, name="2 oxoglutarate"`` (a real chemistry name). We return the
64+
coefficient-split form as the primary and the full term as the fallback; the
65+
resolver picks whichever matches an existing metabolite. Pure-number heads
66+
with no name (``"2"``) and pure-name terms (``"glucose"``) have no fallback.
67+
"""
68+
terms: list[tuple[float, str, str | None]] = []
69+
for raw in side.split(" + "):
70+
term = raw.strip()
71+
if not term:
72+
continue
73+
head, _, tail = term.partition(" ")
74+
try:
75+
coeff = float(head)
76+
token = tail.strip()
77+
except ValueError:
78+
coeff, token = 1.0, term
79+
fallback = None
80+
else:
81+
# Coefficient-split succeeded. Keep the full term as a fallback when
82+
# the tail is non-empty so name-resolution can re-try it as one token.
83+
fallback = term if token else None
84+
if not token:
85+
raise ValueError(f"Missing metabolite after coefficient in term: {raw!r}")
86+
terms.append((coeff, token, fallback))
87+
return terms
88+
89+
90+
def _new_met_id(model: cobra.Model, prefix: str) -> str:
91+
"""Next free ``<prefix><int>`` metabolite ID (RAVEN m1, m2, ... scheme)."""
92+
pattern = re.compile(rf"^{re.escape(prefix)}(\d+)$")
93+
used = [int(m.group(1)) for met in model.metabolites if (m := pattern.match(met.id))]
94+
n = max(used) + 1 if used else 1
95+
while f"{prefix}{n}" in model.metabolites:
96+
n += 1
97+
return f"{prefix}{n}"
98+
99+
100+
def _try_existing(
101+
model: cobra.Model, token: str, *, mets_by: str, compartment: str | None
102+
) -> Metabolite | None:
103+
"""Look up ``token`` as an existing metabolite (no creation, no side effects).
104+
105+
Returns the matching metabolite or ``None``. Used by ``_stoichiometry`` to
106+
disambiguate the ``"<number> <rest>"`` shape: if a metabolite whose *name*
107+
(or id) literally contains a leading number exists, prefer it over splitting
108+
the number off as a coefficient.
109+
"""
110+
name, comp = parse_name_comp(token)
111+
if mets_by == "id" and comp is None:
112+
return model.metabolites.get_by_id(token) if token in model.metabolites else None
113+
target_comp = comp if comp is not None else compartment
114+
if target_comp is None:
115+
return None
116+
for met in model.metabolites:
117+
if met.name == name and met.compartment == target_comp:
118+
return met
119+
return None
120+
121+
122+
def _resolve_metabolite(
123+
model: cobra.Model,
124+
token: str,
125+
*,
126+
mets_by: str,
127+
compartment: str | None,
128+
allow_new_mets: bool,
129+
new_met_prefix: str,
130+
) -> Metabolite:
131+
"""Resolve an equation token to an existing or newly created Metabolite."""
132+
name, comp = parse_name_comp(token)
133+
134+
if mets_by == "id" and comp is None:
135+
# token is a metabolite ID
136+
if token in model.metabolites:
137+
return model.metabolites.get_by_id(token)
138+
if not allow_new_mets:
139+
raise ValueError(
140+
f"Unknown metabolite ID {token!r}; pass allow_new_mets=True to create it."
141+
)
142+
if compartment is None:
143+
raise ValueError(
144+
f"Cannot create metabolite {token!r}: no compartment given."
145+
)
146+
_warn_unknown_compartment(model, compartment, token)
147+
met = Metabolite(token, compartment=compartment)
148+
model.add_metabolites([met])
149+
return met
150+
151+
# name-based (mets_by="name") or explicit name[comp]
152+
target_comp = comp if comp is not None else compartment
153+
if target_comp is None:
154+
raise ValueError(
155+
f"Metabolite {token!r} matched by name needs a compartment; "
156+
"pass compartment=... or use the name[comp] syntax."
157+
)
158+
if comp is not None and target_comp not in model.compartments and not allow_new_mets:
159+
raise ValueError(f"Compartment {target_comp!r} is not in the model.")
160+
161+
matches = [
162+
met
163+
for met in model.metabolites
164+
if met.name == name and met.compartment == target_comp
165+
]
166+
if matches:
167+
return matches[0]
168+
if not allow_new_mets:
169+
raise ValueError(
170+
f"No metabolite named {name!r} in compartment {target_comp!r}; "
171+
"pass allow_new_mets=True to create it."
172+
)
173+
_warn_unknown_compartment(model, target_comp, name)
174+
met = Metabolite(_new_met_id(model, new_met_prefix), name=name, compartment=target_comp)
175+
model.add_metabolites([met])
176+
return met
177+
178+
179+
def _warn_unknown_compartment(model: cobra.Model, compartment: str, identifier: str) -> None:
180+
"""Warn when a new metabolite would be born into a not-yet-registered compartment.
181+
182+
Both ``mets_by`` paths previously created the metabolite without validating
183+
the compartment, so a typo (``"cyto"`` for ``"c"``) silently produced a
184+
one-metabolite ghost compartment. cobra inherits the compartment from the
185+
first metabolite assigned to it, so the fix is a warning, not a hard error.
186+
"""
187+
known = set(model.compartments) | set(model._compartments)
188+
if compartment not in known:
189+
warnings.warn(
190+
f"Creating metabolite {identifier!r} in unregistered compartment "
191+
f"{compartment!r} (existing: {sorted(known) or 'none'}); "
192+
"add the compartment first or check for a typo.",
193+
stacklevel=5,
194+
)
195+
196+
197+
def _stoichiometry(
198+
model: cobra.Model,
199+
equation: str,
200+
*,
201+
mets_by: str,
202+
compartment: str | None,
203+
allow_new_mets: bool,
204+
new_met_prefix: str,
205+
) -> tuple[dict[Metabolite, float], bool]:
206+
"""Parse an equation into a {Metabolite: net coefficient} dict + reversibility."""
207+
lhs, rhs, reversible = _split_equation(equation)
208+
coeffs: OrderedDict[Metabolite, float] = OrderedDict()
209+
had_terms = False
210+
for sign, side in ((-1.0, lhs), (1.0, rhs)):
211+
for coeff, token, fallback in _parse_side(side):
212+
had_terms = True
213+
# "<number> <name>" is ambiguous when the name itself starts with a
214+
# number (e.g. "2 oxoglutarate"). Prefer the full-term interpretation
215+
# when it matches an existing metabolite — otherwise fall through to
216+
# the coefficient-split form.
217+
met = None
218+
if fallback is not None:
219+
met = _try_existing(
220+
model, fallback, mets_by=mets_by, compartment=compartment
221+
)
222+
if met is not None:
223+
coeff = 1.0
224+
if met is None:
225+
met = _resolve_metabolite(
226+
model,
227+
token,
228+
mets_by=mets_by,
229+
compartment=compartment,
230+
allow_new_mets=allow_new_mets,
231+
new_met_prefix=new_met_prefix,
232+
)
233+
coeffs[met] = coeffs.get(met, 0.0) + sign * coeff
234+
# Drop metabolites that net to zero (present as both substrate and product).
235+
coeffs = OrderedDict((met, c) for met, c in coeffs.items() if c != 0.0)
236+
if had_terms and not coeffs:
237+
warnings.warn(
238+
f"Equation {equation!r} has no net metabolites (all terms cancelled); "
239+
"the reaction will be added with empty stoichiometry.",
240+
stacklevel=4,
241+
)
242+
return dict(coeffs), reversible
243+
244+
245+
def add_reactions_from_equations(
246+
model: cobra.Model,
247+
reactions: Sequence[Mapping],
248+
*,
249+
mets_by: str = "id",
250+
compartment: str | None = None,
251+
allow_new_mets: bool = True,
252+
allow_new_genes: bool = True,
253+
new_met_prefix: str = "m",
254+
) -> list[Reaction]:
255+
"""Add reactions defined by equation strings, matching mets by ID or name.
256+
Parameters
257+
----------
258+
model
259+
Target ``cobra.Model``, mutated in place.
260+
reactions
261+
Sequence of mappings, one per reaction. Recognised keys:
262+
263+
* ``id`` (**required**) — reaction ID; must not already exist.
264+
* ``equation`` (**required**) — e.g. ``"atp_c + h2o_c <=> adp_c + pi_c"``.
265+
Use ``<=>`` for reversible, ``-->``/``->``/``=>`` for irreversible.
266+
* ``name`` — reaction name.
267+
* ``bounds`` — ``(lower, upper)`` tuple; overrides the arrow.
268+
* ``gene_reaction_rule`` — GPR string.
269+
* ``subsystem`` — subsystem name.
270+
mets_by
271+
How bare equation tokens (without ``[comp]``) are matched:
272+
``"id"`` (RAVEN eqnType 1) or ``"name"`` (eqnType 2). A ``name[comp]``
273+
token (eqnType 3) is always matched by name + compartment.
274+
compartment
275+
Default compartment for new metabolites and for name-matched tokens
276+
without an explicit ``[comp]``.
277+
allow_new_mets
278+
If True (default), create metabolites not found. New metabolites get
279+
``compartment`` (id mode) or an auto ID ``m1``, ``m2``, ... (name mode).
280+
If False, an unknown metabolite raises.
281+
allow_new_genes
282+
If True (default), genes in a GPR are auto-created by cobra. If False,
283+
a GPR referencing a gene not already in the model raises.
284+
new_met_prefix
285+
Prefix for auto-generated metabolite IDs in name mode (default ``"m"``).
286+
287+
Returns
288+
-------
289+
list of cobra.Reaction
290+
The reactions added, in input order.
291+
"""
292+
if mets_by not in ("id", "name"):
293+
raise ValueError(f"mets_by must be 'id' or 'name', got {mets_by!r}")
294+
295+
known_genes = {gene.id for gene in model.genes}
296+
added: list[Reaction] = []
297+
298+
for spec in reactions:
299+
if "id" not in spec:
300+
raise ValueError(f"Reaction spec missing required 'id': {spec!r}")
301+
rxn_id = spec["id"]
302+
if rxn_id in model.reactions:
303+
raise ValueError(
304+
f"Reaction {rxn_id!r} already exists; use changeRxns or remove it first."
305+
)
306+
if "equation" not in spec:
307+
raise ValueError(f"Reaction {rxn_id!r} spec missing required 'equation'.")
308+
309+
coeffs, reversible = _stoichiometry(
310+
model,
311+
spec["equation"],
312+
mets_by=mets_by,
313+
compartment=compartment,
314+
allow_new_mets=allow_new_mets,
315+
new_met_prefix=new_met_prefix,
316+
)
317+
318+
rxn = Reaction(rxn_id, name=spec.get("name", ""))
319+
if "bounds" in spec:
320+
rxn.bounds = tuple(spec["bounds"])
321+
else:
322+
config = cobra.Configuration()
323+
lower = config.lower_bound if reversible else 0.0
324+
rxn.bounds = (lower, config.upper_bound)
325+
if "subsystem" in spec:
326+
rxn.subsystem = spec["subsystem"]
327+
328+
model.add_reactions([rxn])
329+
rxn.add_metabolites(coeffs)
330+
331+
rule = spec.get("gene_reaction_rule", "")
332+
if rule:
333+
if not allow_new_genes:
334+
missing = sorted(set(GPR.from_string(rule).genes) - known_genes)
335+
if missing:
336+
raise ValueError(
337+
f"Reaction {rxn_id!r} references genes not in the model: "
338+
f"{missing}. Set allow_new_genes=True or add them first."
339+
)
340+
rxn.gene_reaction_rule = rule
341+
known_genes.update(gene.id for gene in rxn.genes)
342+
343+
added.append(rxn)
344+
345+
return added

0 commit comments

Comments
 (0)