Skip to content

Commit 07f2fab

Browse files
committed
Add the ftINIT pipeline and task-aware gap-filling
1 parent b39f336 commit 07f2fab

5 files changed

Lines changed: 894 additions & 0 deletions

File tree

src/raven_python/init/ftinit.py

Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
"""The ftINIT MILP — the faster staged variant of INIT.
2+
3+
ftINIT keeps tINIT's objective — pick the reaction subset best matching expression
4+
scores while staying flux-consistent — but with a cheaper MILP encoding that is the
5+
reason it is *fast*: a **positive-score reaction needs no binary**. Because the
6+
objective *maximises* ``Σ score·y`` with ``score > 0``, the optimiser pushes its
7+
continuous indicator ``y ∈ [0,1]`` to 1, and the gate ``net_flux ≥ force_on·y`` only
8+
lets ``y`` reach 1 if the reaction can actually carry flux. Only *negative*-score
9+
reactions need a true ``{0,1}`` binary (their indicator would otherwise sit at 0 for
10+
free). This roughly halves the integer count — the dominant MILP cost.
11+
12+
Reaction categories (RAVEN's six), by score sign × reversibility:
13+
14+
* **score 0** — left in the model, *not* in the problem: a free flux variable that can
15+
carry flux for connectivity but is neither scored nor removable.
16+
* **positive, irreversible** — continuous ``y∈[0,1]``; ``v ≥ force_on·y``. No binary.
17+
* **positive, reversible** — split ``v = v⁺ − v⁻``; continuous ``y``; a single
18+
direction binary keeps one of ``v⁺/v⁻`` at 0 (no fwd/back loop faking "on");
19+
``v⁺+v⁻ ≥ force_on·y``.
20+
* **negative, irreversible** — binary ``x∈{0,1}``; ``v ≤ ub·x``.
21+
* **negative, reversible** — split; binary ``x``; ``v⁺+v⁻ ≤ cap·x``.
22+
* **essential** — forced on (``v ≥ force_on_ess``); no indicator. Assumed already
23+
oriented irreversible in its forced direction (``prepINITModel`` does this).
24+
25+
Objective: **maximise** ``Σ score·indicator``. Unlike classic INIT
26+
(:func:`raven_python.init.run_init`), ftINIT does **not** reward production of every
27+
metabolite — ``prod_weight`` applies only to metabolomics-detected metabolites (not
28+
yet implemented; passing a non-empty ``metabolomics`` argument raises
29+
``NotImplementedError``). Connectivity comes solely from the flux gates plus any
30+
essential reactions. ``allow_excretion`` relaxes ``S·v = 0`` to ``≥ 0``; ``rem_pos_rev``
31+
drops positive reversible reactions from the problem (used in the staging schedule).
32+
33+
Needs a MILP solver (cobra's configured optlang solver; only Gurobi is fully viable at
34+
genome scale — see ``docs/init_solver_benchmark.md``). Magic numbers
35+
(``force_on``/``force_on_ess`` = 0.1, ``big_m`` = 100) are exposed and scale-dependent;
36+
calibration tables are in ``docs/init_param_calibration.md``. ``big_m`` caps a *scored*
37+
reaction's flux in its on/off (direction) constraint — using a fixed 100 rather than
38+
the reaction's ±1000 bound keeps the LP relaxation tight (what makes the genome-scale
39+
MILP tractable). Free / essential reactions keep their real bounds.
40+
41+
⚠️ **Loops.** The MILP has *no* loopless constraint: an internal
42+
thermodynamically-infeasible cycle is flux-consistent (``S·v = 0``), so if its
43+
reactions carry positive net score the optimiser will "include" them with no real
44+
exchange flux. RAVEN tolerates this — loop-free models come from the staged pipeline
45+
+ exchange handling, and at genome scale real exchange reactions make such cycles not
46+
score-optimal. A loopless option could be layered on later if needed.
47+
"""
48+
from __future__ import annotations
49+
50+
from collections.abc import Iterable, Mapping
51+
from dataclasses import dataclass, field
52+
53+
import cobra
54+
from optlang.symbolics import Real, add, mul
55+
56+
from raven_python.init.genes import remove_low_score_genes
57+
from raven_python.init.merge import group_rxn_scores
58+
from raven_python.init.steps import get_init_steps
59+
from raven_python.init.taskfill import fill_tasks
60+
61+
_FORCE_ON = 0.1 # min flux for a reaction to count as "on" (RAVEN forceOnLim)
62+
_BIG_M = 100.0 # indicator/direction big-M cap on a *scored* reaction's flux (RAVEN's 100)
63+
64+
65+
@dataclass
66+
class FtInitResult:
67+
"""Result of :func:`run_ftinit`."""
68+
69+
model: cobra.Model
70+
kept_reactions: list[str]
71+
deleted_reactions: list[str]
72+
fluxes: dict[str, float]
73+
objective: float
74+
on_reactions: set[str] = field(default_factory=set) # scored reactions turned on (indicator)
75+
76+
77+
def run_ftinit(
78+
model: cobra.Model,
79+
rxn_scores: Mapping[str, float] | None = None,
80+
*,
81+
essential_rxns: Iterable[str] | None = None,
82+
essential_directions: Mapping[str, int] | None = None,
83+
essential_force: Mapping[str, float] | None = None,
84+
allow_excretion: bool = False,
85+
rem_pos_rev: bool = False,
86+
ignore_mets: Iterable[str] = (),
87+
force_on: float = _FORCE_ON,
88+
force_on_ess: float = _FORCE_ON,
89+
big_m: float = _BIG_M,
90+
mip_gap: float | None = None,
91+
time_limit: float | None = None,
92+
) -> FtInitResult:
93+
"""Run the single-step ftINIT MILP and return the extracted model.
94+
95+
``rxn_scores`` maps reaction id → score (default 0 → reaction left free in the
96+
model, not scored or removable). ``essential_rxns`` are forced to carry flux
97+
(≥ ``force_on_ess``); ``essential_directions`` maps an essential reaction id to
98+
``+1`` (forward) or ``-1`` (reverse) for the forced direction (default forward).
99+
``ignore_mets`` are metabolite **names** whose mass balance is dropped (RAVEN's
100+
per-step "simple metabolite" removal, e.g. H2O/H+). See the module docstring for
101+
the formulation. This is the single-step variant; the staged schedule
102+
(:func:`raven_python.init.ftinit`) calls it per step.
103+
"""
104+
scores = dict(rxn_scores or {})
105+
essential = set(essential_rxns or [])
106+
directions = dict(essential_directions or {})
107+
essential_force = dict(essential_force or {})
108+
ignore_met_names = set(ignore_mets)
109+
prob = model.problem
110+
opt = prob.Model()
111+
112+
variables: list = []
113+
constraints: list = []
114+
flux_terms: dict[str, list[tuple[object, float]]] = {} # rxn id -> [(var, sign)]
115+
indicators: dict[str, tuple[object, float]] = {} # rxn id -> (indicator var, score)
116+
free_or_essential: set[str] = set() # kept regardless of an indicator
117+
118+
def add_constraint(expr, **kw):
119+
constraints.append(prob.Constraint(expr, **kw))
120+
121+
for rxn in model.reactions:
122+
rid = rxn.id
123+
lb, ub = rxn.lower_bound, rxn.upper_bound
124+
score = float(scores.get(rid, 0.0))
125+
if rem_pos_rev and score > 0 and lb < 0 < ub:
126+
score = 0.0 # staging step 1: positive reversibles dropped from the problem
127+
128+
if rid in essential:
129+
# Forced to carry flux in its forced direction (default forward); respect a
130+
# stricter native bound if the model already forces more flux. The forced
131+
# magnitude may be set per reaction (RAVEN's min(0.99·|prev flux|, 0.1), so
132+
# a reaction is never forced above what it carried before).
133+
force = essential_force.get(rid, force_on_ess) if essential_force else force_on_ess
134+
if directions.get(rid, 1) >= 0:
135+
forced = min(force, ub) # clamp to capacity so we never make lb > ub
136+
v = prob.Variable(f"v_{rid}", lb=max(forced, lb, 0.0), ub=ub)
137+
else: # reverse: flux ≤ -force
138+
forced = min(force, -lb)
139+
v = prob.Variable(f"v_{rid}", lb=lb, ub=min(-forced, ub))
140+
variables.append(v)
141+
flux_terms[rid] = [(v, 1.0)]
142+
free_or_essential.add(rid)
143+
continue
144+
145+
if score == 0.0: # free: carries flux for connectivity, not scored/removable
146+
v = prob.Variable(f"v_{rid}", lb=lb, ub=ub)
147+
variables.append(v)
148+
flux_terms[rid] = [(v, 1.0)]
149+
free_or_essential.add(rid)
150+
continue
151+
152+
reversible = lb < 0 < ub
153+
if reversible:
154+
vp = prob.Variable(f"vp_{rid}", lb=0.0, ub=ub)
155+
vn = prob.Variable(f"vn_{rid}", lb=0.0, ub=-lb)
156+
variables += [vp, vn]
157+
flux_terms[rid] = [(vp, 1.0), (vn, -1.0)]
158+
total = vp + vn # |flux| (one of vp/vn pinned to 0 below), used by the gates
159+
else: # single-direction: keep the model's own [lb, ub] (incl. any forced lb>0)
160+
v = prob.Variable(f"v_{rid}", lb=lb, ub=ub)
161+
variables.append(v)
162+
flux_terms[rid] = [(v, 1.0)]
163+
total = v if ub > 0 else -v # magnitude for a single-direction reaction
164+
165+
if score > 0:
166+
y = prob.Variable(f"y_{rid}", lb=0.0, ub=1.0) # continuous indicator, no binary
167+
variables.append(y)
168+
indicators[rid] = (y, score)
169+
add_constraint(total - force_on * y, lb=0.0, name=f"on_{rid}") # y=1 ⇒ |flux| ≥ force_on
170+
if reversible: # one direction binary stops a fwd/back loop faking "on"
171+
b = prob.Variable(f"b_{rid}", type="binary")
172+
variables.append(b)
173+
add_constraint(vp - big_m * b, ub=0.0, name=f"dirp_{rid}") # vp ≤ M·b
174+
add_constraint(vn + big_m * b, ub=big_m, name=f"dirn_{rid}") # vn ≤ M·(1-b)
175+
else: # score < 0
176+
x = prob.Variable(f"x_{rid}", type="binary")
177+
variables.append(x)
178+
indicators[rid] = (x, score)
179+
add_constraint(total - big_m * x, ub=0.0, name=f"off_{rid}") # flux>0 ⇒ x=1
180+
181+
# Steady state S·v {== 0 | >= 0}; ignored metabolites are left unbalanced.
182+
# Build each metabolite's balance as a *flat* list of (coeff·sign)·var terms and sum
183+
# it with optlang.symbolics.add. Python's builtin sum re-canonicalises a growing
184+
# sympy expression at every step (O(n²)); for hub metabolites that appear in ~10³
185+
# reactions that is minutes per constraint. add() builds the sum in one pass.
186+
met_terms: dict = {m: [] for m in model.metabolites if m.name not in ignore_met_names}
187+
for rxn in model.reactions:
188+
terms = flux_terms[rxn.id]
189+
for met, coeff in rxn.metabolites.items():
190+
bucket = met_terms.get(met)
191+
if bucket is None:
192+
continue
193+
for var, sign in terms:
194+
bucket.append(mul([Real(coeff * sign), var]))
195+
for termlist in met_terms.values():
196+
if termlist:
197+
add_constraint(add(termlist), lb=0.0, ub=None if allow_excretion else 0.0)
198+
199+
opt.add(variables + constraints)
200+
opt.objective = prob.Objective(
201+
add([mul([Real(score), ind]) for ind, score in indicators.values()]), direction="max"
202+
)
203+
if time_limit is not None:
204+
opt.configuration.timeout = int(time_limit)
205+
if mip_gap is not None:
206+
try: # Gurobi-specific; harmless if the backend differs
207+
opt.problem.Params.MIPGap = mip_gap
208+
except Exception: # noqa: BLE001
209+
pass
210+
opt.optimize()
211+
# Accept a near-optimal incumbent (when a MIP gap / time limit is set), as RAVEN does.
212+
if opt.status not in ("optimal", "feasible", "suboptimal", "time_limit"):
213+
raise RuntimeError(f"ftINIT MILP did not solve (status: {opt.status}).")
214+
215+
# RAVEN: a reaction is "on" iff its indicator ≥ 0.5 (positive indicators are
216+
# continuous and can land fractionally when a reaction can carry only tiny flux).
217+
on = {rid for rid, (ind, _) in indicators.items() if (ind.primal or 0.0) >= 0.5}
218+
kept = free_or_essential | on
219+
deleted = [r.id for r in model.reactions if r.id not in kept]
220+
fluxes = {
221+
rid: sum(sign * (var.primal or 0.0) for var, sign in terms)
222+
for rid, terms in flux_terms.items()
223+
}
224+
225+
out = model.copy()
226+
out.remove_reactions(deleted, remove_orphans=True)
227+
return FtInitResult(out, sorted(kept), sorted(deleted), fluxes,
228+
float(opt.objective.value), on_reactions=on)
229+
230+
231+
def ftinit(
232+
prep,
233+
rxn_scores: Mapping[str, float],
234+
*,
235+
gene_scores: Mapping[str, float] | None = None,
236+
series: str = "1+1",
237+
steps=None,
238+
fill_gaps: bool = True,
239+
metabolomics: Iterable[str] | None = None,
240+
force_on: float = _FORCE_ON,
241+
big_m: float = _BIG_M,
242+
mip_gap: float | None = None,
243+
time_limit: float | None = None,
244+
) -> cobra.Model:
245+
"""Run the full ftINIT pipeline on prepData and return the context-specific model.
246+
247+
``prep`` is a :class:`raven_python.init.PrepData`. ``rxn_scores`` maps **original**
248+
reaction id → score (e.g. from :func:`score_reactions_from_genes` on the template).
249+
Each step (:func:`raven_python.init.get_init_steps`) regroups scores under its
250+
``ignore_mask``, fixes the reactions turned on by earlier steps as essential (in
251+
their flux direction), and solves :func:`run_ftinit` on the merged model. Reactions
252+
never turned on (and not essential or left-in) are removed from the reference model;
253+
exchange reactions are always kept (RAVEN re-adds them).
254+
255+
If ``fill_gaps`` and ``prep`` carries tasks, reactions are added back so every task
256+
is feasible (:func:`raven_python.init.fill_tasks`). If ``gene_scores`` is given,
257+
negative-scoring genes are pruned from the GPRs at the end
258+
(:func:`raven_python.init.remove_low_score_genes`).
259+
260+
Essential reactions are forced to carry ``force_on`` (default 0.1) of flux in the
261+
forced direction. On genome-scale models a stricter regime is needed (the previous
262+
step's actual carried flux instead of a flat 0.1) — exposed via per-reaction
263+
``essential_force`` on :func:`run_ftinit`.
264+
265+
``metabolomics`` (a list of detected metabolite names to reward producing) is
266+
**not yet implemented**: the linear merge eliminates degree-2 detected metabolites,
267+
so it needs a producer-group-mapping + negative-producer force-flux block — the
268+
most intricate MILP piece, for the least-used input. Passing a non-empty value
269+
raises ``NotImplementedError``.
270+
271+
``mip_gap``/``time_limit`` are forwarded to each :func:`run_ftinit` solve. On
272+
genome-scale models they are essential for tractability — see
273+
``docs/init_param_calibration.md`` for the calibration table.
274+
"""
275+
if metabolomics:
276+
raise NotImplementedError(
277+
"metabolomics production-bonus is not yet implemented."
278+
)
279+
steps = steps if steps is not None else get_init_steps(series)
280+
min_model, group_of = prep.min_model, prep.group_of
281+
282+
turned_on: dict[str, float] = {} # merged reaction id -> flux (accumulated)
283+
left_in: set[str] = set() # merged reactions with score 0 in the last step
284+
for step in steps:
285+
to_zero = prep.masks.ignored(step.ignore_mask)
286+
scores = group_rxn_scores(min_model, rxn_scores, prep.orig_rxn_ids,
287+
prep.group_ids, to_zero)
288+
essential = set(prep.essential_rxns) # pre-oriented forward (default direction)
289+
directions: dict[str, int] = {}
290+
ess_force: dict[str, float] = {}
291+
if step.how_to_use_prev == "essential":
292+
for rid, flux in turned_on.items():
293+
essential.add(rid)
294+
directions[rid] = 1 if flux >= 0 else -1
295+
# never force more flux than the reaction carried before (RAVEN)
296+
ess_force[rid] = min(abs(flux) * 0.99, force_on)
297+
res = run_ftinit(
298+
min_model, scores, essential_rxns=essential, essential_directions=directions,
299+
essential_force=ess_force, allow_excretion=step.allow_met_secr,
300+
rem_pos_rev=step.pos_rev_off, ignore_mets=step.mets_to_ignore,
301+
force_on=force_on, force_on_ess=force_on, big_m=big_m,
302+
mip_gap=mip_gap, time_limit=time_limit,
303+
)
304+
for rid in res.on_reactions:
305+
turned_on[rid] = res.fluxes[rid]
306+
left_in = {rid for rid, s in scores.items() if s == 0.0}
307+
308+
# Merged reactions to keep: turned on + permanently essential + left-in (score 0).
309+
kept_min = set(turned_on) | set(prep.essential_rxns) | left_in
310+
deleted_min = [r.id for r in min_model.reactions if r.id not in kept_min]
311+
312+
# Map deleted merged reactions back to all originals in their groups.
313+
removed_groups = {group_of[rid] for rid in deleted_min if group_of[rid] != 0}
314+
to_remove = {o for o in prep.orig_rxn_ids if group_of[o] and group_of[o] in removed_groups}
315+
to_remove |= {rid for rid in deleted_min if group_of[rid] == 0} # unmerged
316+
# Keep the surviving originals plus all exchange reactions (always re-added).
317+
final_kept = (set(prep.orig_rxn_ids) - to_remove) | prep.masks.exchange
318+
319+
out = prep.ref_model.copy()
320+
out.remove_reactions([r.id for r in out.reactions if r.id not in final_kept],
321+
remove_orphans=True)
322+
323+
if fill_gaps and prep.tasks: # add reactions back so every task is feasible
324+
out = fill_tasks(out, prep.ref_model, prep.tasks, rxn_scores=rxn_scores,
325+
mip_gap=mip_gap, time_limit=time_limit).model
326+
if gene_scores is not None: # prune negative-scoring genes from the GPRs
327+
out, _ = remove_low_score_genes(out, gene_scores)
328+
return out

0 commit comments

Comments
 (0)