|
| 1 | +"""The ftINIT MILP — the faster staged variant of INIT. |
| 2 | +
|
| 3 | +ftINIT keeps tINIT's objective — pick the reaction subset best matching expression |
| 4 | +scores while staying flux-consistent — but with a cheaper MILP encoding that is the |
| 5 | +reason it is *fast*: a **positive-score reaction needs no binary**. Because the |
| 6 | +objective *maximises* ``Σ score·y`` with ``score > 0``, the optimiser pushes its |
| 7 | +continuous indicator ``y ∈ [0,1]`` to 1, and the gate ``net_flux ≥ force_on·y`` only |
| 8 | +lets ``y`` reach 1 if the reaction can actually carry flux. Only *negative*-score |
| 9 | +reactions need a true ``{0,1}`` binary (their indicator would otherwise sit at 0 for |
| 10 | +free). This roughly halves the integer count — the dominant MILP cost. |
| 11 | +
|
| 12 | +Reaction categories (RAVEN's six), by score sign × reversibility: |
| 13 | +
|
| 14 | +* **score 0** — left in the model, *not* in the problem: a free flux variable that can |
| 15 | + carry flux for connectivity but is neither scored nor removable. |
| 16 | +* **positive, irreversible** — continuous ``y∈[0,1]``; ``v ≥ force_on·y``. No binary. |
| 17 | +* **positive, reversible** — split ``v = v⁺ − v⁻``; continuous ``y``; a single |
| 18 | + direction binary keeps one of ``v⁺/v⁻`` at 0 (no fwd/back loop faking "on"); |
| 19 | + ``v⁺+v⁻ ≥ force_on·y``. |
| 20 | +* **negative, irreversible** — binary ``x∈{0,1}``; ``v ≤ ub·x``. |
| 21 | +* **negative, reversible** — split; binary ``x``; ``v⁺+v⁻ ≤ cap·x``. |
| 22 | +* **essential** — forced on (``v ≥ force_on_ess``); no indicator. Assumed already |
| 23 | + oriented irreversible in its forced direction (``prepINITModel`` does this). |
| 24 | +
|
| 25 | +Objective: **maximise** ``Σ score·indicator``. Unlike classic INIT |
| 26 | +(:func:`raven_python.init.run_init`), ftINIT does **not** reward production of every |
| 27 | +metabolite — ``prod_weight`` applies only to metabolomics-detected metabolites (not |
| 28 | +yet implemented; passing a non-empty ``metabolomics`` argument raises |
| 29 | +``NotImplementedError``). Connectivity comes solely from the flux gates plus any |
| 30 | +essential reactions. ``allow_excretion`` relaxes ``S·v = 0`` to ``≥ 0``; ``rem_pos_rev`` |
| 31 | +drops positive reversible reactions from the problem (used in the staging schedule). |
| 32 | +
|
| 33 | +Needs a MILP solver (cobra's configured optlang solver; only Gurobi is fully viable at |
| 34 | +genome scale — see ``docs/init_solver_benchmark.md``). Magic numbers |
| 35 | +(``force_on``/``force_on_ess`` = 0.1, ``big_m`` = 100) are exposed and scale-dependent; |
| 36 | +calibration tables are in ``docs/init_param_calibration.md``. ``big_m`` caps a *scored* |
| 37 | +reaction's flux in its on/off (direction) constraint — using a fixed 100 rather than |
| 38 | +the reaction's ±1000 bound keeps the LP relaxation tight (what makes the genome-scale |
| 39 | +MILP tractable). Free / essential reactions keep their real bounds. |
| 40 | +
|
| 41 | +⚠️ **Loops.** The MILP has *no* loopless constraint: an internal |
| 42 | +thermodynamically-infeasible cycle is flux-consistent (``S·v = 0``), so if its |
| 43 | +reactions carry positive net score the optimiser will "include" them with no real |
| 44 | +exchange flux. RAVEN tolerates this — loop-free models come from the staged pipeline |
| 45 | ++ exchange handling, and at genome scale real exchange reactions make such cycles not |
| 46 | +score-optimal. A loopless option could be layered on later if needed. |
| 47 | +""" |
| 48 | +from __future__ import annotations |
| 49 | + |
| 50 | +from collections.abc import Iterable, Mapping |
| 51 | +from dataclasses import dataclass, field |
| 52 | + |
| 53 | +import cobra |
| 54 | +from optlang.symbolics import Real, add, mul |
| 55 | + |
| 56 | +from raven_python.init.genes import remove_low_score_genes |
| 57 | +from raven_python.init.merge import group_rxn_scores |
| 58 | +from raven_python.init.steps import get_init_steps |
| 59 | +from raven_python.init.taskfill import fill_tasks |
| 60 | + |
| 61 | +_FORCE_ON = 0.1 # min flux for a reaction to count as "on" (RAVEN forceOnLim) |
| 62 | +_BIG_M = 100.0 # indicator/direction big-M cap on a *scored* reaction's flux (RAVEN's 100) |
| 63 | + |
| 64 | + |
| 65 | +@dataclass |
| 66 | +class FtInitResult: |
| 67 | + """Result of :func:`run_ftinit`.""" |
| 68 | + |
| 69 | + model: cobra.Model |
| 70 | + kept_reactions: list[str] |
| 71 | + deleted_reactions: list[str] |
| 72 | + fluxes: dict[str, float] |
| 73 | + objective: float |
| 74 | + on_reactions: set[str] = field(default_factory=set) # scored reactions turned on (indicator) |
| 75 | + |
| 76 | + |
| 77 | +def run_ftinit( |
| 78 | + model: cobra.Model, |
| 79 | + rxn_scores: Mapping[str, float] | None = None, |
| 80 | + *, |
| 81 | + essential_rxns: Iterable[str] | None = None, |
| 82 | + essential_directions: Mapping[str, int] | None = None, |
| 83 | + essential_force: Mapping[str, float] | None = None, |
| 84 | + allow_excretion: bool = False, |
| 85 | + rem_pos_rev: bool = False, |
| 86 | + ignore_mets: Iterable[str] = (), |
| 87 | + force_on: float = _FORCE_ON, |
| 88 | + force_on_ess: float = _FORCE_ON, |
| 89 | + big_m: float = _BIG_M, |
| 90 | + mip_gap: float | None = None, |
| 91 | + time_limit: float | None = None, |
| 92 | +) -> FtInitResult: |
| 93 | + """Run the single-step ftINIT MILP and return the extracted model. |
| 94 | +
|
| 95 | + ``rxn_scores`` maps reaction id → score (default 0 → reaction left free in the |
| 96 | + model, not scored or removable). ``essential_rxns`` are forced to carry flux |
| 97 | + (≥ ``force_on_ess``); ``essential_directions`` maps an essential reaction id to |
| 98 | + ``+1`` (forward) or ``-1`` (reverse) for the forced direction (default forward). |
| 99 | + ``ignore_mets`` are metabolite **names** whose mass balance is dropped (RAVEN's |
| 100 | + per-step "simple metabolite" removal, e.g. H2O/H+). See the module docstring for |
| 101 | + the formulation. This is the single-step variant; the staged schedule |
| 102 | + (:func:`raven_python.init.ftinit`) calls it per step. |
| 103 | + """ |
| 104 | + scores = dict(rxn_scores or {}) |
| 105 | + essential = set(essential_rxns or []) |
| 106 | + directions = dict(essential_directions or {}) |
| 107 | + essential_force = dict(essential_force or {}) |
| 108 | + ignore_met_names = set(ignore_mets) |
| 109 | + prob = model.problem |
| 110 | + opt = prob.Model() |
| 111 | + |
| 112 | + variables: list = [] |
| 113 | + constraints: list = [] |
| 114 | + flux_terms: dict[str, list[tuple[object, float]]] = {} # rxn id -> [(var, sign)] |
| 115 | + indicators: dict[str, tuple[object, float]] = {} # rxn id -> (indicator var, score) |
| 116 | + free_or_essential: set[str] = set() # kept regardless of an indicator |
| 117 | + |
| 118 | + def add_constraint(expr, **kw): |
| 119 | + constraints.append(prob.Constraint(expr, **kw)) |
| 120 | + |
| 121 | + for rxn in model.reactions: |
| 122 | + rid = rxn.id |
| 123 | + lb, ub = rxn.lower_bound, rxn.upper_bound |
| 124 | + score = float(scores.get(rid, 0.0)) |
| 125 | + if rem_pos_rev and score > 0 and lb < 0 < ub: |
| 126 | + score = 0.0 # staging step 1: positive reversibles dropped from the problem |
| 127 | + |
| 128 | + if rid in essential: |
| 129 | + # Forced to carry flux in its forced direction (default forward); respect a |
| 130 | + # stricter native bound if the model already forces more flux. The forced |
| 131 | + # magnitude may be set per reaction (RAVEN's min(0.99·|prev flux|, 0.1), so |
| 132 | + # a reaction is never forced above what it carried before). |
| 133 | + force = essential_force.get(rid, force_on_ess) if essential_force else force_on_ess |
| 134 | + if directions.get(rid, 1) >= 0: |
| 135 | + forced = min(force, ub) # clamp to capacity so we never make lb > ub |
| 136 | + v = prob.Variable(f"v_{rid}", lb=max(forced, lb, 0.0), ub=ub) |
| 137 | + else: # reverse: flux ≤ -force |
| 138 | + forced = min(force, -lb) |
| 139 | + v = prob.Variable(f"v_{rid}", lb=lb, ub=min(-forced, ub)) |
| 140 | + variables.append(v) |
| 141 | + flux_terms[rid] = [(v, 1.0)] |
| 142 | + free_or_essential.add(rid) |
| 143 | + continue |
| 144 | + |
| 145 | + if score == 0.0: # free: carries flux for connectivity, not scored/removable |
| 146 | + v = prob.Variable(f"v_{rid}", lb=lb, ub=ub) |
| 147 | + variables.append(v) |
| 148 | + flux_terms[rid] = [(v, 1.0)] |
| 149 | + free_or_essential.add(rid) |
| 150 | + continue |
| 151 | + |
| 152 | + reversible = lb < 0 < ub |
| 153 | + if reversible: |
| 154 | + vp = prob.Variable(f"vp_{rid}", lb=0.0, ub=ub) |
| 155 | + vn = prob.Variable(f"vn_{rid}", lb=0.0, ub=-lb) |
| 156 | + variables += [vp, vn] |
| 157 | + flux_terms[rid] = [(vp, 1.0), (vn, -1.0)] |
| 158 | + total = vp + vn # |flux| (one of vp/vn pinned to 0 below), used by the gates |
| 159 | + else: # single-direction: keep the model's own [lb, ub] (incl. any forced lb>0) |
| 160 | + v = prob.Variable(f"v_{rid}", lb=lb, ub=ub) |
| 161 | + variables.append(v) |
| 162 | + flux_terms[rid] = [(v, 1.0)] |
| 163 | + total = v if ub > 0 else -v # magnitude for a single-direction reaction |
| 164 | + |
| 165 | + if score > 0: |
| 166 | + y = prob.Variable(f"y_{rid}", lb=0.0, ub=1.0) # continuous indicator, no binary |
| 167 | + variables.append(y) |
| 168 | + indicators[rid] = (y, score) |
| 169 | + add_constraint(total - force_on * y, lb=0.0, name=f"on_{rid}") # y=1 ⇒ |flux| ≥ force_on |
| 170 | + if reversible: # one direction binary stops a fwd/back loop faking "on" |
| 171 | + b = prob.Variable(f"b_{rid}", type="binary") |
| 172 | + variables.append(b) |
| 173 | + add_constraint(vp - big_m * b, ub=0.0, name=f"dirp_{rid}") # vp ≤ M·b |
| 174 | + add_constraint(vn + big_m * b, ub=big_m, name=f"dirn_{rid}") # vn ≤ M·(1-b) |
| 175 | + else: # score < 0 |
| 176 | + x = prob.Variable(f"x_{rid}", type="binary") |
| 177 | + variables.append(x) |
| 178 | + indicators[rid] = (x, score) |
| 179 | + add_constraint(total - big_m * x, ub=0.0, name=f"off_{rid}") # flux>0 ⇒ x=1 |
| 180 | + |
| 181 | + # Steady state S·v {== 0 | >= 0}; ignored metabolites are left unbalanced. |
| 182 | + # Build each metabolite's balance as a *flat* list of (coeff·sign)·var terms and sum |
| 183 | + # it with optlang.symbolics.add. Python's builtin sum re-canonicalises a growing |
| 184 | + # sympy expression at every step (O(n²)); for hub metabolites that appear in ~10³ |
| 185 | + # reactions that is minutes per constraint. add() builds the sum in one pass. |
| 186 | + met_terms: dict = {m: [] for m in model.metabolites if m.name not in ignore_met_names} |
| 187 | + for rxn in model.reactions: |
| 188 | + terms = flux_terms[rxn.id] |
| 189 | + for met, coeff in rxn.metabolites.items(): |
| 190 | + bucket = met_terms.get(met) |
| 191 | + if bucket is None: |
| 192 | + continue |
| 193 | + for var, sign in terms: |
| 194 | + bucket.append(mul([Real(coeff * sign), var])) |
| 195 | + for termlist in met_terms.values(): |
| 196 | + if termlist: |
| 197 | + add_constraint(add(termlist), lb=0.0, ub=None if allow_excretion else 0.0) |
| 198 | + |
| 199 | + opt.add(variables + constraints) |
| 200 | + opt.objective = prob.Objective( |
| 201 | + add([mul([Real(score), ind]) for ind, score in indicators.values()]), direction="max" |
| 202 | + ) |
| 203 | + if time_limit is not None: |
| 204 | + opt.configuration.timeout = int(time_limit) |
| 205 | + if mip_gap is not None: |
| 206 | + try: # Gurobi-specific; harmless if the backend differs |
| 207 | + opt.problem.Params.MIPGap = mip_gap |
| 208 | + except Exception: # noqa: BLE001 |
| 209 | + pass |
| 210 | + opt.optimize() |
| 211 | + # Accept a near-optimal incumbent (when a MIP gap / time limit is set), as RAVEN does. |
| 212 | + if opt.status not in ("optimal", "feasible", "suboptimal", "time_limit"): |
| 213 | + raise RuntimeError(f"ftINIT MILP did not solve (status: {opt.status}).") |
| 214 | + |
| 215 | + # RAVEN: a reaction is "on" iff its indicator ≥ 0.5 (positive indicators are |
| 216 | + # continuous and can land fractionally when a reaction can carry only tiny flux). |
| 217 | + on = {rid for rid, (ind, _) in indicators.items() if (ind.primal or 0.0) >= 0.5} |
| 218 | + kept = free_or_essential | on |
| 219 | + deleted = [r.id for r in model.reactions if r.id not in kept] |
| 220 | + fluxes = { |
| 221 | + rid: sum(sign * (var.primal or 0.0) for var, sign in terms) |
| 222 | + for rid, terms in flux_terms.items() |
| 223 | + } |
| 224 | + |
| 225 | + out = model.copy() |
| 226 | + out.remove_reactions(deleted, remove_orphans=True) |
| 227 | + return FtInitResult(out, sorted(kept), sorted(deleted), fluxes, |
| 228 | + float(opt.objective.value), on_reactions=on) |
| 229 | + |
| 230 | + |
| 231 | +def ftinit( |
| 232 | + prep, |
| 233 | + rxn_scores: Mapping[str, float], |
| 234 | + *, |
| 235 | + gene_scores: Mapping[str, float] | None = None, |
| 236 | + series: str = "1+1", |
| 237 | + steps=None, |
| 238 | + fill_gaps: bool = True, |
| 239 | + metabolomics: Iterable[str] | None = None, |
| 240 | + force_on: float = _FORCE_ON, |
| 241 | + big_m: float = _BIG_M, |
| 242 | + mip_gap: float | None = None, |
| 243 | + time_limit: float | None = None, |
| 244 | +) -> cobra.Model: |
| 245 | + """Run the full ftINIT pipeline on prepData and return the context-specific model. |
| 246 | +
|
| 247 | + ``prep`` is a :class:`raven_python.init.PrepData`. ``rxn_scores`` maps **original** |
| 248 | + reaction id → score (e.g. from :func:`score_reactions_from_genes` on the template). |
| 249 | + Each step (:func:`raven_python.init.get_init_steps`) regroups scores under its |
| 250 | + ``ignore_mask``, fixes the reactions turned on by earlier steps as essential (in |
| 251 | + their flux direction), and solves :func:`run_ftinit` on the merged model. Reactions |
| 252 | + never turned on (and not essential or left-in) are removed from the reference model; |
| 253 | + exchange reactions are always kept (RAVEN re-adds them). |
| 254 | +
|
| 255 | + If ``fill_gaps`` and ``prep`` carries tasks, reactions are added back so every task |
| 256 | + is feasible (:func:`raven_python.init.fill_tasks`). If ``gene_scores`` is given, |
| 257 | + negative-scoring genes are pruned from the GPRs at the end |
| 258 | + (:func:`raven_python.init.remove_low_score_genes`). |
| 259 | +
|
| 260 | + Essential reactions are forced to carry ``force_on`` (default 0.1) of flux in the |
| 261 | + forced direction. On genome-scale models a stricter regime is needed (the previous |
| 262 | + step's actual carried flux instead of a flat 0.1) — exposed via per-reaction |
| 263 | + ``essential_force`` on :func:`run_ftinit`. |
| 264 | +
|
| 265 | + ``metabolomics`` (a list of detected metabolite names to reward producing) is |
| 266 | + **not yet implemented**: the linear merge eliminates degree-2 detected metabolites, |
| 267 | + so it needs a producer-group-mapping + negative-producer force-flux block — the |
| 268 | + most intricate MILP piece, for the least-used input. Passing a non-empty value |
| 269 | + raises ``NotImplementedError``. |
| 270 | +
|
| 271 | + ``mip_gap``/``time_limit`` are forwarded to each :func:`run_ftinit` solve. On |
| 272 | + genome-scale models they are essential for tractability — see |
| 273 | + ``docs/init_param_calibration.md`` for the calibration table. |
| 274 | + """ |
| 275 | + if metabolomics: |
| 276 | + raise NotImplementedError( |
| 277 | + "metabolomics production-bonus is not yet implemented." |
| 278 | + ) |
| 279 | + steps = steps if steps is not None else get_init_steps(series) |
| 280 | + min_model, group_of = prep.min_model, prep.group_of |
| 281 | + |
| 282 | + turned_on: dict[str, float] = {} # merged reaction id -> flux (accumulated) |
| 283 | + left_in: set[str] = set() # merged reactions with score 0 in the last step |
| 284 | + for step in steps: |
| 285 | + to_zero = prep.masks.ignored(step.ignore_mask) |
| 286 | + scores = group_rxn_scores(min_model, rxn_scores, prep.orig_rxn_ids, |
| 287 | + prep.group_ids, to_zero) |
| 288 | + essential = set(prep.essential_rxns) # pre-oriented forward (default direction) |
| 289 | + directions: dict[str, int] = {} |
| 290 | + ess_force: dict[str, float] = {} |
| 291 | + if step.how_to_use_prev == "essential": |
| 292 | + for rid, flux in turned_on.items(): |
| 293 | + essential.add(rid) |
| 294 | + directions[rid] = 1 if flux >= 0 else -1 |
| 295 | + # never force more flux than the reaction carried before (RAVEN) |
| 296 | + ess_force[rid] = min(abs(flux) * 0.99, force_on) |
| 297 | + res = run_ftinit( |
| 298 | + min_model, scores, essential_rxns=essential, essential_directions=directions, |
| 299 | + essential_force=ess_force, allow_excretion=step.allow_met_secr, |
| 300 | + rem_pos_rev=step.pos_rev_off, ignore_mets=step.mets_to_ignore, |
| 301 | + force_on=force_on, force_on_ess=force_on, big_m=big_m, |
| 302 | + mip_gap=mip_gap, time_limit=time_limit, |
| 303 | + ) |
| 304 | + for rid in res.on_reactions: |
| 305 | + turned_on[rid] = res.fluxes[rid] |
| 306 | + left_in = {rid for rid, s in scores.items() if s == 0.0} |
| 307 | + |
| 308 | + # Merged reactions to keep: turned on + permanently essential + left-in (score 0). |
| 309 | + kept_min = set(turned_on) | set(prep.essential_rxns) | left_in |
| 310 | + deleted_min = [r.id for r in min_model.reactions if r.id not in kept_min] |
| 311 | + |
| 312 | + # Map deleted merged reactions back to all originals in their groups. |
| 313 | + removed_groups = {group_of[rid] for rid in deleted_min if group_of[rid] != 0} |
| 314 | + to_remove = {o for o in prep.orig_rxn_ids if group_of[o] and group_of[o] in removed_groups} |
| 315 | + to_remove |= {rid for rid in deleted_min if group_of[rid] == 0} # unmerged |
| 316 | + # Keep the surviving originals plus all exchange reactions (always re-added). |
| 317 | + final_kept = (set(prep.orig_rxn_ids) - to_remove) | prep.masks.exchange |
| 318 | + |
| 319 | + out = prep.ref_model.copy() |
| 320 | + out.remove_reactions([r.id for r in out.reactions if r.id not in final_kept], |
| 321 | + remove_orphans=True) |
| 322 | + |
| 323 | + if fill_gaps and prep.tasks: # add reactions back so every task is feasible |
| 324 | + out = fill_tasks(out, prep.ref_model, prep.tasks, rxn_scores=rxn_scores, |
| 325 | + mip_gap=mip_gap, time_limit=time_limit).model |
| 326 | + if gene_scores is not None: # prune negative-scoring genes from the GPRs |
| 327 | + out, _ = remove_low_score_genes(out, gene_scores) |
| 328 | + return out |
0 commit comments