Add connectivity gap-filling (MILP) against template models

edkerk · edkerk · commit 2bbd4e6be538 · 2026-05-29T23:43:20.000+02:00
diff --git a/src/raven_python/gapfilling/__init__.py b/src/raven_python/gapfilling/__init__.py
@@ -0,0 +1,9 @@
+"""Connectivity gap-filling against template models.
+
+:func:`connect_blocked_reactions` adds the fewest (lowest-penalty) template reactions so
+reactions blocked in a draft can carry flux. For the other gap-fill flavour (fill until
+the objective is feasible) use ``cobra.flux_analysis.gapfill``.
+"""
+from raven_python.gapfilling.fill import GapFillResult, connect_blocked_reactions
+
+__all__ = ["GapFillResult", "connect_blocked_reactions"]
diff --git a/src/raven_python/gapfilling/fill.py b/src/raven_python/gapfilling/fill.py
@@ -0,0 +1,172 @@
+"""Connectivity gap-filling: add the fewest template reactions so reactions that are
+*blocked* in a draft can carry flux.
+
+For the other gap-filling flavour (add the fewest template reactions until the model's
+own objective becomes feasible) use ``cobra.flux_analysis.gapfill`` — just align the
+template's metabolite ids to the draft first, since cobra matches by id.
+
+This module solves an MILP: pick the minimum-penalty subset of template reactions such that
+the blocked (irreversible) draft reactions can carry flux at steady state. Template
+metabolites are matched to the draft by ``name[compartment]`` (via
+:func:`add_reactions_from_model`), so templates in a different identifier namespace
+than the model still work. Per-reaction ``scores`` (higher = prefer to include) map to
+RAVEN's ``rxnScores``; the MILP minimises the penalty ``-score`` (default penalty
+``1.0``, i.e. minimise the number of reactions added).
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+from dataclasses import dataclass
+
+import cobra
+from cobra.flux_analysis import find_blocked_reactions, flux_variability_analysis
+
+from raven_python.manipulation.transfer import add_reactions_from_model
+
+
+@dataclass
+class GapFillResult:
+    """Outcome of a connectivity gap-fill.
+
+    ``added_reactions``: ids of the template reactions added to the returned ``model``;
+    ``newly_connected``: draft reactions that were blocked but can now carry flux;
+    ``cannot_connect``: blocked reactions left unconnected; ``model``: the filled model.
+    """
+
+    added_reactions: list[str]
+    newly_connected: list[str]
+    cannot_connect: list[str]
+    model: cobra.Model  # filled copy; the draft passed in is not modified
+
+
+def _as_models(templates: cobra.Model | Iterable[cobra.Model]) -> list[cobra.Model]:
+    return list(templates) if not isinstance(templates, cobra.Model) else [templates]
+
+
+def _merge_templates(model: cobra.Model, templates: list[cobra.Model]) -> tuple[cobra.Model, list[str]]:
+    """Build the candidate pool: a copy of ``model`` extended with template reactions.
+
+    Only reactions whose id is not yet in the working copy are transferred. Returns the
+    working copy and the ids of the transferred reactions (the gap-fill candidates).
+    """
+    merged = model.copy()
+    candidate_ids: list[str] = []
+    for tmpl in templates:
+        missing = [rxn.id for rxn in tmpl.reactions if rxn.id not in merged.reactions]
+        if missing:
+            transferred = add_reactions_from_model(merged, tmpl, missing, genes=False, note=None)
+            candidate_ids.extend(rxn.id for rxn in transferred)
+    return merged, candidate_ids
+
+
+def _solve_min_templates(
+    working: cobra.Model,
+    template_ids: list[str],
+    *,
+    scores: dict[str, float] | None,
+    penalty: float,
+    allow_net_production: bool,
+) -> set[str] | None:
+    """MILP: minimum-penalty template reactions making ``working`` feasible.
+
+    The forced flux must already be imposed on ``working``, which is modified in place.
+    Each kept reaction costs ``-scores[rid]`` when scored, otherwise ``penalty``. Returns
+    the ids to keep, or ``None`` when the solver status is not ``"optimal"``.
+    """
+    prob = working.problem  # interface used below to build Variable/Constraint/Objective
+    indicators: dict[str, object] = {}
+    extra = []
+    for rid in template_ids:
+        rxn = working.reactions.get_by_id(rid)
+        y = prob.Variable(f"_gf_keep_{rid}", type="binary")
+        indicators[rid] = y
+        # Flux is confined to [lb*y, ub*y]: zero unless the reaction is kept (y=1).
+        extra.append(prob.Constraint(rxn.flux_expression - rxn.upper_bound * y, ub=0, name=f"_gf_ub_{rid}"))
+        extra.append(prob.Constraint(rxn.flux_expression - rxn.lower_bound * y, lb=0, name=f"_gf_lb_{rid}"))
+    working.add_cons_vars(list(indicators.values()) + extra)
+
+    if allow_net_production:  # relax steady state to Sv >= 0 (mets may accumulate)
+        for met in working.metabolites:
+            working.constraints[met.id].ub = None
+
+    def pen(rid: str) -> float:  # cost of keeping ``rid`` in the objective
+        return -scores[rid] if scores and rid in scores else penalty
+
+    working.objective = prob.Objective(
+        sum(pen(rid) * indicators[rid] for rid in template_ids), direction="min"
+    )
+    working.slim_optimize()
+    if working.solver.status != "optimal":
+        return None
+    return {rid for rid, y in indicators.items() if (y.primal or 0) > 0.5}  # missing primal counts as 0
+
+
+def _build_filled(model: cobra.Model, templates: list[cobra.Model], chosen: set[str]) -> cobra.Model:
+    """Copy ``model`` and add each ``chosen`` reaction from the first template that has it."""
+    filled, remaining = model.copy(), set(chosen)
+    for template in templates:
+        ids = sorted(r for r in remaining if r in template.reactions)  # sorted: reproducible order
+        if ids:
+            add_reactions_from_model(filled, template, ids, genes=False, note="Added by connect_blocked_reactions")
+            remaining -= set(ids)
+    return filled
+
+
+def connect_blocked_reactions(
+    model: cobra.Model,
+    templates: cobra.Model | Iterable[cobra.Model],
+    *,
+    scores: dict[str, float] | None = None,
+    penalty: float = 1.0,
+    allow_net_production: bool = False,
+    eps: float = 1.0,
+) -> GapFillResult:
+    """Add template reactions so blocked draft reactions can carry flux.
+
+    Finds the reactions blocked in ``model`` and adds the minimum-penalty set of
+    template reactions that lets the irreversible ones (``lower_bound >= 0``) carry a
+    flux of at least ``eps``. Like RAVEN, reversible blocked reactions are not forced;
+    they are listed in ``cannot_connect`` along with the unreachable irreversible ones.
+    For objective-driven gap-filling use ``cobra.flux_analysis.gapfill`` instead. The
+    draft is expected to have exchange reactions for its nutrients.
+
+    ``templates`` is one model or an iterable of models; ``scores`` maps template
+    reaction ids to scores (penalty ``-score``), unscored reactions cost ``penalty``;
+    ``allow_net_production`` relaxes steady state so metabolites may accumulate.
+
+    Returns a :class:`GapFillResult` whose ``model`` is a filled copy (``model`` itself
+    is not modified). Raises ``ValueError`` if ``eps`` is not positive and
+    ``RuntimeError`` if the MILP has no optimal solution (targets cannot all carry flux).
+    """
+    if not eps > 0:
+        # Without a positive forced flux the MILP "connects" every target by adding nothing.
+        raise ValueError(f"eps must be positive, got {eps!r}")
+    templates = _as_models(templates)
+    blocked = set(find_blocked_reactions(model))
+    # Sorted so the FVA request does not depend on set iteration order.
+    candidates = [r for r in sorted(blocked) if model.reactions.get_by_id(r).lower_bound >= 0]
+    working, template_ids = _merge_templates(model, templates)
+
+    target: list[str] = []
+    if candidates:
+        fva = flux_variability_analysis(working, reaction_list=candidates, fraction_of_optimum=0.0)
+        # Connectable = can exceed eps once every template reaction is available. The
+        # index check is purely defensive: a candidate without an FVA row is skipped.
+        target = [r for r in candidates if r in fva.index and fva.at[r, "maximum"] > eps]
+
+    cannot = sorted(blocked - set(target))
+    if not target:
+        return GapFillResult([], [], cannot, model.copy())
+
+    for rid in target:
+        working.reactions.get_by_id(rid).lower_bound = eps  # force flux through every target
+    chosen = _solve_min_templates(
+        working, template_ids, scores=scores, penalty=penalty,
+        allow_net_production=allow_net_production,
+    )
+    if chosen is None:
+        raise RuntimeError(
+            "Gap-filling is infeasible: the blocked reactions cannot all carry flux "
+            "even with every template reaction added."
+        )
+    return GapFillResult(sorted(chosen), sorted(target), cannot, _build_filled(model, templates, chosen))
diff --git a/tests/test_gapfilling.py b/tests/test_gapfilling.py
@@ -0,0 +1,109 @@
+"""Tests for connectivity gap-filling (gapfilling/fill.py, Phase 4b)."""
+import cobra
+import pytest
+
+from raven_python.gapfilling import GapFillResult, connect_blocked_reactions
+
+
+def _met(mid):  # metabolite in compartment "c" whose name equals its id
+    return cobra.Metabolite(id=mid, name=mid, compartment="c")
+
+
+@pytest.fixture
+def draft_and_template():
+    """Draft with a dead end (EX_A -> A -> B via r1; B is never consumed, so r1 is blocked).
+
+    The template offers r2 (B -> C), EX_C (exchange for C) and an unrelated D -> E reaction.
+    """
+    met_a, met_b = _met("A_c"), _met("B_c")
+    draft = cobra.Model("draft")
+    uptake = cobra.Reaction("EX_A", lower_bound=-10, upper_bound=1000)
+    uptake.add_metabolites({met_a: 1})
+    dead_end = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)  # A -> B, irreversible
+    dead_end.add_metabolites({met_a: -1, met_b: 1})
+    draft.add_reactions([uptake, dead_end])
+
+    template = cobra.Model("template")
+    convert = cobra.Reaction("r2", lower_bound=0, upper_bound=1000)  # B -> C
+    convert.add_metabolites({_met("B_c"): -1, _met("C_c"): 1})
+    drain_c = cobra.Reaction("EX_C", lower_bound=-1000, upper_bound=1000)
+    drain_c.add_metabolites({_met("C_c"): -1})
+    unrelated = cobra.Reaction("r_unneeded", lower_bound=0, upper_bound=1000)  # D -> E, irrelevant
+    unrelated.add_metabolites({_met("D_c"): -1, _met("E_c"): 1})
+    template.add_reactions([convert, drain_c, unrelated])
+    return draft, template
+
+
+# --------------------------------------------------------------------------- #
+# Connectivity gap-fill
+# --------------------------------------------------------------------------- #
+def test_fill_gaps_connects_blocked_reaction(draft_and_template):
+    """r1 starts blocked; gap-filling must add exactly r2 and EX_C to connect it."""
+    draft, template = draft_and_template
+    assert "r1" in cobra.flux_analysis.find_blocked_reactions(draft)  # precondition
+    result = connect_blocked_reactions(draft, template)
+    assert isinstance(result, GapFillResult)
+    assert "r1" in result.newly_connected
+    assert set(result.added_reactions) == {"r2", "EX_C"}  # both needed to drain B
+    assert "r_unneeded" not in result.added_reactions  # irrelevant template rxn not added
+
+
+def test_fill_gaps_returns_working_model_that_unblocks(draft_and_template):
+    """The returned model contains the added reactions; the input draft does not."""
+    draft, template = draft_and_template
+    filled = connect_blocked_reactions(draft, template).model
+    assert {"r2", "EX_C"}.issubset(rxn.id for rxn in filled.reactions)
+    assert "r1" not in cobra.flux_analysis.find_blocked_reactions(filled)
+    assert all(rxn.id != "r2" for rxn in draft.reactions)  # original draft is untouched
+
+
+def test_fill_gaps_nothing_to_do_when_unblocked(draft_and_template):
+    """With its own drain for B the draft leaves r1 unblocked, so nothing is added."""
+    draft, template = draft_and_template
+    ex_b = cobra.Reaction("EX_B", lower_bound=-1000, upper_bound=1000)
+    ex_b.add_metabolites({draft.metabolites.B_c: -1})
+    draft.add_reactions([ex_b])
+    result = connect_blocked_reactions(draft, template)
+    assert result.added_reactions == []
+    assert result.newly_connected == []
+
+
+def test_fill_gaps_scores_prefer_higher_scored_reactions():
+    """Of two equivalent drains for B, the higher-scored (lower-penalty) one is added."""
+    met_a, met_b = _met("A_c"), _met("B_c")
+    draft = cobra.Model("draft")
+    uptake = cobra.Reaction("EX_A", lower_bound=-10, upper_bound=1000)
+    uptake.add_metabolites({met_a: 1})
+    dead_end = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    dead_end.add_metabolites({met_a: -1, met_b: 1})
+    draft.add_reactions([uptake, dead_end])
+    template = cobra.Model("t")
+    first = cobra.Reaction("drain1", lower_bound=-1000, upper_bound=1000)
+    first.add_metabolites({_met("B_c"): -1})
+    second = cobra.Reaction("drain2", lower_bound=-1000, upper_bound=1000)
+    second.add_metabolites({_met("B_c"): -1})
+    template.add_reactions([first, second])
+    # The MILP penalty is -score: drain1 (score -1) costs 1, drain2 (score -5) costs 5.
+    # Only one drain is needed, so the cheaper, higher-scored drain1 is chosen.
+    result = connect_blocked_reactions(draft, template, scores={"drain1": -1.0, "drain2": -5.0})
+    assert result.added_reactions == ["drain1"]
+
+
+def test_unconnectable_reaction_reported_not_added():
+    """A blocked irreversible reaction no template can connect is reported; nothing is added."""
+    met_a, met_b = _met("A_c"), _met("B_c")
+    draft = cobra.Model("draft")
+    uptake = cobra.Reaction("EX_A", lower_bound=-10, upper_bound=1000)
+    uptake.add_metabolites({met_a: 1})
+    dead_end = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)  # A -> B, B has no drain
+    dead_end.add_metabolites({met_a: -1, met_b: 1})
+    draft.add_reactions([uptake, dead_end])
+    template = cobra.Model("t")  # offers nothing that can drain B
+    unrelated = cobra.Reaction("noise", lower_bound=0, upper_bound=1000)
+    unrelated.add_metabolites({_met("X_c"): -1, _met("Y_c"): 1})
+    template.add_reactions([unrelated])
+
+    result = connect_blocked_reactions(draft, template)
+    assert result.added_reactions == []
+    assert result.newly_connected == []
+    assert "r1" in result.cannot_connect