Skip to content

Commit b39f336

Browse files
committed
Add the tINIT (INIT) MILP and its supporting machinery
1 parent 2bbd4e6 commit b39f336

14 files changed

Lines changed: 1760 additions & 0 deletions

src/raven_python/init/__init__.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Context-specific model extraction (tINIT / ftINIT).
2+
3+
tINIT:
4+
* :func:`run_init` — the classic INIT MILP.
5+
* :func:`score_reactions_from_genes` / :func:`gene_scores_from_expression` —
6+
gene → reaction scoring (RNA-seq is the common upstream).
7+
* :func:`get_init_model` — the tINIT pipeline (dead-end removal + ``run_init``).
8+
9+
ftINIT (faster, staged):
10+
* :func:`run_ftinit` — the single-step ftINIT MILP (continuous indicators for
11+
positive-score reactions; binaries only on negatives — the speedup over ``run_init``).
12+
* :func:`ftinit` — the full pipeline (``prep_init_model`` → staged ``run_ftinit`` →
13+
``fill_tasks`` → ``remove_low_score_genes``).
14+
"""
15+
from raven_python.init.build import InitModelResult, get_init_model
16+
from raven_python.init.ftinit import FtInitResult, ftinit, run_ftinit
17+
from raven_python.init.genes import remove_low_score_genes
18+
from raven_python.init.init import InitResult, run_init
19+
from raven_python.init.merge import group_rxn_scores, merge_linear
20+
from raven_python.init.prep import PrepData, ReactionMasks, classify_reactions, prep_init_model
21+
from raven_python.init.score import gene_scores_from_expression, score_reactions_from_genes
22+
from raven_python.init.steps import InitStep, get_init_steps
23+
from raven_python.init.taskfill import TaskFillResult, fill_tasks
24+
25+
__all__ = [
26+
"FtInitResult",
27+
"InitModelResult",
28+
"InitResult",
29+
"InitStep",
30+
"PrepData",
31+
"ReactionMasks",
32+
"TaskFillResult",
33+
"classify_reactions",
34+
"fill_tasks",
35+
"ftinit",
36+
"gene_scores_from_expression",
37+
"get_init_model",
38+
"get_init_steps",
39+
"group_rxn_scores",
40+
"merge_linear",
41+
"prep_init_model",
42+
"remove_low_score_genes",
43+
"run_ftinit",
44+
"run_init",
45+
"score_reactions_from_genes",
46+
]

src/raven_python/init/build.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""tINIT model building — high-level pipeline.
2+
3+
Turn expression-derived scores into reaction scores (via the GPR), drop reactions that
4+
cannot carry flux, then run the INIT MILP to extract a context-specific model. Pass
5+
gene scores (typically from :func:`gene_scores_from_expression` or one of the omics
6+
loaders) or reaction scores directly. ``essential_rxns`` are forced kept.
7+
8+
For task-aware gap-filling on top of the resulting model, use ftINIT
9+
(:func:`raven_python.init.ftinit`); ``get_init_model`` itself does not run the task layer.
10+
"""
11+
from __future__ import annotations
12+
13+
from collections.abc import Iterable, Mapping
14+
from dataclasses import dataclass
15+
16+
import cobra
17+
from cobra.flux_analysis import find_blocked_reactions
18+
19+
from raven_python.init.init import run_init
20+
from raven_python.init.score import score_reactions_from_genes
21+
22+
23+
@dataclass
24+
class InitModelResult:
25+
"""Result of :func:`get_init_model`."""
26+
27+
model: cobra.Model
28+
reaction_scores: dict[str, float]
29+
deleted_dead_end_reactions: list[str]
30+
deleted_in_init: list[str]
31+
met_production: dict[str, bool]
32+
objective: float
33+
34+
35+
def get_init_model(
36+
ref_model: cobra.Model,
37+
*,
38+
rxn_scores: Mapping[str, float] | None = None,
39+
gene_scores: Mapping[str, float] | None = None,
40+
isozyme_scoring: str = "max",
41+
complex_scoring: str = "min",
42+
no_gene_score: float = -2.0,
43+
essential_rxns: Iterable[str] | None = None,
44+
present_mets: Iterable[str] | None = None,
45+
prod_weight: float = 0.5,
46+
allow_excretion: bool = True,
47+
no_rev_loops: bool = False,
48+
remove_dead_ends: bool = True,
49+
eps: float = 1.0,
50+
big_m: float | None = None,
51+
mip_gap: float | None = None,
52+
time_limit: float | None = None,
53+
) -> InitModelResult:
54+
"""Extract a context-specific model with tINIT.
55+
56+
Provide either ``rxn_scores`` (reaction id → score) or ``gene_scores`` (gene id →
57+
score, converted via the GPR with :func:`score_reactions_from_genes`). Reactions
58+
that cannot carry flux (with exchanges open) are removed first unless
59+
``remove_dead_ends=False``; ``essential_rxns`` are kept regardless. The remaining
60+
model is passed to :func:`run_init`.
61+
"""
62+
if (rxn_scores is None) == (gene_scores is None):
63+
raise ValueError("Provide exactly one of rxn_scores or gene_scores.")
64+
65+
model = ref_model.copy()
66+
essential = set(essential_rxns or [])
67+
if gene_scores is not None:
68+
scores = score_reactions_from_genes(
69+
model, gene_scores, isozyme_scoring=isozyme_scoring,
70+
complex_scoring=complex_scoring, no_gene_score=no_gene_score,
71+
)
72+
else:
73+
scores = dict(rxn_scores)
74+
75+
deleted_dead_end: list[str] = []
76+
if remove_dead_ends:
77+
# Identify and drop reactions that cannot carry flux even under the
78+
# *most permissive* boundary regime: every metabolite open for excretion
79+
# (when ``allow_excretion``) plus the exchange-opened FVA. That makes
80+
# the pre-filter conservative — only reactions blocked under both lax
81+
# and strict regimes are removed, so the strict run_init path never
82+
# loses a candidate it could have used.
83+
probe = model.copy()
84+
original_ids = {r.id for r in model.reactions}
85+
if allow_excretion:
86+
has_boundary = {m.id for r in probe.boundary for m in r.metabolites}
87+
for met in list(probe.metabolites):
88+
if met.id not in has_boundary:
89+
probe.add_boundary(met, type="demand")
90+
blocked = set(find_blocked_reactions(probe, open_exchanges=True))
91+
deleted_dead_end = sorted((blocked & original_ids) - essential)
92+
model.remove_reactions(deleted_dead_end, remove_orphans=True)
93+
94+
result = run_init(
95+
model, scores,
96+
present_mets=present_mets,
97+
essential_rxns=essential & {r.id for r in model.reactions},
98+
prod_weight=prod_weight,
99+
allow_excretion=allow_excretion,
100+
no_rev_loops=no_rev_loops,
101+
eps=eps,
102+
big_m=big_m,
103+
mip_gap=mip_gap,
104+
time_limit=time_limit,
105+
)
106+
return InitModelResult(
107+
model=result.model,
108+
reaction_scores=scores,
109+
deleted_dead_end_reactions=deleted_dead_end,
110+
deleted_in_init=result.deleted_reactions,
111+
met_production=result.met_production,
112+
objective=result.objective,
113+
)

src/raven_python/init/genes.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""Prune low-scoring genes from a model — the last ftINIT step.
2+
3+
Drop negative-scoring genes from each reaction's GPR, while
4+
respecting enzyme structure — genes joined by **OR** (isozymes) are candidates for
5+
removal, but at least one must remain (the least-negative if all are negative);
6+
genes joined by **AND** (complex subunits) are *not* removed individually, though a
7+
whole complex can be dropped as one isozyme alternative if its (aggregated) score is
8+
negative. Operates on cobra's GPR AST recursively, so nested rules like
9+
``G1 and (G2 or G3) and G4`` prune the inner isozyme group correctly.
10+
"""
11+
from __future__ import annotations
12+
13+
import ast
14+
import statistics
15+
from collections.abc import Mapping
16+
17+
import cobra
18+
from cobra.manipulation import remove_genes
19+
20+
_AGG = {"min": min, "max": max, "median": statistics.median, "average": statistics.fmean}
21+
22+
23+
def _prune(node, scores, iso, cplx) -> tuple[str | None, float | None]:
24+
"""Return (pruned GPR string, aggregate score) for an AST node, or (None, None)."""
25+
if isinstance(node, ast.Name):
26+
return node.id, scores.get(node.id) # None = unscored (NaN: never removed)
27+
if not isinstance(node, ast.BoolOp):
28+
return None, None
29+
30+
children = [_prune(v, scores, iso, cplx) for v in node.values]
31+
children = [(s, sc) for s, sc in children if s is not None]
32+
33+
if isinstance(node.op, ast.And): # complex: keep every subunit, prune nested ORs
34+
kept = children
35+
else: # OR / isozymes: drop negative-scoring alternatives, keep at least one
36+
kept = [(s, sc) for s, sc in children if sc is None or sc >= 0]
37+
if not kept: # all negative → keep the least-negative
38+
kept = [max(children, key=lambda c: c[1])]
39+
40+
parts = [s for s, _ in kept]
41+
score_vals = [sc for _, sc in kept if sc is not None]
42+
agg = (cplx if isinstance(node.op, ast.And) else iso)
43+
score = agg(score_vals) if score_vals else None
44+
op = " and " if isinstance(node.op, ast.And) else " or "
45+
text = parts[0] if len(parts) == 1 else "(" + op.join(parts) + ")"
46+
return text, score
47+
48+
49+
def remove_low_score_genes(
50+
model: cobra.Model,
51+
gene_scores: Mapping[str, float],
52+
*,
53+
isozyme_scoring: str = "max",
54+
complex_scoring: str = "min",
55+
) -> tuple[cobra.Model, list[str]]:
56+
"""Remove negative-scoring genes from GPRs (RAVEN ``removeLowScoreGenes``).
57+
58+
``gene_scores`` maps gene id → score; genes absent from it are treated as unscored
59+
(never removed). Returns ``(new_model, removed_gene_ids)`` — genes dropped from
60+
*every* rule they were in (and thus from the model). ``isozyme_scoring`` /
61+
``complex_scoring`` aggregate alternative/subunit scores (``max``/``min`` default).
62+
63+
When all isozyme alternatives are negative the least-negative one is kept
64+
**deterministically** (first on a tie), unlike RAVEN's random tie-break — same
65+
quality, reproducible.
66+
"""
67+
for name, value in (("isozyme_scoring", isozyme_scoring), ("complex_scoring", complex_scoring)):
68+
if value not in _AGG:
69+
raise ValueError(f"{name} must be one of {sorted(_AGG)}; got {value!r}.")
70+
iso, cplx = _AGG[isozyme_scoring], _AGG[complex_scoring]
71+
72+
out = model.copy()
73+
for rxn in out.reactions:
74+
body = rxn.gpr.body
75+
if body is None or not rxn.genes:
76+
continue
77+
pruned, _ = _prune(body, gene_scores, iso, cplx)
78+
if pruned is not None:
79+
rxn.gene_reaction_rule = pruned
80+
81+
used = {g.id for rxn in out.reactions for g in rxn.genes}
82+
removed = sorted(g.id for g in out.genes if g.id not in used)
83+
if removed:
84+
remove_genes(out, removed, remove_reactions=False)
85+
return out, removed

0 commit comments

Comments
 (0)