Skip to content

Commit 23616a2

Browse files
authored
Metabolic tasks and connectivity gap-filling (#6)
* Project scaffold: pyproject + package skeleton + README + LICENSE * Add GitHub Actions CI and the maintainer-scripts README * Add the foundation utilities: GPR, balance, parse, sort, validate * Add the model-manipulation layer (add, remove, transport, merge, etc.) * Add binary + data resolvers for external tools and published artefacts * Add YAML and SIF model I/O * Add Excel export and the Standard-GEM git-layout export * Add BLAST and DIAMOND wrappers for protein-homology searches * Add the homology-based draft model builder (getModelFromHomology port) * Add KEGG download, dump parser and taxonomy parser * Add KEGG HMM-library build and HMM-based KO assignment * Add KEGG species-model assembly (per-organism reconstruction) * Add KEGG artefact-build scripts and HMM-cutoff calibration docs * Add metabolic-task parsing and the check_tasks validator * Add connectivity gap-filling (MILP) against template models
1 parent a9c3e25 commit 23616a2

8 files changed

Lines changed: 1089 additions & 0 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
"""Connectivity gap-filling against template models.
2+
3+
:func:`connect_blocked_reactions` adds the fewest (lowest-penalty) template reactions so
4+
reactions blocked in a draft can carry flux. For the other gap-fill flavour (fill until
5+
the objective is feasible) use ``cobra.flux_analysis.gapfill``.
6+
"""
7+
from raven_python.gapfilling.fill import GapFillResult, connect_blocked_reactions
8+
9+
__all__ = ["GapFillResult", "connect_blocked_reactions"]
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""Connectivity gap-filling: add the fewest template reactions so reactions that are
2+
*blocked* in a draft can carry flux.
3+
4+
For the other gap-filling flavour (add the fewest template reactions until the model's
5+
own objective becomes feasible) use ``cobra.flux_analysis.gapfill`` — just align the
6+
template's metabolite ids to the draft first, since cobra matches by id.
7+
8+
It solves an MILP: pick the minimum-penalty subset of template reactions such that the
9+
blocked (irreversible) draft reactions can carry flux at steady state. Template
10+
metabolites are matched to the draft by ``name[compartment]`` (via
11+
:func:`add_reactions_from_model`), so templates in a different identifier namespace
12+
than the model still work. Per-reaction ``scores`` (higher = prefer to include) map to
13+
RAVEN's ``rxnScores``; the MILP minimises the penalty ``-score`` (default penalty
14+
``1.0``, i.e. minimise the number of reactions added).
15+
"""
16+
from __future__ import annotations
17+
18+
from collections.abc import Iterable
19+
from dataclasses import dataclass
20+
21+
import cobra
22+
from cobra.flux_analysis import find_blocked_reactions, flux_variability_analysis
23+
24+
from raven_python.manipulation.transfer import add_reactions_from_model
25+
26+
27+
@dataclass
28+
class GapFillResult:
29+
"""Outcome of a connectivity gap-fill.
30+
31+
``added_reactions`` are the template reaction ids added to ``model``;
32+
``newly_connected`` are draft reactions that were blocked but can now carry flux;
33+
``cannot_connect`` are blocked reactions left unconnectable.
34+
"""
35+
36+
added_reactions: list[str]
37+
newly_connected: list[str]
38+
cannot_connect: list[str]
39+
model: cobra.Model
40+
41+
42+
def _as_models(templates: cobra.Model | Iterable[cobra.Model]) -> list[cobra.Model]:
43+
return [templates] if isinstance(templates, cobra.Model) else list(templates)
44+
45+
46+
def _merge_templates(model: cobra.Model, templates: list[cobra.Model]) -> tuple[cobra.Model, list[str]]:
47+
"""Copy every template reaction (new ones only) into a working copy of ``model``.
48+
49+
Returns the working model and the ids of the reactions that came from templates
50+
(the gap-fill candidates). Metabolites are matched by ``name[compartment]``.
51+
"""
52+
working = model.copy()
53+
template_ids: list[str] = []
54+
for template in templates:
55+
new = [r.id for r in template.reactions if r.id not in working.reactions]
56+
if new:
57+
added = add_reactions_from_model(working, template, new, genes=False, note=None)
58+
template_ids += [r.id for r in added]
59+
return working, template_ids
60+
61+
62+
def _solve_min_templates(
63+
working: cobra.Model,
64+
template_ids: list[str],
65+
*,
66+
scores: dict[str, float] | None,
67+
penalty: float,
68+
allow_net_production: bool,
69+
) -> set[str] | None:
70+
"""MILP: minimum-penalty template reactions making ``working`` feasible.
71+
72+
The requirement (here, forced flux through the blocked reactions) must already be
73+
imposed on ``working``. Returns the template reaction ids to keep, or ``None`` if
74+
the problem is infeasible.
75+
"""
76+
prob = working.problem
77+
indicators: dict[str, object] = {}
78+
extra = []
79+
for rid in template_ids:
80+
rxn = working.reactions.get_by_id(rid)
81+
y = prob.Variable(f"_gf_keep_{rid}", type="binary")
82+
indicators[rid] = y
83+
# Flux is confined to [lb*y, ub*y]: zero unless the reaction is kept (y=1).
84+
extra.append(prob.Constraint(rxn.flux_expression - rxn.upper_bound * y, ub=0, name=f"_gf_ub_{rid}"))
85+
extra.append(prob.Constraint(rxn.flux_expression - rxn.lower_bound * y, lb=0, name=f"_gf_lb_{rid}"))
86+
working.add_cons_vars(list(indicators.values()) + extra)
87+
88+
if allow_net_production: # relax steady state to Sv >= 0 (mets may accumulate)
89+
for met in working.metabolites:
90+
working.constraints[met.id].ub = None
91+
92+
def pen(rid: str) -> float:
93+
return -scores[rid] if scores and rid in scores else penalty
94+
95+
working.objective = prob.Objective(
96+
sum(pen(rid) * indicators[rid] for rid in template_ids), direction="min"
97+
)
98+
working.slim_optimize()
99+
if working.solver.status != "optimal":
100+
return None
101+
return {rid for rid, y in indicators.items() if (y.primal or 0) > 0.5}
102+
103+
104+
def _build_filled(model: cobra.Model, templates: list[cobra.Model], chosen: set[str]) -> cobra.Model:
105+
filled = model.copy()
106+
remaining = set(chosen)
107+
for template in templates:
108+
ids = [r for r in remaining if r in template.reactions]
109+
if ids:
110+
add_reactions_from_model(filled, template, ids, genes=False, note="Added by connect_blocked_reactions")
111+
remaining -= set(ids)
112+
return filled
113+
114+
115+
def connect_blocked_reactions(
116+
model: cobra.Model,
117+
templates: cobra.Model | Iterable[cobra.Model],
118+
*,
119+
scores: dict[str, float] | None = None,
120+
penalty: float = 1.0,
121+
allow_net_production: bool = False,
122+
eps: float = 1.0,
123+
) -> GapFillResult:
124+
"""Add template reactions so blocked draft reactions can carry flux.
125+
126+
Finds reactions that
127+
cannot carry flux in ``model``, then adds the minimum-penalty set of template
128+
reactions that lets the (irreversible) ones carry flux, and returns the filled
129+
model. Like RAVEN, only irreversible blocked reactions are forced — reversible
130+
ones can carry flux trivially in the split formulation, so forcing them is
131+
uninformative.
132+
133+
For the *other* gap-filling flavour — adding reactions to make the model's
134+
objective feasible — use ``cobra.flux_analysis.gapfill`` after aligning the
135+
template's metabolite ids to the draft.
136+
137+
The draft is expected to have exchange reactions for its nutrients (otherwise most
138+
reactions are trivially blocked).
139+
"""
140+
templates = _as_models(templates)
141+
blocked = set(find_blocked_reactions(model))
142+
candidates = [r for r in blocked if model.reactions.get_by_id(r).lower_bound >= 0]
143+
144+
working, template_ids = _merge_templates(model, templates)
145+
146+
target: list[str] = []
147+
if candidates:
148+
fva = flux_variability_analysis(working, reaction_list=candidates, fraction_of_optimum=0.0)
149+
# A reaction can be missing from the FVA frame if the solver dropped it
150+
# (e.g. the reaction was eliminated upstream); treat that as "unreachable"
151+
# rather than letting the KeyError propagate.
152+
target = [
153+
r for r in candidates
154+
if r in fva.index and fva.at[r, "maximum"] > eps
155+
]
156+
157+
cannot = sorted(blocked - set(target))
158+
if not target:
159+
return GapFillResult([], [], cannot, model.copy())
160+
161+
for rid in target:
162+
working.reactions.get_by_id(rid).lower_bound = eps
163+
chosen = _solve_min_templates(
164+
working, template_ids, scores=scores, penalty=penalty,
165+
allow_net_production=allow_net_production,
166+
)
167+
if chosen is None:
168+
raise RuntimeError(
169+
"Gap-filling is infeasible: the blocked reactions cannot all carry flux "
170+
"even with every template reaction added."
171+
)
172+
return GapFillResult(sorted(chosen), sorted(target), cannot, _build_filled(model, templates, chosen))

src/raven_python/tasks/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Metabolic task definition, parsing, and checking.
2+
3+
* :class:`Task` + :func:`parse_task_list` — the task-list file format.
4+
* :func:`check_tasks` + :class:`TaskResult` — run tasks against a model.
5+
* :func:`find_task_essential_reactions` + :class:`EssentialReactionsResult` — reactions
6+
a model must use to satisfy a task list (the input for (f)tINIT's task layer).
7+
"""
8+
from raven_python.tasks.check import (
9+
EssentialReactionsResult,
10+
TaskResult,
11+
check_tasks,
12+
find_task_essential_reactions,
13+
)
14+
from raven_python.tasks.tasklist import Task, parse_task_list
15+
16+
__all__ = [
17+
"EssentialReactionsResult",
18+
"Task",
19+
"TaskResult",
20+
"check_tasks",
21+
"find_task_essential_reactions",
22+
"parse_task_list",
23+
]

0 commit comments

Comments
 (0)