SysBioChalmers
diff --git a/‎src/raven_python/init/__init__.py‎
Lines changed: 46 additions & 0 deletions b/‎src/raven_python/init/__init__.py‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎src/raven_python/init/build.py‎
Lines changed: 113 additions & 0 deletions b/‎src/raven_python/init/build.py‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎src/raven_python/init/genes.py‎
Lines changed: 85 additions & 0 deletions b/‎src/raven_python/init/genes.py‎
Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,46 @@
+"""Context-specific model extraction (tINIT / ftINIT).
+
+tINIT:
+* :func:`run_init` — the classic INIT MILP.
+* :func:`score_reactions_from_genes` / :func:`gene_scores_from_expression` —
+  gene → reaction scoring (RNA-seq is the common upstream).
+* :func:`get_init_model` — the tINIT pipeline (dead-end removal + ``run_init``).
+
+ftINIT (faster, staged):
+* :func:`run_ftinit` — the single-step ftINIT MILP (continuous indicators for
+  positive-score reactions; binaries only on negatives — the speedup over ``run_init``).
+* :func:`ftinit` — the full pipeline (``prep_init_model`` → staged ``run_ftinit`` →
+  ``fill_tasks`` → ``remove_low_score_genes``).
+"""
+from raven_python.init.build import InitModelResult, get_init_model
+from raven_python.init.ftinit import FtInitResult, ftinit, run_ftinit
+from raven_python.init.genes import remove_low_score_genes
+from raven_python.init.init import InitResult, run_init
+from raven_python.init.merge import group_rxn_scores, merge_linear
+from raven_python.init.prep import PrepData, ReactionMasks, classify_reactions, prep_init_model
+from raven_python.init.score import gene_scores_from_expression, score_reactions_from_genes
+from raven_python.init.steps import InitStep, get_init_steps
+from raven_python.init.taskfill import TaskFillResult, fill_tasks
+
+# Public API of ``raven_python.init``: every name imported from the submodules
+# above is re-exported here. Entries are kept in sorted order (capitalised class
+# names first, then functions).
+__all__ = [
+    "FtInitResult",
+    "InitModelResult",
+    "InitResult",
+    "InitStep",
+    "PrepData",
+    "ReactionMasks",
+    "TaskFillResult",
+    "classify_reactions",
+    "fill_tasks",
+    "ftinit",
+    "gene_scores_from_expression",
+    "get_init_model",
+    "get_init_steps",
+    "group_rxn_scores",
+    "merge_linear",
+    "prep_init_model",
+    "remove_low_score_genes",
+    "run_ftinit",
+    "run_init",
+    "score_reactions_from_genes",
+]
@@ -0,0 +1,113 @@
+"""tINIT model building — high-level pipeline.
+
+Turn expression-derived scores into reaction scores (via the GPR), drop reactions that
+cannot carry flux, then run the INIT MILP to extract a context-specific model. Pass
+gene scores (typically from :func:`gene_scores_from_expression` or one of the omics
+loaders) or reaction scores directly. Reactions in ``essential_rxns`` are always kept.
+
+For task-aware gap-filling on top of the resulting model, use ftINIT
+(:func:`raven_python.init.ftinit`); ``get_init_model`` itself does not run the task layer.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass
+
+import cobra
+from cobra.flux_analysis import find_blocked_reactions
+
+from raven_python.init.init import run_init
+from raven_python.init.score import score_reactions_from_genes
+
+
+@dataclass
+class InitModelResult:
+    """Result of :func:`get_init_model`."""
+
+    # Extracted model, taken from the ``model`` attribute of the ``run_init`` result.
+    model: cobra.Model
+    # Reaction scores handed to ``run_init``: a copy of the caller's ``rxn_scores``
+    # or the output of ``score_reactions_from_genes`` when ``gene_scores`` was given.
+    reaction_scores: dict[str, float]
+    # Sorted ids of reactions removed by the blocked-reaction pre-filter; empty
+    # when ``remove_dead_ends=False``.
+    deleted_dead_end_reactions: list[str]
+    # ``deleted_reactions`` as reported by ``run_init``.
+    deleted_in_init: list[str]
+    # ``met_production`` as reported by ``run_init``
+    # (presumably metabolite id -> produced flag; confirm against ``run_init``).
+    met_production: dict[str, bool]
+    # ``objective`` as reported by ``run_init``.
+    objective: float
+
+
+def get_init_model(
+    ref_model: cobra.Model,
+    *,
+    rxn_scores: Mapping[str, float] | None = None,
+    gene_scores: Mapping[str, float] | None = None,
+    isozyme_scoring: str = "max",
+    complex_scoring: str = "min",
+    no_gene_score: float = -2.0,
+    essential_rxns: Iterable[str] | None = None,
+    present_mets: Iterable[str] | None = None,
+    prod_weight: float = 0.5,
+    allow_excretion: bool = True,
+    no_rev_loops: bool = False,
+    remove_dead_ends: bool = True,
+    eps: float = 1.0,
+    big_m: float | None = None,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> InitModelResult:
+    """Build a context-specific model from ``ref_model`` with tINIT.
+
+    Exactly one score source must be supplied. ``rxn_scores`` (reaction id → score)
+    is copied and used as-is; ``gene_scores`` (gene id → score) is first mapped onto
+    reactions by :func:`score_reactions_from_genes` using ``isozyme_scoring``,
+    ``complex_scoring`` and ``no_gene_score``. Supplying both or neither raises
+    ``ValueError``.
+
+    Unless ``remove_dead_ends=False``, reactions that
+    :func:`cobra.flux_analysis.find_blocked_reactions` reports as blocked are deleted
+    from a copy of ``ref_model`` before the MILP; reactions named in
+    ``essential_rxns`` are exempt. The remaining options are forwarded unchanged to
+    :func:`run_init`, and its outputs are repackaged as an :class:`InitModelResult`.
+    ``ref_model`` itself is never modified.
+    """
+    n_sources = sum(arg is not None for arg in (rxn_scores, gene_scores))
+    if n_sources != 1:
+        raise ValueError("Provide exactly one of rxn_scores or gene_scores.")
+
+    model = ref_model.copy()
+    essential = set(essential_rxns or [])
+    if rxn_scores is not None:
+        scores = dict(rxn_scores)
+    else:
+        scores = score_reactions_from_genes(
+            model,
+            gene_scores,
+            isozyme_scoring=isozyme_scoring,
+            complex_scoring=complex_scoring,
+            no_gene_score=no_gene_score,
+        )
+
+    deleted_dead_end: list[str] = []
+    if remove_dead_ends:
+        # The blocked-reaction scan runs on a scratch copy with exchanges opened.
+        # When excretion is allowed, every metabolite without a boundary reaction
+        # additionally gets a temporary demand there, so the scan uses the most
+        # permissive boundaries and only drops reactions that are blocked even then.
+        scratch = model.copy()
+        known_ids = {rxn.id for rxn in model.reactions}
+        if allow_excretion:
+            covered: set[str] = set()
+            for boundary_rxn in scratch.boundary:
+                covered.update(met.id for met in boundary_rxn.metabolites)
+            uncovered = [met for met in scratch.metabolites if met.id not in covered]
+            for met in uncovered:
+                scratch.add_boundary(met, type="demand")
+        blocked_ids = find_blocked_reactions(scratch, open_exchanges=True)
+        # Ignore the temporary demands (not in ``known_ids``) and protect essentials.
+        deleted_dead_end = sorted(
+            {rid for rid in blocked_ids if rid in known_ids and rid not in essential}
+        )
+        model.remove_reactions(deleted_dead_end, remove_orphans=True)
+
+    # Only essential ids that are still present in the model are passed on.
+    surviving_ids = {rxn.id for rxn in model.reactions}
+    init_result = run_init(
+        model,
+        scores,
+        present_mets=present_mets,
+        essential_rxns=essential & surviving_ids,
+        prod_weight=prod_weight,
+        allow_excretion=allow_excretion,
+        no_rev_loops=no_rev_loops,
+        eps=eps,
+        big_m=big_m,
+        mip_gap=mip_gap,
+        time_limit=time_limit,
+    )
+    return InitModelResult(
+        model=init_result.model,
+        reaction_scores=scores,
+        deleted_dead_end_reactions=deleted_dead_end,
+        deleted_in_init=init_result.deleted_reactions,
+        met_production=init_result.met_production,
+        objective=init_result.objective,
+    )
@@ -0,0 +1,85 @@
+"""Prune low-scoring genes from a model — the last ftINIT step.
+
+Drop negative-scoring genes from each reaction's GPR, while
+respecting enzyme structure — genes joined by **OR** (isozymes) are candidates for
+removal, but at least one must remain (the least-negative if all are negative);
+genes joined by **AND** (complex subunits) are *not* removed individually, though a
+whole complex can be dropped as one isozyme alternative if its (aggregated) score is
+negative. Operates on cobra's GPR AST recursively, so nested rules like
+``G1 and (G2 or G3) and G4`` prune the inner isozyme group correctly.
+"""
+from __future__ import annotations
+
+import ast
+import statistics
+from collections.abc import Mapping
+
+import cobra
+from cobra.manipulation import remove_genes
+
+_AGG = {"min": min, "max": max, "median": statistics.median, "average": statistics.fmean}
+
+
+def _prune(node, scores, iso, cplx) -> tuple[str | None, float | None]:
+    """Recursively prune one GPR AST node.
+
+    ``node`` is an ``ast.Name`` (a gene) or an ``ast.BoolOp`` (``and`` = complex,
+    ``or`` = isozymes); ``scores`` maps gene id → score; ``iso`` / ``cplx`` are the
+    aggregation callables applied to the surviving scores of an OR / AND group.
+
+    Returns ``(rule_text, score)`` for the pruned sub-rule. ``score`` is ``None``
+    when the gene (or every gene left in the group) is unscored. ``(None, None)``
+    is returned for any other node type and for a group with no usable operands,
+    so the caller can skip it.
+
+    A gene that is missing from ``scores`` or whose score is NaN counts as
+    unscored and is never removed.
+    """
+    if isinstance(node, ast.Name):
+        score = scores.get(node.id)
+        # NaN is the only value that is unequal to itself. Without this, a NaN
+        # score fails the ``>= 0`` test below and the gene would be pruned as if
+        # it were negative.
+        if score is not None and score != score:
+            score = None
+        return node.id, score
+    if not isinstance(node, ast.BoolOp):
+        return None, None
+
+    children = [_prune(v, scores, iso, cplx) for v in node.values]
+    children = [(s, sc) for s, sc in children if s is not None]
+    if not children:
+        # Nothing usable below this node: report "no rule" rather than emitting
+        # "()" (AND) or calling max() on an empty list (OR).
+        return None, None
+
+    is_complex = isinstance(node.op, ast.And)
+    if is_complex:  # complex: keep every subunit, nested ORs were pruned above
+        kept = children
+    else:  # OR / isozymes: drop negative-scoring alternatives, keep at least one
+        kept = [(s, sc) for s, sc in children if sc is None or sc >= 0]
+        if not kept:  # all negative → keep the least-negative (first on a tie)
+            kept = [max(children, key=lambda c: c[1])]
+
+    parts = [s for s, _ in kept]
+    score_vals = [sc for _, sc in kept if sc is not None]
+    agg = cplx if is_complex else iso
+    score = agg(score_vals) if score_vals else None
+    op = " and " if is_complex else " or "
+    # A single survivor needs no parentheses; groups are wrapped so nesting stays
+    # unambiguous when the text is embedded in a parent rule.
+    text = parts[0] if len(parts) == 1 else "(" + op.join(parts) + ")"
+    return text, score
+
+
+def remove_low_score_genes(
+    model: cobra.Model,
+    gene_scores: Mapping[str, float],
+    *,
+    isozyme_scoring: str = "max",
+    complex_scoring: str = "min",
+) -> tuple[cobra.Model, list[str]]:
+    """Strip negative-scoring genes out of every GPR (RAVEN ``removeLowScoreGenes``).
+
+    Works on a copy of ``model``; the input is left untouched. ``gene_scores`` maps
+    gene id → score, and a gene missing from it counts as unscored and is never
+    removed. ``isozyme_scoring`` / ``complex_scoring`` name the aggregation used for
+    OR / AND groups and must be keys of ``_AGG``; anything else raises ``ValueError``.
+
+    Returns ``(new_model, removed_gene_ids)``. The id list is sorted and holds the
+    genes that no reaction references after pruning; those genes are also deleted
+    from the returned model, while reactions are always kept.
+
+    If every isozyme alternative is negative, the least-negative one survives —
+    chosen **deterministically** (first on a tie) rather than by RAVEN's random
+    tie-break.
+    """
+    requested = {"isozyme_scoring": isozyme_scoring, "complex_scoring": complex_scoring}
+    for name, value in requested.items():
+        if value not in _AGG:
+            raise ValueError(f"{name} must be one of {sorted(_AGG)}; got {value!r}.")
+    iso = _AGG[isozyme_scoring]
+    cplx = _AGG[complex_scoring]
+
+    out = model.copy()
+    for rxn in out.reactions:
+        body = rxn.gpr.body
+        # Reactions without a parsed rule or without genes have nothing to prune.
+        if body is not None and rxn.genes:
+            pruned, _ = _prune(body, gene_scores, iso, cplx)
+            if pruned is not None:
+                rxn.gene_reaction_rule = pruned
+
+    # Any gene no longer referenced by a reaction is reported and removed.
+    still_used = set()
+    for rxn in out.reactions:
+        still_used.update(gene.id for gene in rxn.genes)
+    removed = sorted(gene.id for gene in out.genes if gene.id not in still_used)
+    if removed:
+        remove_genes(out, removed, remove_reactions=False)
+    return out, removed