Add Excel export and the Standard-GEM git-layout export

edkerk · edkerk · commit 7a9b69accd99 · 2026-05-29T23:29:01.000+02:00
diff --git a/src/raven_python/io/excel.py b/src/raven_python/io/excel.py
@@ -0,0 +1,136 @@
+"""Export a model to the RAVEN Microsoft Excel format.
+
+Writes the RAVEN xlsx layout — RXNS, METS, COMPS, GENES (only if genes exist), MODEL — pulling
+RAVEN-specific values back out of cobra's ``annotation`` / ``notes`` (where the
+raven_python YAML reader stashes them). Excel *import* is intentionally not provided.
+
+Requires the optional ``openpyxl`` dependency (``pip install raven_python[excel]``).
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import cobra
+
+
+def _miriam_string(annotation: dict, exclude: tuple[str, ...] = ()) -> str:
+    """Build the RAVEN MIRIAM cell ``ns/id;ns/id2;...`` from an annotation dict.
+
+    Namespaces are visited in sorted order; those listed in ``exclude`` are skipped.
+    """
+    entries = []
+    for key in (ns for ns in sorted(annotation) if ns not in exclude):
+        ids = annotation[key]
+        # A bare string is one identifier, not a sequence of characters.
+        entries += [f"{key}/{ident}" for ident in ([ids] if isinstance(ids, str) else ids)]
+    return ";".join(entries)
+
+
+def _equation(rxn: cobra.Reaction) -> str:
+    """Render ``rxn`` as a RAVEN-style equation using ``name[comp]`` metabolite labels."""
+    # Split the stoichiometry by sign; zero coefficients land on neither side.
+    lhs, rhs = [], []
+    for met, coef in rxn.metabolites.items():
+        term = f"{abs(coef):g} {met.name}[{met.compartment}]"
+        if coef < 0:
+            lhs.append(term)
+        elif coef > 0:
+            rhs.append(term)
+    connector = " <=> " if rxn.reversibility else " => "
+    return " + ".join(lhs) + connector + " + ".join(rhs)
+
+
+def _ec_codes(rxn: cobra.Reaction) -> str:
+    """Return the reaction's ``ec-code`` annotation as a ``;``-joined string."""
+    ec = rxn.annotation.get("ec-code", [])
+    # A lone string is one EC number; joining it directly would split it per character.
+    return ";".join([ec] if isinstance(ec, str) else ec)
+
+
+def export_to_excel(
+    model: cobra.Model, path: str | Path, *, sort_ids: bool = False
+) -> None:
+    """Save ``model`` as a RAVEN-layout workbook (RXNS, METS, COMPS, GENES, MODEL).
+
+    Parameters
+    ----------
+    model
+        Model to export; it is read but not modified.
+    path
+        Destination file, passed to the workbook's ``save`` as a string.
+    sort_ids
+        If True, write reactions, metabolites, genes and compartments sorted by ID.
+
+    Raises
+    ------
+    ImportError
+        If the optional ``openpyxl`` package cannot be imported.
+    """
+    try:
+        from openpyxl import Workbook
+    except ImportError as exc:  # pragma: no cover - exercised only without openpyxl
+        raise ImportError(
+            "export_to_excel requires openpyxl. Install it with "
+            "`pip install raven_python[excel]` (or `pip install openpyxl`)."
+        ) from exc
+
+    def ordered(items, key=lambda obj: obj.id):
+        return sorted(items, key=key) if sort_ids else list(items)
+
+    def new_sheet(title, header):
+        sheet = book.create_sheet(title)
+        sheet.append(header)
+        return sheet
+
+    book = Workbook()
+    book.remove(book.active)  # openpyxl starts with one empty default sheet
+
+    rxn_sheet = new_sheet("RXNS", [
+        "#", "ID", "NAME", "EQUATION", "EC-NUMBER", "GENE ASSOCIATION", "LOWER BOUND",
+        "UPPER BOUND", "OBJECTIVE", "COMPARTMENT", "MIRIAM", "SUBSYSTEM",
+        "REPLACEMENT ID", "NOTE", "REFERENCE", "CONFIDENCE SCORE",
+    ])
+    for rxn in ordered(model.reactions):
+        subsys = rxn.subsystem
+        subsys = ";".join(subsys) if isinstance(subsys, (list, tuple)) else subsys
+        rxn_sheet.append([
+            None, rxn.id, rxn.name, _equation(rxn), _ec_codes(rxn), rxn.gene_reaction_rule,
+            rxn.lower_bound, rxn.upper_bound, rxn.objective_coefficient or None, None,
+            _miriam_string(rxn.annotation, exclude=("ec-code",)), subsys, None,
+            rxn.notes.get("note"), rxn.notes.get("references"),
+            rxn.notes.get("confidence_score"),
+        ])
+
+    met_sheet = new_sheet("METS", [
+        "#", "ID", "NAME", "UNCONSTRAINED", "MIRIAM", "COMPOSITION", "InChI",
+        "COMPARTMENT", "REPLACEMENT ID", "CHARGE",
+    ])
+    for met in ordered(model.metabolites):
+        inchi = met.notes.get("inchis")  # when present, COMPOSITION is left empty
+        met_sheet.append([
+            None, f"{met.name}[{met.compartment}]", met.name, None,
+            _miriam_string(met.annotation, exclude=("smiles",)),
+            None if inchi else met.formula, inchi, met.compartment, met.id, met.charge,
+        ])
+
+    comp_sheet = new_sheet("COMPS", ["#", "ABBREVIATION", "NAME", "INSIDE", "MIRIAM"])
+    for comp_id in ordered(model.compartments, key=None):
+        comp_sheet.append([None, comp_id, model.compartments.get(comp_id, ""), None, None])
+
+    gene_list = ordered(model.genes)
+    if gene_list:  # the sheet is left out entirely when the model has no genes
+        gene_sheet = new_sheet("GENES", ["#", "NAME", "MIRIAM", "SHORT NAME", "COMPARTMENT"])
+        for gene in gene_list:
+            gene_sheet.append([None, gene.id, _miriam_string(gene.annotation), gene.name, None])
+
+    meta = dict(model.notes.get("metaData", {})) if model.notes else {}
+    model_sheet = new_sheet("MODEL", [
+        "#", "ID", "NAME", "TAXONOMY", "DEFAULT LOWER", "DEFAULT UPPER", "CONTACT GIVEN NAME",
+        "CONTACT FAMILY NAME", "CONTACT EMAIL", "ORGANIZATION", "NOTES",
+    ])
+    meta_keys = ("taxonomy", "defaultLB", "defaultUB", "givenName", "familyName",
+                 "email", "organization", "note")
+    model_sheet.append([None, model.id or "blankID", model.name or "blankName",
+                        *(meta.get(key) for key in meta_keys)])
+
+    book.save(str(path))
diff --git a/src/raven_python/io/git.py b/src/raven_python/io/git.py
@@ -0,0 +1,106 @@
+"""Export a model into a Standard-GEM versioned-repository layout.
+
+Writes the model in several formats into the Standard-GEM folder structure (a
+``model/`` directory with one subfolder per format), ready to commit to a
+Git-maintained model repository (Metabolic Atlas / Human-GEM / yeast-GEM style),
+plus a ``dependencies.txt`` recording tool versions.
+
+Thin orchestration over the writers raven_python already exposes: ``write_yaml_model``,
+cobra's ``write_sbml_model`` and ``save_matlab_model``, ``export_to_excel``, plus a
+single-file reaction table (txt).
+"""
+from __future__ import annotations
+
+import importlib.metadata as _md
+import platform
+from collections.abc import Iterable
+from pathlib import Path
+
+import cobra
+
+from raven_python.io.excel import _equation, export_to_excel
+from raven_python.io.yaml import write_yaml_model
+from raven_python.utils.sort import sort_identifiers
+
+_ALL_FORMATS = ("yml", "xml", "mat", "xlsx", "txt")  # formats export_for_git accepts
+
+
+def _version(package: str) -> str:  # installed version string for ``package``
+    try:
+        return _md.version(package)
+    except _md.PackageNotFoundError:  # not installed: record a placeholder instead of failing
+        return "unknown"
+
+
+def _write_txt(model: cobra.Model, path: Path) -> None:
+    """Write a tab-separated reaction table (RAVEN exportForGit txt) with LF line endings."""
+    # newline="\n" disables text-mode newline translation, so Windows does not emit
+    # "\r\n" and the same model produces identical bytes on every platform.
+    with open(path, "w", encoding="utf-8", newline="\n") as fh:
+        fh.write("Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n")
+        for r in model.reactions:
+            fh.write(f"{r.id}\t{_equation(r)}\t{r.gene_reaction_rule}\t"
+                     f"{r.lower_bound:g}\t{r.upper_bound:g}\t{r.objective_coefficient:g}\n")
+
+
+def export_for_git(
+    model: cobra.Model, path: str | Path = ".", *, prefix: str = "model",
+    formats: str | Iterable[str] = ("yml", "xml", "mat", "xlsx"), sub_dirs: bool = True,
+) -> Path:
+    """Write ``model`` into a Standard-GEM repository layout.
+
+    Parameters
+    ----------
+    model
+        Model to export. A sorted copy is written; the caller's model is untouched.
+    path
+        Directory to populate (created if missing).
+    prefix
+        Base filename for every format (default ``"model"``).
+    formats
+        Which formats to write; any of ``"yml"``, ``"xml"``, ``"mat"``,
+        ``"xlsx"``, ``"txt"``. A single string is taken as one format name.
+    sub_dirs
+        If True (default), write ``model/<fmt>/<prefix>.<fmt>`` (standard-GEM
+        layout); otherwise all files go directly in ``path``.
+
+    Returns
+    -------
+    pathlib.Path
+        The root directory written to (``path/model`` when ``sub_dirs`` is True).
+
+    Raises
+    ------
+    ValueError
+        If ``formats`` contains a name outside the supported set.
+    """
+    formats = [formats] if isinstance(formats, str) else list(formats)  # "yml" -> ["yml"]
+    unknown = set(formats) - set(_ALL_FORMATS)
+    if unknown:
+        raise ValueError(f"Unknown format(s): {sorted(unknown)}; allowed: {_ALL_FORMATS}")
+
+    model = sort_identifiers(model.copy())  # sort a copy so the caller's model is untouched
+    root = Path(path) / "model" if sub_dirs else Path(path)
+    root.mkdir(parents=True, exist_ok=True)
+
+    def target(fmt: str) -> Path:
+        folder = root / fmt if sub_dirs else root
+        folder.mkdir(parents=True, exist_ok=True)
+        return folder / f"{prefix}.{fmt}"
+
+    if "yml" in formats:
+        write_yaml_model(model, target("yml"))
+    if "xml" in formats:
+        cobra.io.write_sbml_model(model, str(target("xml")))
+    if "mat" in formats:
+        cobra.io.save_matlab_model(model, str(target("mat")))
+    if "xlsx" in formats:
+        export_to_excel(model, target("xlsx"))
+    if "txt" in formats:
+        _write_txt(model, target("txt"))
+    # newline="\n" keeps the file byte-identical across OSes (no CRLF on Windows).
+    with open(root / "dependencies.txt", "w", encoding="utf-8", newline="\n") as fh:
+        fh.write(f"python\t{platform.python_version()}\n")
+        fh.write(f"cobra\t{_version('cobra')}\n")
+        fh.write(f"raven_python\t{_version('raven_python')}\n")
+    return root
diff --git a/tests/test_io_excel.py b/tests/test_io_excel.py
@@ -0,0 +1,111 @@
+"""Tests for raven_python.io.excel (exportToExcelFormat port, export only)."""
+import cobra
+import pytest
+
+openpyxl = pytest.importorskip("openpyxl")  # skip this module when openpyxl is unavailable
+
+from raven_python.io import export_to_excel
+from raven_python.manipulation import add_reactions_from_equations
+
+
+@pytest.fixture
+def model():
+    """Small yeast model: ATP <=> ADP with annotations, notes and metaData populated."""
+    gem = cobra.Model("yeastGEM")
+    gem.name = "Yeast"
+    gem.compartments = {"c": "cytoplasm"}
+    gem.notes["metaData"] = {"taxonomy": "taxonomy/559292", "defaultLB": "-1000"}
+    atp = cobra.Metabolite(
+        "atp_c", name="ATP", formula="C10H16N5O13P3", charge=-4, compartment="c"
+    )
+    gem.add_metabolites([atp, cobra.Metabolite("adp_c", name="ADP", compartment="c")])
+    # smiles / inchis feed the METS-sheet checks (MIRIAM exclusion, InChI column).
+    atp.annotation = {"kegg.compound": ["C00002"], "smiles": ["C1=NC"]}
+    atp.notes = {"inchis": "InChI=1S/X"}
+    spec = {
+        "id": "R1", "equation": "atp_c <=> adp_c", "name": "rxn one",
+        "gene_reaction_rule": "G1", "subsystem": "glycolysis",
+    }
+    add_reactions_from_equations(gem, [spec])
+    rxn = gem.reactions.get_by_id("R1")
+    rxn.annotation = {"ec-code": ["1.1.1.1"], "kegg.reaction": ["R00001"]}
+    rxn.notes = {"confidence_score": 2, "note": "a note", "references": "PMID:1"}
+    rxn.objective_coefficient = 1
+    return gem
+
+
+def _wb(path):  # reopen an exported workbook so tests can inspect its sheets
+    return openpyxl.load_workbook(filename=path)
+
+
+def test_sheets_present(model, tmp_path):
+    """All five RAVEN sheets exist when the model has genes."""
+    xlsx = tmp_path / "m.xlsx"
+    export_to_excel(model, xlsx)
+    assert sorted(_wb(xlsx).sheetnames) == ["COMPS", "GENES", "METS", "MODEL", "RXNS"]
+
+
+def test_rxns_sheet(model, tmp_path):
+    """The single reaction row carries every RAVEN column the fixture populates."""
+    xlsx = tmp_path / "m.xlsx"
+    export_to_excel(model, xlsx)
+    sheet = _wb(xlsx)["RXNS"]
+    titles = [cell.value for cell in sheet[1]]
+    row = dict(zip(titles, (cell.value for cell in sheet[2])))
+    expected = {
+        "ID": "R1", "NAME": "rxn one", "EC-NUMBER": "1.1.1.1", "GENE ASSOCIATION": "G1",
+        "SUBSYSTEM": "glycolysis", "OBJECTIVE": 1, "CONFIDENCE SCORE": 2, "NOTE": "a note",
+        "MIRIAM": "kegg.reaction/R00001",  # ec-code excluded (own column)
+    }
+    for column, value in expected.items():
+        assert row[column] == value, column
+    assert "ATP[c]" in row["EQUATION"]
+    assert "<=>" in row["EQUATION"]
+
+
+def test_mets_sheet(model, tmp_path):
+    """ATP's row: RAVEN-style ID, InChI instead of formula, smiles kept out of MIRIAM."""
+    xlsx = tmp_path / "m.xlsx"
+    export_to_excel(model, xlsx)
+    sheet = _wb(xlsx)["METS"]
+    titles = [cell.value for cell in sheet[1]]
+    by_model_id = {}
+    for cells in sheet.iter_rows(min_row=2):
+        record = dict(zip(titles, (cell.value for cell in cells)))
+        by_model_id[record["REPLACEMENT ID"]] = record  # this column holds the model's own ID
+    atp = by_model_id["atp_c"]
+    assert (atp["ID"], atp["NAME"]) == ("ATP[c]", "ATP")
+    assert atp["InChI"] == "InChI=1S/X"
+    assert atp["COMPOSITION"] is None  # suppressed when InChI present
+    assert atp["CHARGE"] == -4
+    assert atp["MIRIAM"] == "kegg.compound/C00002"  # smiles excluded
+
+
+def test_model_sheet(model, tmp_path):
+    """MODEL sheet echoes the model id/name and the ``metaData`` notes."""
+    xlsx = tmp_path / "m.xlsx"
+    export_to_excel(model, xlsx)
+    sheet = _wb(xlsx)["MODEL"]
+    row = dict(zip((c.value for c in sheet[1]), (c.value for c in sheet[2])))
+    assert row["ID"] == "yeastGEM"
+    assert row["NAME"] == "Yeast"
+    assert row["TAXONOMY"] == "taxonomy/559292"
+    assert row["DEFAULT LOWER"] == "-1000"
+
+
+def test_genes_sheet(model, tmp_path):
+    """The gene named in the fixture's gene_reaction_rule is listed under NAME."""
+    xlsx = tmp_path / "m.xlsx"
+    export_to_excel(model, xlsx)
+    sheet = _wb(xlsx)["GENES"]
+    name_column = [cell.value for cell in sheet[1]].index("NAME")
+    assert sheet[2][name_column].value == "G1"
+
+
+def test_no_genes_skips_sheet(tmp_path):
+    """A model without genes produces a workbook with no GENES sheet."""
+    bare = cobra.Model("t")
+    bare.add_metabolites([cobra.Metabolite("a_c", compartment="c")])
+    add_reactions_from_equations(bare, [{"id": "R1", "equation": "a_c -->"}])
+    export_to_excel(bare, tmp_path / "m.xlsx")
+    assert "GENES" not in _wb(tmp_path / "m.xlsx").sheetnames
diff --git a/tests/test_io_git.py b/tests/test_io_git.py