Skip to content

Commit 7a9b69a

Browse files
committed
Add Excel export and the Standard-GEM git-layout export
1 parent 6ef3357 commit 7a9b69a

4 files changed

Lines changed: 422 additions & 0 deletions

File tree

src/raven_python/io/excel.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
"""Export a model to the RAVEN Microsoft Excel format.
2+
3+
Writes the five-sheet RAVEN xlsx layout — RXNS, METS, COMPS, GENES (skipped when the model has no genes), MODEL — pulling
4+
RAVEN-specific values back out of cobra's ``annotation`` / ``notes`` (where the
5+
raven_python YAML reader stashes them). Excel *import* is intentionally not provided.
6+
7+
Requires the optional ``openpyxl`` dependency (``pip install raven_python[excel]``).
8+
"""
9+
from __future__ import annotations
10+
11+
from pathlib import Path
12+
13+
import cobra
14+
15+
16+
def _miriam_string(annotation: dict, exclude: tuple[str, ...] = ()) -> str:
    """Flatten an annotation mapping into a RAVEN MIRIAM cell.

    The result is ``namespace/id;namespace/id2;...``. Namespaces are emitted
    in sorted order; the identifiers inside one namespace keep the order in
    which they are stored. Namespaces listed in ``exclude`` are left out. A
    namespace whose value is a single string is treated as a one-element list.
    """

    def as_sequence(entry):
        # A bare string must not be iterated character by character.
        return [entry] if isinstance(entry, str) else entry

    return ";".join(
        f"{key}/{identifier}"
        for key in sorted(annotation)
        if key not in exclude
        for identifier in as_sequence(annotation[key])
    )
27+
28+
29+
def _equation(rxn: cobra.Reaction) -> str:
    """Human-readable equation in RAVEN ``name[comp]`` form.

    Every participant is rendered as ``<coefficient> <name>[<compartment>]``
    and participants on one side are joined with `` + ``. Metabolites with a
    negative coefficient go on the left-hand side, those with a positive
    coefficient on the right-hand side; zero coefficients are dropped. The
    arrow is `` <=> `` when ``rxn.reversibility`` is truthy and `` => ``
    otherwise. A side without participants is rendered as an empty string.
    """

    def side(items):
        # ``.15g`` rather than plain ``g``: the default precision of ``g`` is
        # six significant digits, which would silently round fractional
        # stoichiometry (e.g. 0.123456789 -> 0.123457). Short values such as
        # 1, 2 or 0.5 are rendered exactly as before.
        return " + ".join(
            f"{abs(coef):.15g} {met.name}[{met.compartment}]" for met, coef in items
        )

    participants = list(rxn.metabolites.items())
    reactants = [(m, c) for m, c in participants if c < 0]
    products = [(m, c) for m, c in participants if c > 0]
    arrow = " <=> " if rxn.reversibility else " => "
    return f"{side(reactants)}{arrow}{side(products)}"
41+
42+
43+
def _ec_codes(rxn: cobra.Reaction) -> str:
    """Return the reaction's ``ec-code`` annotation as a ``;``-joined string.

    A single string value is returned unchanged; a missing annotation gives
    an empty string.
    """
    raw = rxn.annotation.get("ec-code", [])
    return ";".join([raw] if isinstance(raw, str) else raw)
48+
49+
50+
def export_to_excel(
    model: cobra.Model, path: str | Path, *, sort_ids: bool = False
) -> None:
    """Serialise ``model`` as a RAVEN-layout ``.xlsx`` workbook.

    Sheets are created in the order RXNS, METS, COMPS, GENES, MODEL. The
    GENES sheet is only created when the model has at least one gene. Columns
    for which no value is derived from the model are written as empty cells.

    Parameters
    ----------
    model
        Model to export. Reaction ``note`` / ``references`` /
        ``confidence_score`` are read from ``reaction.notes``, metabolite
        InChI strings from ``metabolite.notes["inchis"]`` and the MODEL sheet
        fields from ``model.notes["metaData"]``.
    path
        Destination file; passed to openpyxl as ``str(path)``.
    sort_ids
        If True, write reactions/metabolites/genes sorted alphabetically by ID
        and compartments sorted by abbreviation (the model itself is not
        modified).

    Raises
    ------
    ImportError
        If the optional ``openpyxl`` dependency is not installed.
    """
    try:
        from openpyxl import Workbook
    except ImportError as exc:  # pragma: no cover - exercised only without openpyxl
        raise ImportError(
            "export_to_excel requires openpyxl. Install it with "
            "`pip install raven_python[excel]` (or `pip install openpyxl`)."
        ) from exc

    def ordered(entries):
        """Copy ``entries`` into a list, sorted by ``.id`` when ``sort_ids`` is set."""
        if sort_ids:
            return sorted(entries, key=lambda entry: entry.id)
        return list(entries)

    rxn_list = ordered(model.reactions)
    met_list = ordered(model.metabolites)
    gene_list = ordered(model.genes)
    if model.notes:
        meta = dict(model.notes.get("metaData", {}))
    else:
        meta = {}

    book = Workbook()
    # A new Workbook starts with one empty sheet; remove it so that only the
    # RAVEN sheets end up in the file.
    book.remove(book.active)

    # --- RXNS ---
    rxn_header = [
        "#", "ID", "NAME", "EQUATION", "EC-NUMBER", "GENE ASSOCIATION", "LOWER BOUND",
        "UPPER BOUND", "OBJECTIVE", "COMPARTMENT", "MIRIAM", "SUBSYSTEM",
        "REPLACEMENT ID", "NOTE", "REFERENCE", "CONFIDENCE SCORE",
    ]
    rxn_sheet = book.create_sheet("RXNS")
    rxn_sheet.append(rxn_header)
    for rxn in rxn_list:
        pathway = rxn.subsystem
        if isinstance(pathway, (list, tuple)):
            pathway = ";".join(pathway)
        rxn_sheet.append([
            None,
            rxn.id,
            rxn.name,
            _equation(rxn),
            _ec_codes(rxn),
            rxn.gene_reaction_rule,
            rxn.lower_bound,
            rxn.upper_bound,
            # A zero objective coefficient is written as an empty cell.
            rxn.objective_coefficient or None,
            None,
            # EC numbers have their own column, so keep them out of MIRIAM.
            _miriam_string(rxn.annotation, exclude=("ec-code",)),
            pathway,
            None,
            rxn.notes.get("note"),
            rxn.notes.get("references"),
            rxn.notes.get("confidence_score"),
        ])

    # --- METS ---
    met_header = [
        "#", "ID", "NAME", "UNCONSTRAINED", "MIRIAM", "COMPOSITION", "InChI",
        "COMPARTMENT", "REPLACEMENT ID", "CHARGE",
    ]
    met_sheet = book.create_sheet("METS")
    met_sheet.append(met_header)
    for met in met_list:
        inchi = met.notes.get("inchis")
        # The formula is only written when no InChI is available.
        composition = None if inchi else met.formula
        met_sheet.append([
            None,
            f"{met.name}[{met.compartment}]",  # ID column uses name[comp] ...
            met.name,
            None,
            _miriam_string(met.annotation, exclude=("smiles",)),
            composition,
            inchi,
            met.compartment,
            met.id,  # ... and the model ID goes into REPLACEMENT ID
            met.charge,
        ])

    # --- COMPS ---
    comp_sheet = book.create_sheet("COMPS")
    comp_sheet.append(["#", "ABBREVIATION", "NAME", "INSIDE", "MIRIAM"])
    if sort_ids:
        comp_ids = sorted(model.compartments)
    else:
        comp_ids = list(model.compartments)
    for comp_id in comp_ids:
        comp_sheet.append([None, comp_id, model.compartments.get(comp_id, ""), None, None])

    # --- GENES ---
    if gene_list:
        gene_sheet = book.create_sheet("GENES")
        gene_sheet.append(["#", "NAME", "MIRIAM", "SHORT NAME", "COMPARTMENT"])
        for gene in gene_list:
            gene_sheet.append([None, gene.id, _miriam_string(gene.annotation), gene.name, None])

    # --- MODEL ---
    model_header = [
        "#", "ID", "NAME", "TAXONOMY", "DEFAULT LOWER", "DEFAULT UPPER",
        "CONTACT GIVEN NAME", "CONTACT FAMILY NAME", "CONTACT EMAIL",
        "ORGANIZATION", "NOTES",
    ]
    model_sheet = book.create_sheet("MODEL")
    model_sheet.append(model_header)
    model_sheet.append([
        None,
        model.id or "blankID",
        model.name or "blankName",
        meta.get("taxonomy"),
        meta.get("defaultLB"),
        meta.get("defaultUB"),
        meta.get("givenName"),
        meta.get("familyName"),
        meta.get("email"),
        meta.get("organization"),
        meta.get("note"),
    ])

    book.save(str(path))

src/raven_python/io/git.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""Export a model into a Standard-GEM versioned-repository layout.
2+
3+
Writes the model in several formats into the Standard-GEM folder structure (a
4+
``model/`` directory with one subfolder per format), ready to commit to a
5+
Git-maintained model repository (Metabolic Atlas / Human-GEM / yeast-GEM style),
6+
plus a ``dependencies.txt`` recording tool versions.
7+
8+
Thin orchestration over the writers raven_python already exposes: ``write_yaml_model``,
9+
cobra's ``write_sbml_model`` and ``save_matlab_model``, ``export_to_excel``, plus a
10+
single-file reaction table (txt).
11+
"""
12+
from __future__ import annotations
13+
14+
import importlib.metadata as _md
15+
import platform
16+
from collections.abc import Iterable
17+
from pathlib import Path
18+
19+
import cobra
20+
21+
from raven_python.io.excel import _equation, export_to_excel
22+
from raven_python.io.yaml import write_yaml_model
23+
from raven_python.utils.sort import sort_identifiers
24+
25+
_ALL_FORMATS = ("yml", "xml", "mat", "xlsx", "txt")
26+
27+
28+
def _version(package: str) -> str:
    """Return the installed version of ``package``, or ``"unknown"`` if it is not found."""
    try:
        found = _md.version(package)
    except _md.PackageNotFoundError:
        found = "unknown"
    return found
33+
34+
35+
def _write_txt(model: cobra.Model, path: Path) -> None:
    """Write a tab-separated, one-line-per-reaction table to ``path``.

    The file is UTF-8 encoded and starts with a header row. Each following
    row holds the reaction ID, its equation, the gene-reaction rule, the lower
    and upper bound and the objective coefficient (numbers in ``g`` format).
    """
    header = "Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n"
    with open(path, "w", encoding="utf-8") as out:
        out.write(header)
        for rxn in model.reactions:
            cells = (
                str(rxn.id),
                str(_equation(rxn)),
                str(rxn.gene_reaction_rule),
                f"{rxn.lower_bound:g}",
                f"{rxn.upper_bound:g}",
                f"{rxn.objective_coefficient:g}",
            )
            out.write("\t".join(cells) + "\n")
44+
45+
46+
def export_for_git(
    model: cobra.Model,
    path: str | Path = ".",
    *,
    prefix: str = "model",
    formats: Iterable[str] = ("yml", "xml", "mat", "xlsx"),
    sub_dirs: bool = True,
) -> Path:
    """Write ``model`` into a Standard-GEM repository layout.

    The model is copied and the copy is passed through ``sort_identifiers``
    before anything is written. One file per requested format is then
    produced, followed by a ``dependencies.txt`` (tab-separated name/version
    lines for python, cobra and raven_python) in the returned root directory.

    Parameters
    ----------
    model
        Model to export.
    path
        Directory to populate. Missing directories are created.
    prefix
        Base filename for every format (default ``"model"``).
    formats
        Which formats to write; any of ``"yml"``, ``"xml"``, ``"mat"``,
        ``"xlsx"``, ``"txt"`` (default ``yml``/``xml``/``mat``/``xlsx``).
        A single format may also be given as a plain string, e.g. ``"yml"``.
    sub_dirs
        If True (default), write ``model/<fmt>/<prefix>.<fmt>`` (standard-GEM
        layout); otherwise all files go directly in ``path``.

    Returns
    -------
    pathlib.Path
        The root directory written to (``path/model`` when ``sub_dirs`` is
        True, otherwise ``path``).

    Raises
    ------
    ValueError
        If ``formats`` contains a name that is not one of the allowed formats.
    """
    # A bare string is itself an iterable of characters: without this guard
    # ``formats="yml"`` would be split into ['y', 'm', 'l'] and rejected with a
    # misleading "unknown format" error.
    formats = [formats] if isinstance(formats, str) else list(formats)
    unknown = set(formats) - set(_ALL_FORMATS)
    if unknown:
        raise ValueError(f"Unknown format(s): {sorted(unknown)}; allowed: {_ALL_FORMATS}")

    # Sort a copy so the caller's model is untouched.
    model = sort_identifiers(model.copy())

    root = Path(path) / "model" if sub_dirs else Path(path)
    root.mkdir(parents=True, exist_ok=True)

    def target(fmt: str) -> Path:
        """Return the output file for ``fmt``, creating its folder if needed."""
        folder = root / fmt if sub_dirs else root
        folder.mkdir(parents=True, exist_ok=True)
        return folder / f"{prefix}.{fmt}"

    if "yml" in formats:
        write_yaml_model(model, target("yml"))
    if "xml" in formats:
        cobra.io.write_sbml_model(model, str(target("xml")))
    if "mat" in formats:
        cobra.io.save_matlab_model(model, str(target("mat")))
    if "xlsx" in formats:
        export_to_excel(model, target("xlsx"))
    if "txt" in formats:
        _write_txt(model, target("txt"))

    # Record the tool versions used for this export next to the model files.
    with open(root / "dependencies.txt", "w", encoding="utf-8") as fh:
        fh.write(f"python\t{platform.python_version()}\n")
        fh.write(f"cobra\t{_version('cobra')}\n")
        fh.write(f"raven_python\t{_version('raven_python')}\n")

    return root

tests/test_io_excel.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""Tests for raven_python.io.excel (exportToExcelFormat port, export only)."""
2+
import cobra
3+
import pytest
4+
5+
openpyxl = pytest.importorskip("openpyxl")
6+
7+
from raven_python.io import export_to_excel
8+
from raven_python.manipulation import add_reactions_from_equations
9+
10+
11+
@pytest.fixture
def model():
    """Small two-metabolite, one-reaction model carrying RAVEN-style metadata.

    ``atp_c`` has a formula, charge, annotations and an InChI note; reaction
    ``R1`` (``atp_c <=> adp_c``) has a gene rule, subsystem, annotations,
    notes and an objective coefficient of 1.
    """
    gem = cobra.Model("yeastGEM")
    gem.name = "Yeast"
    gem.compartments = {"c": "cytoplasm"}
    gem.notes["metaData"] = {"taxonomy": "taxonomy/559292", "defaultLB": "-1000"}

    species = [
        cobra.Metabolite("atp_c", name="ATP", formula="C10H16N5O13P3", charge=-4, compartment="c"),
        cobra.Metabolite("adp_c", name="ADP", compartment="c"),
    ]
    gem.add_metabolites(species)

    atp = gem.metabolites.get_by_id("atp_c")
    atp.annotation = {"kegg.compound": ["C00002"], "smiles": ["C1=NC"]}
    atp.notes = {"inchis": "InChI=1S/X"}

    reaction_spec = {
        "id": "R1",
        "equation": "atp_c <=> adp_c",
        "name": "rxn one",
        "gene_reaction_rule": "G1",
        "subsystem": "glycolysis",
    }
    add_reactions_from_equations(gem, [reaction_spec])

    rxn = gem.reactions.get_by_id("R1")
    rxn.annotation = {"ec-code": ["1.1.1.1"], "kegg.reaction": ["R00001"]}
    rxn.notes = {"confidence_score": 2, "note": "a note", "references": "PMID:1"}
    rxn.objective_coefficient = 1
    return gem
35+
36+
37+
def _wb(path):
    """Load the workbook stored at ``path`` with openpyxl."""
    workbook = openpyxl.load_workbook(path)
    return workbook
39+
40+
41+
def test_sheets_present(model, tmp_path):
    """A model with genes produces exactly the five RAVEN sheets."""
    xlsx_file = tmp_path / "m.xlsx"
    export_to_excel(model, xlsx_file)
    sheet_names = set(_wb(xlsx_file).sheetnames)
    expected = {"RXNS", "METS", "COMPS", "GENES", "MODEL"}
    assert sheet_names == expected
46+
47+
48+
def test_rxns_sheet(model, tmp_path):
    """The first data row of RXNS reflects reaction R1 of the fixture."""
    xlsx_file = tmp_path / "m.xlsx"
    export_to_excel(model, xlsx_file)
    sheet = _wb(xlsx_file)["RXNS"]
    header = [cell.value for cell in sheet[1]]
    row = dict(zip(header, (cell.value for cell in sheet[2])))

    assert row["ID"] == "R1"
    assert row["NAME"] == "rxn one"
    equation = row["EQUATION"]
    assert "ATP[c]" in equation
    assert "<=>" in equation
    assert row["EC-NUMBER"] == "1.1.1.1"
    assert row["GENE ASSOCIATION"] == "G1"
    assert row["SUBSYSTEM"] == "glycolysis"
    assert row["OBJECTIVE"] == 1
    assert row["CONFIDENCE SCORE"] == 2
    assert row["NOTE"] == "a note"
    # ec-code has its own column, so it must not reappear in MIRIAM
    assert row["MIRIAM"] == "kegg.reaction/R00001"
64+
65+
66+
def test_mets_sheet(model, tmp_path):
    """The METS row for ``atp_c`` carries its name, InChI, charge and MIRIAM data."""
    xlsx_file = tmp_path / "m.xlsx"
    export_to_excel(model, xlsx_file)
    sheet = _wb(xlsx_file)["METS"]
    header = [cell.value for cell in sheet[1]]

    # Index the data rows by the REPLACEMENT ID column (the model's metabolite ID).
    key_col = header.index("REPLACEMENT ID")
    by_id = {}
    for cells in sheet.iter_rows(min_row=2):
        by_id[cells[key_col].value] = dict(zip(header, (cell.value for cell in cells)))

    atp = by_id["atp_c"]
    assert atp["ID"] == "ATP[c]"
    assert atp["NAME"] == "ATP"
    assert atp["InChI"] == "InChI=1S/X"
    assert atp["COMPOSITION"] is None  # suppressed when InChI present
    assert atp["CHARGE"] == -4
    assert atp["MIRIAM"] == "kegg.compound/C00002"  # smiles excluded
82+
83+
84+
def test_model_sheet(model, tmp_path):
    """The MODEL sheet exposes the model ID, name and ``metaData`` values."""
    xlsx_file = tmp_path / "m.xlsx"
    export_to_excel(model, xlsx_file)
    sheet = _wb(xlsx_file)["MODEL"]
    header = [cell.value for cell in sheet[1]]
    row = dict(zip(header, (cell.value for cell in sheet[2])))

    assert row["ID"] == "yeastGEM"
    assert row["NAME"] == "Yeast"
    assert row["TAXONOMY"] == "taxonomy/559292"
    assert row["DEFAULT LOWER"] == "-1000"
94+
95+
96+
def test_genes_sheet(model, tmp_path):
    """The first GENES data row lists gene G1 from the fixture's gene rule."""
    xlsx_file = tmp_path / "m.xlsx"
    export_to_excel(model, xlsx_file)
    sheet = _wb(xlsx_file)["GENES"]
    header = [cell.value for cell in sheet[1]]
    row = dict(zip(header, (cell.value for cell in sheet[2])))
    assert row["NAME"] == "G1"
103+
104+
105+
def test_no_genes_skips_sheet(tmp_path):
    """A model without genes is exported without a GENES sheet."""
    gene_free = cobra.Model("t")
    gene_free.add_metabolites([cobra.Metabolite("a_c", compartment="c")])
    add_reactions_from_equations(gene_free, [{"id": "R1", "equation": "a_c -->"}])

    xlsx_file = tmp_path / "m.xlsx"
    export_to_excel(gene_free, xlsx_file)

    sheet_names = _wb(xlsx_file).sheetnames
    assert "GENES" not in sheet_names

0 commit comments

Comments
 (0)