Skip to content

Commit cf06afe

Browse files
committed
Add readYAML and applyCondition for shared yeast-GEM use
Two generic helpers extracted from yeast-GEM's MATLAB port (see the yeast-GEM/code/python/PORTING_PLAN.md and UPSTREAM_CANDIDATES.md documents for the broader migration). Both are organism-agnostic and useful to any GEM project that wants to keep configuration / condition presets as data rather than as code.

io/readYAML.m: Read an arbitrary YAML document into a MATLAB struct / cell tree. Complements readYAMLmodel, which is specialised for the cobra model schema; readYAML is for free-form configuration files. Delegates to py.yaml.safe_load via MATLAB's Python bridge, with a recursive py.dict / py.list -> struct / cell converter and a matlabFieldName sanitiser for non-alphanumeric YAML keys.

core/applyCondition.m: Apply a deterministic "condition" to a model. The schema is intentionally narrow (prelude.reset_exchanges, cofactor pseudoreaction metabolite removals + charge rebalance, biomass stoichiometry delta, per-reaction bounds diff, expected_uptake_count sanity check). The function accepts either a YAML file path or a pre-parsed struct. Project-specific extensions (e.g. yeast-GEM's amino_acid_ratio step that rewrites a protein pseudoreaction from a side-car TSV) are handled by the *caller* before / after this function — the upstream contract is intentionally kept narrow.
1 parent a8b45f5 commit cf06afe

2 files changed

Lines changed: 221 additions & 0 deletions

File tree

core/applyCondition.m

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
function model = applyCondition(model, condition)
% applyCondition
%   Apply a deterministic "condition" to a model: a prelude that resets
%   exchange bounds, optional metabolite removals + automatic charge
%   rebalancing of a pseudoreaction, optional biomass-stoichiometry
%   delta, and a per-reaction bounds diff. The schema is intentionally
%   narrow so a condition can be reviewed as data.
%
%   Yeast-GEM was the first consumer; the same schema works for any
%   GEM that keeps its condition presets as data rather than as code.
%   Project-specific extensions (e.g. yeast-GEM's amino_acid_ratio
%   step that rewrites a protein pseudoreaction's stoichiometry from a
%   side-car TSV) are handled by the *caller* before / after this
%   function; this function is kept upstream-narrow on purpose.
%
% Inputs:
%   model       RAVEN model struct. Depending on which keys the condition
%               contains, the fields rxns, mets, S, lb, ub and metCharges
%               are read and/or modified.
%   condition   Either a path to a YAML condition file (char or string)
%               or a struct already produced by readYAML. Lists may be
%               cell arrays of structs (what readYAML produces) or plain
%               struct arrays (convenient for hand-built conditions).
%               The expected schema (all keys optional):
%
%                 prelude:
%                   reset_exchanges: out
%                       % A string is passed to getExchangeRxns as its
%                       % second argument. Boolean/numeric true calls
%                       % getExchangeRxns with its default selection;
%                       % false or null skips the reset. Selected
%                       % reactions get lb = 0 and ub = 1000.
%
%                 cofactor_pseudoreaction:
%                   rxn_id: r_4598
%                   remove_mets:
%                     - { met: s_3714 }
%                   charge_balance_met: s_0794
%
%                 biomass_stoichiometry_delta:
%                   rxn_id: r_4041
%                   add:
%                     - { met: s_0689, coef:  0.08 }
%                     - { met: s_0687, coef: -0.08 }
%                     - { met: s_0794, coef: -0.16 }
%
%                 bounds:
%                   - { rxn: r_1654, lb: -1000 }
%                   - { rxn: r_1992, lb: 0 }
%                   - { rxn: r_1663, lb: 0, ub: 0 }
%
%                 expected_uptake_count: 15
%                       % Compared against the number of applied bounds
%                       % entries whose lb is exactly -1000.
%
% Output:
%   model       Modified model.
%
% Warnings:
%   applyCondition:missingRxn           a bounds entry names a reaction
%                                       that is not in model.rxns.
%   applyCondition:missingMet           charge_balance_met is not in
%                                       model.mets.
%   applyCondition:uptakeCountMismatch  expected_uptake_count differs
%                                       from the counted uptake bounds.
%
% Errors:
%   applyCondition:invalidCondition     condition is neither text nor a
%                                       struct.
%
% Usage: model = applyCondition(model, 'data/conditions/anaerobic.yml')
%        model = applyCondition(model, readYAML('data/conditions/anaerobic.yml'))

if ischar(condition) || isstring(condition)
    cond = readYAML(char(condition));
elseif isstruct(condition)
    cond = condition;
else
    error('applyCondition:invalidCondition', ...
        'condition must be a YAML file path or a struct.');
end

% --- Step 1: prelude ---------------------------------------------------
if isfield(cond, 'prelude') && isfield(cond.prelude, 'reset_exchanges')
    resetType = cond.prelude.reset_exchanges;
    if ischar(resetType) || isstring(resetType)
        % Textual value: forwarded as the reaction-type selector.
        [~, exchangeRxns] = getExchangeRxns(model, char(resetType));
    elseif (islogical(resetType) || isnumeric(resetType)) ...
            && ~isempty(resetType) && all(resetType(:) ~= 0)
        % YAML `true`: let getExchangeRxns use its own default selection
        % instead of handing it a non-text selector.
        [~, exchangeRxns] = getExchangeRxns(model);
    else
        % YAML `false` / `null`: the reset was explicitly switched off.
        exchangeRxns = [];
    end
    model.lb(exchangeRxns) = 0;
    model.ub(exchangeRxns) = 1000;
end

% --- Step 2: cofactor pseudoreaction edits ----------------------------
if isfield(cond, 'cofactor_pseudoreaction')
    cp = cond.cofactor_pseudoreaction;
    cofacIdx = getIndexes(model, cp.rxn_id, 'rxns');
    if isfield(cp, 'remove_mets')
        for i = 1:numel(cp.remove_mets)
            removal = conditionListItem(cp.remove_mets, i);
            metIdx = getIndexes(model, removal.met, 'mets');
            model.S(metIdx, cofacIdx) = 0;
        end
    end
    if isfield(cp, 'charge_balance_met')
        balanceIdx = find(strcmp(model.mets, cp.charge_balance_met));
        if isempty(balanceIdx)
            % An empty index would turn both assignments below into
            % silent no-ops, leaving the reaction unbalanced unnoticed.
            warning('applyCondition:missingMet', ...
                'Charge-balance metabolite %s not found in model; skipping charge rebalance.', ...
                cp.charge_balance_met);
        else
            % Zero the balancing metabolite first so that it does not
            % contribute to the imbalance computed from the column.
            model.S(balanceIdx, cofacIdx) = 0;
            % NOTE(review): the coefficient is set to minus the net
            % charge of the column, which only balances the reaction if
            % the balancing metabolite carries charge +1 - confirm.
            model.S(balanceIdx, cofacIdx) = ...
                -sum(model.S(:, cofacIdx) .* model.metCharges, 'omitnan');
        end
    end
end

% --- Step 3: biomass stoichiometry delta ------------------------------
if isfield(cond, 'biomass_stoichiometry_delta')
    delta = cond.biomass_stoichiometry_delta;
    bioIdx = getIndexes(model, delta.rxn_id, 'rxns');
    if isfield(delta, 'add')
        for i = 1:numel(delta.add)
            entry = conditionListItem(delta.add, i);
            metIdx = getIndexes(model, entry.met, 'mets');
            model.S(metIdx, bioIdx) = full(model.S(metIdx, bioIdx)) + entry.coef;
        end
    end
end

% --- Step 4: bounds ---------------------------------------------------
nUptake = 0;
if isfield(cond, 'bounds')
    for i = 1:numel(cond.bounds)
        b = conditionListItem(cond.bounds, i);
        rxnIdx = find(strcmp(model.rxns, b.rxn));
        if isempty(rxnIdx)
            warning('applyCondition:missingRxn', ...
                'Reaction %s not found in model; skipping.', b.rxn);
            continue;
        end
        % A bound that is present but empty (YAML null, or an unset field
        % of a struct array) means "leave unchanged". Assigning [] would
        % delete the element and shorten the bounds vector.
        if isfield(b, 'lb') && ~isempty(b.lb)
            model.lb(rxnIdx) = b.lb;
            if b.lb == -1000
                nUptake = nUptake + 1;
            end
        end
        if isfield(b, 'ub') && ~isempty(b.ub)
            model.ub(rxnIdx) = b.ub;
        end
    end
end

% --- Step 5: uptake sanity check --------------------------------------
if isfield(cond, 'expected_uptake_count')
    if nUptake ~= cond.expected_uptake_count
        warning('applyCondition:uptakeCountMismatch', ...
            'Expected %d uptake reactions, applied %d. Some may be missing from the model.', ...
            cond.expected_uptake_count, nUptake);
    end
end

end


function item = conditionListItem(list, i)
% Return the i-th entry of a condition list, whether the list is a cell
% array of structs (as produced by readYAML) or a plain struct array.
if iscell(list)
    item = list{i};
else
    item = list(i);
end
end

io/readYAML.m

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
function out = readYAML(filename)
% readYAML
%   Read an arbitrary YAML file into a MATLAB struct / cell tree.
%
%   Use this for parsing arbitrary YAML configuration / data files
%   (e.g. yeast-GEM's data/conditions/*.yml). For loading a cobra-format
%   model YAML, use readYAMLmodel instead; that function knows the
%   model schema and returns a populated RAVEN model struct.
%
%   Implementation: delegates to Python's yaml.safe_load, then
%   recursively converts the py.dict / py.list tree to native MATLAB
%   struct / cell. Requires a working MATLAB-Python bridge and the
%   pyyaml package in the linked Python environment:
%
%       pip install pyyaml      % from the MATLAB-linked Python env
%
%   The file is handed to PyYAML as a binary stream, so the parser does
%   the decoding itself rather than relying on the platform's default
%   text encoding.
%
% Input:
%   filename    path to the YAML file (char or string scalar).
%
% Output:
%   out         MATLAB representation of the document:
%                 py.dict   -> struct
%                 py.list   -> cell column vector
%                 py.str    -> char
%                 py.int    -> double
%                 py.float  -> double
%                 py.bool   -> logical
%                 py.None   -> []
%
% Errors:
%   readYAML:fileNotFound    filename does not point to an existing file.
%   readYAML:pyyamlMissing   the yaml module cannot be imported through
%                            the MATLAB-Python bridge.
%
% Usage: cfg = readYAML('data/conditions/anaerobic.yml')

if isstring(filename)
    % Work on char from here on, so the error message and the Python call
    % both receive plain text.
    filename = char(filename);
end

if ~isfile(filename)
    error('readYAML:fileNotFound', 'File not found: %s', filename);
end

try
    py.importlib.import_module('yaml');
catch ME
    error('readYAML:pyyamlMissing', ...
        ['pyyaml is required to read arbitrary YAML files. Install it ' ...
         'in your MATLAB-linked Python environment (`pip install pyyaml`).' ...
         '\nUnderlying error: %s'], ME.message);
end

% Binary mode: a text-mode open() decodes with the locale's preferred
% encoding, which corrupts non-ASCII content on non-UTF-8 platforms.
f = py.builtins.open(filename, 'rb');
cleanup = onCleanup(@() f.close());   % close the handle even if parsing fails
data = py.yaml.safe_load(f);

out = pyToMatlab(data);
end
51+
52+
53+
function v = pyToMatlab(obj)
% Recursively convert pyyaml-loaded Python objects into MATLAB types.
%
% Input:
%   obj   a Python object returned by yaml.safe_load, or a value that is
%         already a native MATLAB type.
%
% Output:
%   v     py.NoneType -> [], py.bool -> logical, py.int / py.float ->
%         double, py.str -> char, py.dict -> struct (field names passed
%         through matlabFieldName), py.list / py.tuple -> cell column
%         vector. Anything else is returned unchanged.
%
% Warnings:
%   readYAML:duplicateField   two keys of one mapping sanitise to the same
%                             field name; the later value wins.
if isa(obj, 'py.NoneType')
    v = [];
elseif isa(obj, 'py.bool')
    v = logical(obj);
elseif isa(obj, 'py.int') || isa(obj, 'py.float')
    v = double(obj);
elseif isa(obj, 'py.str')
    v = char(obj);
elseif isa(obj, 'py.dict')
    v = struct();
    keys = cell(py.list(obj.keys()));
    vals = cell(py.list(obj.values()));
    for i = 1:numel(keys)
        % YAML mapping keys are not always strings (integers, booleans
        % such as `on:` / `yes:`, null). Going through py.str gives every
        % key a textual form before it is sanitised.
        keyText = char(py.str(keys{i}));
        field = matlabFieldName(keyText);
        if isfield(v, field)
            % Distinct keys can collapse to one field name (e.g. 'a-b'
            % and 'a_b'); flag it rather than dropping data silently.
            warning('readYAML:duplicateField', ...
                'YAML key "%s" maps to field "%s", which is already set; overwriting.', ...
                keyText, field);
        end
        v.(field) = pyToMatlab(vals{i});
    end
elseif isa(obj, 'py.list') || isa(obj, 'py.tuple')
    cells = cell(obj);
    v = cell(numel(cells), 1);
    for i = 1:numel(cells)
        v{i} = pyToMatlab(cells{i});
    end
else
    % Fallback: best-effort. Values that are already native MATLAB types
    % (presumably scalars the Python bridge converted automatically -
    % confirm against MATLAB's type mapping) are returned as they are.
    v = obj;
end
end
81+
82+
83+
function name = matlabFieldName(key)
% Turn a YAML key into a valid MATLAB struct field name.
%
% Every character outside A-Z, a-z, 0-9 and underscore becomes an
% underscore. If the result is empty or does not begin with a letter
% (i.e. it starts with a digit or an underscore), 'f_' is put in front.
name = regexprep(key, '[^A-Za-z0-9_]', '_');

% After sanitising, only ASCII letters, digits and underscores remain, so
% "starts with a letter" reduces to an anchored ASCII match; an empty
% name yields no match and is prefixed as well.
startsWithLetter = ~isempty(regexp(name, '^[A-Za-z]', 'once'));
if ~startsWithLetter
    name = ['f_', name];
end
end

0 commit comments

Comments
 (0)