Skip to content

Commit 114e1f0

Browse files
committed
Preserve species referenced in equations during filtering
Updates BaseThinLayer to retain species that are referenced in ODE, assignment, initial assignment, and kinetic law equations, even if they are not part of reactions or defined as ODEs. Adds tests to verify that such species are not removed during optimization.
1 parent d47d2a5 commit 114e1f0

2 files changed

Lines changed: 233 additions & 6 deletions

File tree

pyenzyme/thinlayers/base.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from abc import ABC, abstractmethod
22
from functools import cached_property
3-
from typing import Dict, List, Optional, Tuple, TypeAlias
3+
from typing import Dict, List, Optional, Set, Tuple, TypeAlias
44

55
import pandas as pd
6+
from sympy import Symbol, sympify
67

78
import pyenzyme as pe
89
from pyenzyme.versions import v2
@@ -79,7 +80,9 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
7980
enzmldoc = enzmldoc.model_copy(deep=True)
8081

8182
# Collect all species that are explicitly modeled
83+
all_species = BaseThinLayer._get_all_species(enzmldoc)
8284
modeled_species = set()
85+
equations = []
8386

8487
# Add species from reactions (reactants and products)
8588
for reaction in enzmldoc.reactions:
@@ -88,11 +91,24 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
8891
)
8992
modeled_species.update(product.species_id for product in reaction.products)
9093

94+
if reaction.kinetic_law:
95+
equations.append(sympify(reaction.kinetic_law.equation))
96+
9197
# Add species from ODE equations
98+
for equation in enzmldoc.equations:
99+
if equation.equation_type == v2.EquationType.ODE:
100+
equations.append(sympify(equation.equation))
101+
modeled_species.add(equation.species_id)
102+
elif (
103+
equation.equation_type == v2.EquationType.ASSIGNMENT
104+
or equation.equation_type == v2.EquationType.INITIAL_ASSIGNMENT
105+
):
106+
equations.append(sympify(equation.equation))
107+
108+
# Find species referenced in equations
109+
equation_symbols = {symbol for eq in equations for symbol in eq.free_symbols}
92110
modeled_species.update(
93-
equation.species_id
94-
for equation in enzmldoc.equations
95-
if equation.equation_type == v2.EquationType.ODE
111+
species for species in all_species if Symbol(species) in equation_symbols
96112
)
97113

98114
if not modeled_species:
@@ -132,6 +148,18 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
132148

133149
return enzmldoc
134150

151+
@staticmethod
152+
def _get_all_species(enzmldoc: v2.EnzymeMLDocument) -> Set[str]:
153+
"""
154+
Gets all species from the EnzymeML document.
155+
"""
156+
return set(
157+
species.id
158+
for species in enzmldoc.small_molecules
159+
+ enzmldoc.proteins
160+
+ enzmldoc.complexes
161+
)
162+
135163
@abstractmethod
136164
def integrate(
137165
self,

tests/unit/test_thinlayer.py

Lines changed: 201 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pyenzyme.thinlayers.base import BaseThinLayer
2-
from pyenzyme.versions.v2 import EnzymeMLDocument, EquationType
2+
from pyenzyme.versions.v2 import EnzymeMLDocument, Equation, EquationType
33

44
# Mock data for creating test species measurements
55
MOCK_DATA = {
@@ -96,6 +96,205 @@ def test_remove_unmodeled_species_odes(self):
9696
f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}."
9797
)
9898

99+
def test_includes_species_in_ode_equation(self):
100+
"""
101+
Test that species referenced in equations are kept in the document even when
102+
they are not defined as ODEs or part of reactions.
103+
104+
This test verifies that:
105+
- A species that appears in an equation (but not in reactions or ODEs) is kept
106+
- The species can have initial values in measurements without time course data
107+
- The filtering function correctly identifies equation-referenced species as modeled
108+
"""
109+
enzmldoc = self._create_enzmldoc()
110+
111+
# Add a new species that will only appear in equations (not reactions/ODEs)
112+
equation_only_species = enzmldoc.add_to_small_molecules(
113+
id="p1",
114+
name="Equation Only Species",
115+
)
116+
117+
# Add an initial for the species to each measurement
118+
for i, measurement in enumerate(enzmldoc.measurements):
119+
measurement.add_to_species_data(
120+
species_id=equation_only_species.id,
121+
initial=i * 2.0,
122+
data=[],
123+
time=[],
124+
)
125+
126+
# Add an assignment equation that references this species
127+
# This species is NOT an ODE and NOT part of any reaction
128+
enzmldoc.add_to_equations(
129+
species_id="Product", # Left side of equation
130+
equation_type=EquationType.ODE,
131+
equation="p1 * 2.0", # p1 appears in the equation
132+
)
133+
134+
# Verify the species exists before filtering
135+
assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
136+
137+
# Remove unmodeled species
138+
thinlayer = MockThinLayer(enzmldoc)
139+
tl_enzmldoc = thinlayer.optimize()
140+
141+
assert equation_only_species.id in [
142+
s.id for s in tl_enzmldoc.small_molecules
143+
], (
144+
f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
145+
f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
146+
)
147+
148+
def test_includes_species_in_assignment_equation(self):
149+
"""
150+
Test that species referenced in equations are kept in the document even when
151+
they are not defined as ODEs or part of reactions.
152+
153+
This test verifies that:
154+
- A species that appears in an equation (but not in reactions or ODEs) is kept
155+
- The species can have initial values in measurements without time course data
156+
- The filtering function correctly identifies equation-referenced species as modeled
157+
"""
158+
enzmldoc = self._create_enzmldoc()
159+
160+
# Add a new species that will only appear in equations (not reactions/ODEs)
161+
equation_only_species = enzmldoc.add_to_small_molecules(
162+
id="p1",
163+
name="Equation Only Species",
164+
)
165+
166+
# Add an initial for the species to each measurement
167+
for i, measurement in enumerate(enzmldoc.measurements):
168+
measurement.add_to_species_data(
169+
species_id=equation_only_species.id,
170+
initial=i * 2.0,
171+
data=[],
172+
time=[],
173+
)
174+
175+
# Add an assignment equation that references this species
176+
# This species is NOT an ODE and NOT part of any reaction
177+
enzmldoc.add_to_equations(
178+
species_id="Product", # Left side of equation
179+
equation_type=EquationType.ASSIGNMENT,
180+
equation="p1 * 2.0", # p1 appears in the equation
181+
)
182+
183+
# Verify the species exists before filtering
184+
assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
185+
186+
# Remove unmodeled species
187+
thinlayer = MockThinLayer(enzmldoc)
188+
tl_enzmldoc = thinlayer.optimize()
189+
190+
assert equation_only_species.id in [
191+
s.id for s in tl_enzmldoc.small_molecules
192+
], (
193+
f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
194+
f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
195+
)
196+
197+
def test_includes_species_in_initial_assignment_equation(self):
198+
"""
199+
Test that species referenced in equations are kept in the document even when
200+
they are not defined as ODEs or part of reactions.
201+
202+
This test verifies that:
203+
- A species that appears in an equation (but not in reactions or ODEs) is kept
204+
- The species can have initial values in measurements without time course data
205+
- The filtering function correctly identifies equation-referenced species as modeled
206+
"""
207+
enzmldoc = self._create_enzmldoc()
208+
209+
# Add a new species that will only appear in equations (not reactions/ODEs)
210+
equation_only_species = enzmldoc.add_to_small_molecules(
211+
id="p1",
212+
name="Equation Only Species",
213+
)
214+
215+
# Add an initial for the species to each measurement
216+
for i, measurement in enumerate(enzmldoc.measurements):
217+
measurement.add_to_species_data(
218+
species_id=equation_only_species.id,
219+
initial=i * 2.0,
220+
data=[],
221+
time=[],
222+
)
223+
224+
# Add an assignment equation that references this species
225+
# This species is NOT an ODE and NOT part of any reaction
226+
enzmldoc.add_to_equations(
227+
species_id="Product", # Left side of equation
228+
equation_type=EquationType.INITIAL_ASSIGNMENT,
229+
equation="p1 * 2.0", # p1 appears in the equation
230+
)
231+
232+
# Verify the species exists before filtering
233+
assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
234+
235+
# Remove unmodeled species
236+
thinlayer = MockThinLayer(enzmldoc)
237+
tl_enzmldoc = thinlayer.optimize()
238+
239+
assert equation_only_species.id in [
240+
s.id for s in tl_enzmldoc.small_molecules
241+
], (
242+
f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
243+
f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
244+
)
245+
246+
def test_includes_species_in_kinetic_law_equation(self):
247+
"""
248+
Test that species referenced in equations are kept in the document even when
249+
they are not defined as ODEs or part of reactions.
250+
251+
This test verifies that:
252+
- A species that appears in an equation (but not in reactions or ODEs) is kept
253+
- The species can have initial values in measurements without time course data
254+
- The filtering function correctly identifies equation-referenced species as modeled
255+
"""
256+
enzmldoc = self._create_enzmldoc()
257+
258+
# Add a new species that will only appear in equations (not reactions/ODEs)
259+
equation_only_species = enzmldoc.add_to_small_molecules(
260+
id="p1",
261+
name="Equation Only Species",
262+
)
263+
264+
# Add an initial for the species to each measurement
265+
for i, measurement in enumerate(enzmldoc.measurements):
266+
measurement.add_to_species_data(
267+
species_id=equation_only_species.id,
268+
initial=i * 2.0,
269+
data=[],
270+
time=[],
271+
)
272+
273+
# Add a reaction that references this species
274+
# This species is NOT an ODE and NOT part of any reaction
275+
reaction = enzmldoc.add_to_reactions(id="R1", name="R1")
276+
reaction.add_to_reactants(species_id="Substrate", stoichiometry=1)
277+
reaction.add_to_products(species_id="Product", stoichiometry=1)
278+
reaction.kinetic_law = Equation(
279+
equation="p1 * 2.0",
280+
equation_type=EquationType.RATE_LAW,
281+
species_id="v",
282+
)
283+
284+
# Verify the species exists before filtering
285+
assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
286+
287+
# Remove unmodeled species
288+
thinlayer = MockThinLayer(enzmldoc)
289+
tl_enzmldoc = thinlayer.optimize()
290+
291+
assert equation_only_species.id in [
292+
s.id for s in tl_enzmldoc.small_molecules
293+
], (
294+
f"Species '{equation_only_species.id}' should be kept because it appears in a reaction, "
295+
f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
296+
)
297+
99298
def _create_enzmldoc(self) -> EnzymeMLDocument:
100299
"""
101300
Create a test EnzymeML document with various measurement scenarios.
@@ -116,7 +315,7 @@ def _create_enzmldoc(self) -> EnzymeMLDocument:
116315
# Add small molecules
117316
substrate = enzmldoc.add_to_small_molecules(id="Substrate", name="Substrate")
118317
product = enzmldoc.add_to_small_molecules(id="Product", name="Product")
119-
unmodeled = enzmldoc.add_to_small_molecules(id="Unmodeled", name="Unmodeled")
318+
unmodeled = enzmldoc.add_to_small_molecules(id="s1", name="Unmodeled")
120319

121320
# Add a measurement with unmodeled species
122321
measurement = enzmldoc.add_to_measurements(id="M1", name="M1")

0 commit comments

Comments
 (0)