Skip to content

Commit eaf086f

Browse files
committed
Add removal of unmodeled species to BaseThinLayer
Introduces a method to filter out species not referenced in reactions or ODEs from EnzymeML documents, and updates measurements accordingly. Adds unit tests to verify correct removal and preservation of unmodeled species based on the new flag.
1 parent 5f6cdac commit eaf086f

2 files changed

Lines changed: 283 additions & 1 deletion

File tree

pyenzyme/thinlayers/base.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,103 @@ def __init__(
3636
enzmldoc: v2.EnzymeMLDocument,
3737
measurement_ids: Optional[List[str]] = None,
3838
df_per_measurement: bool = False,
39+
remove_unmodeled_species: bool = True,
3940
):
4041
assert isinstance(enzmldoc, v2.EnzymeMLDocument)
4142
assert isinstance(measurement_ids, list) or measurement_ids is None
4243

44+
# Remove empty measurements
45+
enzmldoc.measurements = [
46+
meas for meas in enzmldoc.measurements if meas.species_data
47+
]
48+
4349
if measurement_ids is None:
4450
measurement_ids = [meas.id for meas in enzmldoc.measurements]
4551

46-
self.enzmldoc = enzmldoc
52+
if remove_unmodeled_species:
53+
enzmldoc = self._remove_unmodeled_species(enzmldoc)
54+
55+
self.enzmldoc = enzmldoc.model_copy(deep=True)
4756
self.fitted_doc = enzmldoc.model_copy(deep=True)
4857
self.measurement_ids = measurement_ids
4958
self.df_per_measurement = df_per_measurement
5059

60+
@staticmethod
def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocument:
    """
    Return a filtered deep copy of the document that keeps only modeled species.

    A species counts as modeled when it appears as a reactant or product of
    any reaction, or when it is the ``species_id`` of an equation whose type
    is ``EquationType.ODE``. Every other species is dropped from the small
    molecules, proteins and complexes, and its data is dropped from each
    measurement. Measurements left without any species data are discarded.

    Args:
        enzmldoc (v2.EnzymeMLDocument): The document to filter. Filtering is
            applied to a deep copy, not to this object.

    Returns:
        v2.EnzymeMLDocument: The filtered deep copy.

    Note:
        - If no species is modeled at all, the returned copy has empty
          measurements, small molecules, proteins and complexes.
        - Only reaction reactants/products and ODE equations are consulted.
    """
    doc = enzmldoc.model_copy(deep=True)

    # Species taking part in any reaction, on either side of it
    reaction_species = {
        element.species_id
        for reaction in doc.reactions
        for element in (*reaction.reactants, *reaction.products)
    }

    # Species that are described by an ODE
    ode_species = {
        eq.species_id
        for eq in doc.equations
        if eq.equation_type == v2.EquationType.ODE
    }

    keep = reaction_species | ode_species

    # Nothing is modeled: strip all species and all measurements
    if not keep:
        doc.measurements = []
        doc.small_molecules = []
        doc.proteins = []
        doc.complexes = []
        return doc

    surviving_measurements = []
    for meas in doc.measurements:
        kept_data = [
            entry for entry in meas.species_data if entry.species_id in keep
        ]

        # A measurement without any modeled species is dropped entirely
        if not kept_data:
            continue

        meas.species_data = kept_data
        surviving_measurements.append(meas)

    # Restrict every species collection to the modeled ids
    doc.measurements = surviving_measurements
    doc.small_molecules = [mol for mol in doc.small_molecules if mol.id in keep]
    doc.proteins = [prot for prot in doc.proteins if prot.id in keep]
    doc.complexes = [cplx for cplx in doc.complexes if cplx.id in keep]

    return doc
135+
51136
@abstractmethod
52137
def integrate(
53138
self,

tests/unit/test_thinlayer.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
from pyenzyme.thinlayers.base import BaseThinLayer
2+
from pyenzyme.versions.v2 import EnzymeMLDocument, EquationType
3+
4+
# Mock data for creating test species measurements
5+
MOCK_DATA = {
6+
"initial": 1.0,
7+
"time": [1.0, 2.0, 3.0, 4.0],
8+
"data": [1.0, 2.0, 3.0, 4.0],
9+
}
10+
11+
12+
class TestThinLayer:
    """Unit tests for the species filtering performed by BaseThinLayer."""

    def test_remove_unmodeled_species_reaction(self):
        """
        Species outside of every reaction are dropped when filtering is on.

        Checks that:
        - Only the two small molecules used by the reaction remain
        - Only two measurements remain
        - No remaining measurement carries data for "Unmodeled"
        """
        enzmldoc = self._create_enzmldoc()
        self._add_substrate_to_product_reaction(enzmldoc)

        # Filtering enabled
        thinlayer = MockThinLayer(enzmldoc, remove_unmodeled_species=True)

        assert len(thinlayer.enzmldoc.small_molecules) == 2, (
            f"Unmodeled small molecules should be removed, but {len(thinlayer.enzmldoc.small_molecules)} remain."
        )
        assert len(thinlayer.enzmldoc.measurements) == 2, (
            f"Unmodeled measurements should be removed, but {len(thinlayer.enzmldoc.measurements)} remain."
        )

        measurement_has_unmodeled = self._measurements_with_unmodeled(
            thinlayer.enzmldoc
        )

        assert not measurement_has_unmodeled, (
            f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}."
        )

    def test_remove_unmodeled_species_odes(self):
        """
        Species without an ODE are dropped when filtering is on.

        Checks that:
        - Only the two small molecules that have an ODE remain
        - Only two measurements remain
        - No remaining measurement carries data for "Unmodeled"
        """
        enzmldoc = self._create_enzmldoc()

        # One ODE each for Substrate and Product; Unmodeled gets none
        for species_id, rhs in (("Substrate", "-Substrate"), ("Product", "Substrate")):
            enzmldoc.add_to_equations(
                species_id=species_id,
                equation_type=EquationType.ODE,
                equation=rhs,
            )

        # Filtering enabled
        thinlayer = MockThinLayer(enzmldoc, remove_unmodeled_species=True)

        assert len(thinlayer.enzmldoc.small_molecules) == 2, (
            f"Unmodeled small molecules should be removed, but {len(thinlayer.enzmldoc.small_molecules)} remain."
        )
        assert len(thinlayer.enzmldoc.measurements) == 2, (
            f"Unmodeled measurements should be removed, but {len(thinlayer.enzmldoc.measurements)} remain."
        )

        measurement_has_unmodeled = self._measurements_with_unmodeled(
            thinlayer.enzmldoc
        )

        assert not measurement_has_unmodeled, (
            f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}."
        )

    def test_leave_unmodeled_species(self):
        """
        Species are kept when remove_unmodeled_species=False.

        Checks that:
        - All three small molecules remain
        - Three measurements remain (the empty one is still dropped)
        - Two measurements still carry data for "Unmodeled"
        """
        enzmldoc = self._create_enzmldoc()
        self._add_substrate_to_product_reaction(enzmldoc)

        # Filtering disabled
        thinlayer = MockThinLayer(enzmldoc, remove_unmodeled_species=False)

        assert len(thinlayer.enzmldoc.small_molecules) == 3, (
            f"Unmodeled small molecules should not be removed, but {len(thinlayer.enzmldoc.small_molecules)} remain."
        )

        assert len(thinlayer.enzmldoc.measurements) == 3, (
            f"Empty measurements should be removed, but {len(thinlayer.enzmldoc.measurements)} remain."
        )

        measurement_has_unmodeled = self._measurements_with_unmodeled(
            thinlayer.enzmldoc
        )

        assert len(measurement_has_unmodeled) == 2, (
            f"Unmodeled species should not be removed, but appears in measurements {measurement_has_unmodeled}."
        )

    @staticmethod
    def _add_substrate_to_product_reaction(enzmldoc: EnzymeMLDocument) -> None:
        """Add reaction "R1" (Substrate -> Product) to the document; Unmodeled takes no part."""
        reaction = enzmldoc.add_to_reactions(id="R1", name="R1")
        reaction.add_to_reactants(species_id="Substrate", stoichiometry=1)
        reaction.add_to_products(species_id="Product", stoichiometry=1)

    @staticmethod
    def _measurements_with_unmodeled(enzmldoc: EnzymeMLDocument) -> list[str]:
        """Return the measurement id once for every species data entry of "Unmodeled"."""
        return [
            measurement.id
            for measurement in enzmldoc.measurements
            for species_data in measurement.species_data
            if species_data.species_id == "Unmodeled"
        ]

    def _create_enzmldoc(self) -> EnzymeMLDocument:
        """
        Build the EnzymeML document shared by all tests.

        The document holds:
        - Three small molecules: Substrate, Product, and Unmodeled
        - Four measurements:
            - M1: data for Substrate, Product, and Unmodeled
            - M2: data for Substrate and Product
            - M3: data for Unmodeled only
            - M4: no species data at all

        Returns:
            EnzymeMLDocument: The freshly created test document.
        """
        enzmldoc = EnzymeMLDocument(name="Test")

        # Register the small molecules in a fixed order
        substrate, product, unmodeled = (
            enzmldoc.add_to_small_molecules(id=name, name=name)
            for name in ("Substrate", "Product", "Unmodeled")
        )

        # Measurement id -> species whose data it contains
        layout = {
            "M1": (substrate, product, unmodeled),
            "M2": (substrate, product),
            "M3": (unmodeled,),
            "M4": (),
        }

        for measurement_id, members in layout.items():
            measurement = enzmldoc.add_to_measurements(
                id=measurement_id, name=measurement_id
            )
            for species in members:
                measurement.add_to_species_data(species_id=species.id, **MOCK_DATA)

        return enzmldoc
176+
177+
178+
class MockThinLayer(BaseThinLayer):
    """
    Minimal BaseThinLayer subclass used only by the tests.

    It supplies no-op ``integrate``, ``optimize`` and ``write`` methods so
    that the constructor logic of the base class can be exercised without
    a real thin layer implementation.
    """

    def integrate(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def optimize(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def write(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

0 commit comments

Comments
 (0)