Skip to content

Commit db59977

Browse files
committed
Improve rmg_kinetics & rmg_thermo scripts
- deep-copy RMG-db kinetics before scaling so a second query can't read a doubly-scaled A - guard degeneracy scaling to Arrhenius/EP so non-Arrhenius forms (Chebyshev/PLOG/etc.) are skipped safely - add sys.path bootstrap so `from common import ...` works regardless of CWD - add an --output flag so the input YAML isn't overwritten - document the training-entry filter and output units - route the helper's debug print to stderr so it can't pollute parsed stdout, and harden it against reactions without reactants. - extend the SI->cm A-factor conversion to termolecular reactions (1e12) instead of mislabeling them s^-1 - factor the degeneracy guard into scale_kinetics_by_degeneracy() and test it against the real code path (a dropped guard now corrupts Chebyshev coeffs and fails the test) - tighten test docs/assertions (JSON not YAML; true byte-level non-overwrite check). Adds argparse + subprocess unit tests.
1 parent 3258243 commit db59977

4 files changed

Lines changed: 243 additions & 13 deletions

File tree

arc/scripts/common.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,14 @@ def parse_command_line_arguments(command_line_args=None):
1717
1818
Returns:
1919
The parsed command-line arguments by keywords.
20+
``args.file`` is the input YAML path (positional).
21+
``args.output`` is an optional output path (``-o``/``--output``); when omitted,
22+
callers should default to overwriting ``args.file`` to preserve historical behavior.
2023
"""
2124
parser = argparse.ArgumentParser(description='Automatic Rate Calculator (ARC)')
2225
parser.add_argument('file', metavar='FILE', type=str, nargs=1, help='a file with input information')
26+
parser.add_argument('-o', '--output', type=str, default=None,
27+
help='optional output YAML path; if omitted, the input file is overwritten')
2328
args = parser.parse_args(command_line_args)
2429
args.file = args.file[0]
2530
return args

arc/scripts/rmg_kinetics.py

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,33 @@
22
# encoding: utf-8
33

44
"""
5-
A standalone script to run RMG
6-
and get kinetic rate coefficients for reactions
5+
A standalone script to run RMG and get kinetic rate coefficients for reactions.
6+
7+
Output units (per entry returned by ``get_kinetics_from_reactions``):
8+
- ``A``: cm^3/(mol*s) for bimolecular reactions, s^-1 for unimolecular
9+
(3-body: cm^6/(mol^2*s)). Reported in the units stored on the
10+
Arrhenius object after the SI->cm conversion below.
11+
- ``n``: dimensionless temperature exponent.
12+
- ``Ea``: kJ/mol (converted from SI J/mol).
13+
- ``T_min``, ``T_max``: K.
714
"""
815

16+
import copy
917
import os
18+
import sys
1019
from typing import Dict, List, Optional, Tuple
1120

21+
# Make ``from common import ...`` work no matter how this script is invoked
22+
# (e.g. ``python /abs/path/to/rmg_kinetics.py``, ``cd elsewhere && python ...``).
23+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
24+
1225
from common import parse_command_line_arguments, read_yaml_file, save_yaml_file
1326

1427
from rmgpy.data.kinetics.common import find_degenerate_reactions
1528
from rmgpy.data.kinetics.family import KineticsFamily
1629
from rmgpy.data.rmg import RMGDatabase
1730
from rmgpy import settings as rmg_settings
31+
from rmgpy.kinetics import Arrhenius, ArrheniusEP, KineticsModel
1832
from rmgpy.reaction import same_species_lists, Reaction
1933
from rmgpy.species import Species
2034

@@ -35,11 +49,12 @@ def main():
3549
"""
3650
args = parse_command_line_arguments()
3751
input_file = args.file
52+
output_file = args.output or input_file
3853
reaction_list = read_yaml_file(path=input_file)
3954
if not isinstance(reaction_list, list):
4055
raise ValueError(f'The content of {input_file} must be a list, got {reaction_list} which is a {type(reaction_list)}')
4156
result = get_rate_coefficients(reaction_list)
42-
save_yaml_file(path=input_file, content=result)
57+
save_yaml_file(path=output_file, content=result)
4358

4459

4560
def get_rate_coefficients(reaction_list: List[Dict]) -> List[Dict]:
@@ -62,6 +77,42 @@ def get_rate_coefficients(reaction_list: List[Dict]) -> List[Dict]:
6277
return reaction_list
6378

6479

80+
def get_a_factor_si_to_cm(num_reactants: int) -> float:
81+
"""
82+
Get the factor that converts an Arrhenius A-factor from SI (m-based) units to
83+
cm-based units, based on the reaction molecularity.
84+
85+
Args:
86+
num_reactants (int): The number of reactants.
87+
88+
Returns:
89+
float: 1.0 for unimolecular (s^-1), 1e6 for bimolecular (m^3->cm^3),
90+
1e12 for termolecular (m^6->cm^6). Defaults to 1.0 otherwise.
91+
"""
92+
return {1: 1.0, 2: 1e6, 3: 1e12}.get(num_reactants, 1.0)
93+
94+
95+
def scale_kinetics_by_degeneracy(kinetics: KineticsModel,
96+
degeneracy: float,
97+
) -> KineticsModel:
98+
"""
99+
Scale Arrhenius-type kinetics in place by the reaction-path degeneracy.
100+
101+
Non-Arrhenius forms (e.g. Chebyshev, PDepArrhenius) are returned unchanged,
102+
since ``change_rate`` would otherwise corrupt their parameters.
103+
104+
Args:
105+
kinetics (KineticsModel): An RMG kinetics object.
106+
degeneracy (float): The reaction-path degeneracy to scale by.
107+
108+
Returns:
109+
KineticsModel: The (possibly scaled) kinetics object.
110+
"""
111+
if isinstance(kinetics, (Arrhenius, ArrheniusEP)):
112+
kinetics.change_rate(degeneracy)
113+
return kinetics
114+
115+
65116
def determine_rmg_kinetics(rmgdb: RMGDatabase,
66117
reaction: Reaction,
67118
dh_rxn298: Optional[float] = None,
@@ -71,6 +122,13 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase,
71122
Determine kinetics for `reaction` (an RMG Reaction object) from RMG's database, if possible.
72123
Assigns a list of all matching entries from both libraries and families.
73124
125+
Note:
126+
Family entries originating from the training set are intentionally filtered out
127+
(an empty returned list therefore means "no matching libraries and only training-set
128+
family hits", not necessarily "no match at all"). Database kinetics are deep-copied
129+
before any in-place mutation (degeneracy scaling, unit conversion) so the loaded
130+
``rmgdb`` instance remains unchanged across calls.
131+
74132
Args:
75133
rmgdb (RMGDatabase): The RMG database instance.
76134
reaction (Reaction): The RMG Reaction object.
@@ -79,6 +137,7 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase,
79137
80138
Returns: list[dict]
81139
All matching RMG reactions kinetics (both libraries and families) as a dict of parameters.
140+
Empty list if nothing matched (or only training-set entries matched).
82141
"""
83142
rmg_reactions = list()
84143
# Libraries:
@@ -89,19 +148,22 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase,
89148
library_reaction.comment = f'Library: {library.label}'
90149
rmg_reactions.append(library_reaction)
91150
break
92-
# # Families:
93-
A_units = "cm^3/(mol*s)" if len(reaction.reactants) == 2 else "s^-1"
151+
# Families:
152+
a_factor = get_a_factor_si_to_cm(len(reaction.reactants))
94153
fam_list = loop_families(rmgdb, reaction)
95154
dh_rxn298 = dh_rxn298 or get_dh_rxn298(rmgdb=rmgdb, reaction=reaction) # J/mol
96155
for family, degenerate_reactions in fam_list:
97156
for deg_rxn in degenerate_reactions:
98157
kinetics_list = family.get_kinetics(reaction=deg_rxn, template_labels=deg_rxn.template, degeneracy=deg_rxn.degeneracy)
99158
for kinetics_detailes in kinetics_list:
100-
kinetics = kinetics_detailes[0]
101-
kinetics.change_rate(deg_rxn.degeneracy)
159+
# Deep-copy before mutating so the database object isn't double-scaled
160+
# if the same family rule is queried again for another reaction.
161+
kinetics = copy.deepcopy(kinetics_detailes[0])
162+
kinetics = scale_kinetics_by_degeneracy(kinetics, deg_rxn.degeneracy)
102163
if hasattr(kinetics, 'to_arrhenius'):
103164
kinetics = kinetics.to_arrhenius(dh_rxn298) # Convert ArrheniusEP to Arrhenius
104-
kinetics.A.value_si = kinetics.A.value_si * (1e6 if A_units == "cm^3/(mol*s)" else 1)
165+
if a_factor != 1.0 and isinstance(kinetics, Arrhenius):
166+
kinetics.A.value_si = kinetics.A.value_si * a_factor
105167
deg_rxn.kinetics = kinetics
106168
deg_rxn.comment = f'Family: {deg_rxn.family}'
107169
if 'training' in deg_rxn.kinetics.comment:
@@ -150,7 +212,11 @@ def get_kinetics_from_reactions(reactions: List[Reaction]) -> List[Dict]:
150212
"""
151213
kinetics_list = list()
152214
for rxn in reactions:
153-
print(f'rxn: {rxn}, kinetics: {rxn.kinetics}, comment: {rxn.comment}')
215+
try:
216+
rxn_repr = str(rxn)
217+
except (TypeError, AttributeError):
218+
rxn_repr = '<reaction without reactants/products labels>'
219+
print(f'rxn: {rxn_repr}, kinetics: {rxn.kinetics}, comment: {rxn.comment}', file=sys.stderr)
154220
kinetics_list.append({
155221
'kinetics': rxn.kinetics.__repr__(),
156222
'comment': rxn.comment,

arc/scripts/rmg_thermo.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@
22
# encoding: utf-8
33

44
"""
5-
A standalone script to run RMG
6-
and get thermodynamic properties for species
5+
A standalone script to run RMG and get thermodynamic properties for species.
6+
7+
Output units (per entry returned by ``get_thermo``):
8+
- ``h298``: kJ/mol (converted from SI J/mol).
9+
- ``s298``: J/(mol*K).
10+
- ``comment``: source / estimation comment from RMG.
711
"""
812

913
import os
14+
import sys
15+
16+
# Make ``from common import ...`` work no matter how this script is invoked.
17+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
1018

1119
from common import parse_command_line_arguments, read_yaml_file, save_yaml_file
1220

@@ -33,11 +41,12 @@ def main():
3341
"""
3442
args = parse_command_line_arguments()
3543
input_file = args.file
44+
output_file = args.output or input_file
3645
species_list = read_yaml_file(path=input_file)
3746
if not isinstance(species_list, list):
3847
raise ValueError(f'The content of {input_file} must be a list, got {species_list} which is a {type(species_list)}')
3948
result = get_thermo(species_list)
40-
save_yaml_file(path=input_file, content=result)
49+
save_yaml_file(path=output_file, content=result)
4150

4251

4352
def get_thermo(species_list: list[dict]) -> list[dict]:

arc/scripts_test.py

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
import shutil
1313
import subprocess
1414
import tempfile
15+
import textwrap
1516
import unittest
1617

17-
from arc.common import ARC_PATH, ARC_TESTING_PATH, read_yaml_file
18+
from arc.common import ARC_PATH, ARC_TESTING_PATH, read_yaml_file, save_yaml_file
19+
from arc.scripts.common import parse_command_line_arguments
1820

1921

2022
def _rmg_env_available() -> bool:
@@ -117,5 +119,153 @@ def test_cp_data_present(self):
117119
self.assertIn('cp_j_mol_k', cp[0])
118120

119121

122+
class TestCommonArgparse(unittest.TestCase):
123+
"""Test the shared CLI parser used by the standalone scripts."""
124+
125+
def test_positional_file_only(self):
126+
"""Without ``--output`` the parser exposes ``args.output is None``."""
127+
args = parse_command_line_arguments(['/tmp/in.yml'])
128+
self.assertEqual(args.file, '/tmp/in.yml')
129+
self.assertIsNone(args.output)
130+
131+
def test_output_long_form(self):
132+
"""``--output`` populates ``args.output`` so callers can avoid overwriting input."""
133+
args = parse_command_line_arguments(['/tmp/in.yml', '--output', '/tmp/out.yml'])
134+
self.assertEqual(args.file, '/tmp/in.yml')
135+
self.assertEqual(args.output, '/tmp/out.yml')
136+
137+
def test_output_short_form(self):
138+
"""``-o`` is an accepted short form."""
139+
args = parse_command_line_arguments(['/tmp/in.yml', '-o', '/tmp/out.yml'])
140+
self.assertEqual(args.output, '/tmp/out.yml')
141+
142+
143+
@unittest.skipUnless(RMG_ENV, 'rmg_env not available')
144+
class TestRmgKineticsHelpers(unittest.TestCase):
145+
"""
146+
Unit tests for ``rmg_kinetics.py`` helpers that don't need a full RMG database load.
147+
148+
Each test runs a tiny ``python -c`` snippet inside ``rmg_env`` so we can import
149+
rmgpy and the script module directly. Stdout is parsed as JSON.
150+
"""
151+
152+
SCRIPT_DIR = os.path.join(ARC_PATH, 'arc', 'scripts')
153+
154+
def _run_in_rmg_env(self, snippet: str) -> str:
155+
"""Execute ``snippet`` inside rmg_env and return stripped stdout."""
156+
result = subprocess.run(
157+
['conda', 'run', '-n', 'rmg_env', 'python', '-c', snippet],
158+
capture_output=True, text=True, timeout=120,
159+
)
160+
self.assertEqual(result.returncode, 0,
161+
f'snippet failed: stderr={result.stderr}\nstdout={result.stdout}')
162+
return result.stdout.strip()
163+
164+
def test_get_kinetics_from_reactions_arrhenius(self):
165+
"""``get_kinetics_from_reactions`` reports A/n/Ea (Ea in kJ/mol) for an Arrhenius rxn."""
166+
snippet = textwrap.dedent(f"""
167+
import sys, json
168+
sys.path.insert(0, {self.SCRIPT_DIR!r})
169+
from rmg_kinetics import get_kinetics_from_reactions
170+
from rmgpy.kinetics import Arrhenius
171+
from rmgpy.reaction import Reaction
172+
rxn = Reaction()
173+
rxn.kinetics = Arrhenius(A=(1.5e13, 'cm^3/(mol*s)'), n=0.0, Ea=(20.0, 'kJ/mol'),
174+
Tmin=(300.0, 'K'), Tmax=(2500.0, 'K'))
175+
rxn.comment = 'unit-test'
176+
out = get_kinetics_from_reactions([rxn])
177+
print(json.dumps(out[0]))
178+
""")
179+
import json
180+
entry = json.loads(self._run_in_rmg_env(snippet))
181+
self.assertEqual(entry['comment'], 'unit-test')
182+
self.assertAlmostEqual(entry['A'], 1.5e13, delta=1e7)
183+
self.assertEqual(entry['n'], 0.0)
184+
self.assertAlmostEqual(entry['Ea'], 20.0, places=6) # kJ/mol
185+
self.assertEqual(entry['T_min'], 300.0)
186+
self.assertEqual(entry['T_max'], 2500.0)
187+
188+
def test_get_kinetics_from_reactions_handles_missing_T_bounds(self):
189+
"""Tmin/Tmax may be absent; helper should yield None rather than crashing."""
190+
snippet = textwrap.dedent(f"""
191+
import sys, json
192+
sys.path.insert(0, {self.SCRIPT_DIR!r})
193+
from rmg_kinetics import get_kinetics_from_reactions
194+
from rmgpy.kinetics import Arrhenius
195+
from rmgpy.reaction import Reaction
196+
rxn = Reaction()
197+
rxn.kinetics = Arrhenius(A=(1.0, 's^-1'), n=1.0, Ea=(0.0, 'J/mol'))
198+
rxn.comment = 'no-T-bounds'
199+
print(json.dumps(get_kinetics_from_reactions([rxn])[0]))
200+
""")
201+
import json
202+
entry = json.loads(self._run_in_rmg_env(snippet))
203+
self.assertIsNone(entry['T_min'])
204+
self.assertIsNone(entry['T_max'])
205+
206+
def test_scale_kinetics_by_degeneracy_skips_non_arrhenius(self):
207+
"""``scale_kinetics_by_degeneracy`` scales Arrhenius A by the degeneracy but
208+
leaves non-Arrhenius forms (e.g. Chebyshev) untouched. Dropping the guard would
209+
let ``change_rate`` corrupt the Chebyshev coefficients and fail this test."""
210+
snippet = textwrap.dedent(f"""
211+
import sys
212+
sys.path.insert(0, {self.SCRIPT_DIR!r})
213+
from rmg_kinetics import scale_kinetics_by_degeneracy
214+
from rmgpy.kinetics import Arrhenius, Chebyshev
215+
arr = Arrhenius(A=(1.0, 's^-1'), n=0.0, Ea=(0.0, 'J/mol'))
216+
scale_kinetics_by_degeneracy(arr, 2)
217+
assert abs(arr.A.value_si - 2.0) < 1e-9, arr.A.value_si
218+
cheb = Chebyshev(coeffs=[[1.0, 0.0], [0.0, 0.0]],
219+
kunits='cm^3/(mol*s)',
220+
Tmin=(300.0, 'K'), Tmax=(2000.0, 'K'),
221+
Pmin=(0.01, 'bar'), Pmax=(100.0, 'bar'))
222+
before = cheb.coeffs.value_si.tolist()
223+
scale_kinetics_by_degeneracy(cheb, 2)
224+
assert cheb.coeffs.value_si.tolist() == before, 'Chebyshev coeffs were mutated'
225+
print('ok')
226+
""")
227+
self.assertEqual(self._run_in_rmg_env(snippet), 'ok')
228+
229+
230+
@unittest.skipUnless(RMG_ENV, 'rmg_env not available')
231+
class TestRmgScriptsOutputFlag(unittest.TestCase):
232+
"""Verify ``--output`` writes to a fresh path and leaves the input file untouched."""
233+
234+
def setUp(self):
235+
self.tmp_dir = tempfile.mkdtemp(prefix='rmg_scripts_test_')
236+
237+
def tearDown(self):
238+
shutil.rmtree(self.tmp_dir, ignore_errors=True)
239+
240+
def _h2_adjlist(self) -> str:
241+
return '1 H u0 p0 c0 {2,S}\n2 H u0 p0 c0 {1,S}\n'
242+
243+
def test_rmg_thermo_output_does_not_overwrite_input(self):
244+
"""The thermo script writes the augmented YAML to ``--output`` and preserves input."""
245+
input_path = os.path.join(self.tmp_dir, 'in.yml')
246+
output_path = os.path.join(self.tmp_dir, 'out.yml')
247+
original = [{'label': 'H2', 'adjlist': self._h2_adjlist()}]
248+
save_yaml_file(path=input_path, content=original)
249+
with open(input_path, 'rb') as f:
250+
input_bytes_before = f.read()
251+
252+
script = os.path.join(ARC_PATH, 'arc', 'scripts', 'rmg_thermo.py')
253+
result = subprocess.run(
254+
['conda', 'run', '-n', 'rmg_env', 'python', script, input_path, '--output', output_path],
255+
capture_output=True, text=True, timeout=300,
256+
)
257+
self.assertEqual(result.returncode, 0, f'thermo script failed: {result.stderr}')
258+
259+
# Input must be byte-identical (the script must not overwrite it).
260+
with open(input_path, 'rb') as f:
261+
self.assertEqual(f.read(), input_bytes_before)
262+
# Output must contain the new keys.
263+
out = read_yaml_file(output_path)
264+
self.assertEqual(len(out), 1)
265+
self.assertIn('h298', out[0])
266+
self.assertIn('s298', out[0])
267+
self.assertIn('comment', out[0])
268+
269+
120270
if __name__ == '__main__':
121271
unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

0 commit comments

Comments
 (0)