Skip to content

Commit ff2d176

Browse files
committed
Added timeout to RMG subprocess run
1 parent 790c64b commit ff2d176

2 files changed

Lines changed: 30 additions & 2 deletions

File tree

t3/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ def run_rmg(self, restart_rmg: bool = False):
582582
t3_project_name=self.project,
583583
rmg_execution_type=self.rmg['rmg_execution_type'],
584584
restart_rmg=restart_rmg,
585+
walltime=self.t3['options']['max_RMG_walltime'],
585586
)
586587
if rmg_exception_encountered:
587588
self.rmg_exceptions_counter += 1

t3/runners/rmg_runner.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,24 @@ def rmg_job_converged(project_directory: str) -> Tuple[bool, Optional[str]]:
161161
return rmg_converged, error
162162

163163

164+
# Fallback timeout (in seconds) for an incore RMG subprocess run,
# applied when no walltime is given or when it parses to a non-positive value.
_DEFAULT_RMG_TIMEOUT_S = 6 * 3600 # 6 hours
165+
166+
logger = logging.getLogger(__name__)
167+
168+
169+
def _parse_walltime_to_seconds(walltime: str) -> int:
170+
"""Parse a 'DD:HH:MM:SS' walltime string to total seconds. Returns 0 for '00:00:00:00'."""
171+
parts = walltime.split(':')
172+
if len(parts) != 4:
173+
return 0
174+
days, hours, minutes, seconds = (int(p) for p in parts)
175+
return days * 86400 + hours * 3600 + minutes * 60 + seconds
176+
177+
164178
def run_rmg_incore(rmg_input_file_path: str,
165179
verbose: Optional[int] = None,
166180
max_iterations: Optional[int] = None,
181+
walltime: Optional[str] = None,
167182
) -> bool:
168183
"""
169184
Run RMG incore under the rmg_env.
@@ -172,10 +187,14 @@ def run_rmg_incore(rmg_input_file_path: str,
172187
rmg_input_file_path (str): The path to the RMG input file.
173188
max_iterations (int, optional): Max RMG iterations.
174189
verbose (int, optional): Level of verbosity.
190+
walltime (str, optional): Max walltime in 'DD:HH:MM:SS' format. Defaults to 6 hours.
175191
176192
Returns:
177193
bool: Whether an exception was raised.
178194
"""
195+
timeout_s = _parse_walltime_to_seconds(walltime) if walltime else 0
196+
if timeout_s <= 0:
197+
timeout_s = _DEFAULT_RMG_TIMEOUT_S
179198
project_directory = os.path.abspath(os.path.dirname(rmg_input_file_path))
180199
verbose = f' -v {verbose}' if verbose is not None else ''
181200
max_iterations = f' -m {max_iterations}' if max_iterations is not None else ''
@@ -192,8 +211,13 @@ def run_rmg_incore(rmg_input_file_path: str,
192211
echo "Micromamba/Mamba/Conda required" >&2
193212
exit 1
194213
fi' '''
195-
stdout, stderr = execute_command(shell_script, shell=True, no_fail=True, executable='/bin/bash')
196-
stderr_text = ''.join(stderr) if isinstance(stderr, list) else (stderr or '')
214+
try:
215+
result = subprocess.run(shell_script, shell=True, executable='/bin/bash',
216+
capture_output=True, text=True, timeout=timeout_s)
217+
stderr_text = result.stderr or ''
218+
except subprocess.TimeoutExpired:
219+
logger.error(f'RMG incore timed out after {timeout_s}s')
220+
return True
197221
if 'RMG threw an exception and did not converge.' in stderr_text:
198222
return True
199223
return False
@@ -271,6 +295,7 @@ def rmg_runner(rmg_input_file_path: str,
271295
t3_project_name: Optional[str] = None,
272296
rmg_execution_type: Optional[str] = None,
273297
restart_rmg: bool = False,
298+
walltime: Optional[str] = None,
274299
) -> bool:
275300
"""
276301
Run an RMG job as a subprocess under the rmg_env.
@@ -286,6 +311,7 @@ def rmg_runner(rmg_input_file_path: str,
286311
t3_project_name (str, optional): The T3 project name, used for setting a job name on the server for the RMG run.
287312
rmg_execution_type (str, optional): The RMG execution type (incore or local). Also set via settings.py.
288313
restart_rmg (bool, optional): Whether to restart RMG from seed.
314+
walltime (str, optional): Max walltime in 'DD:HH:MM:SS' format. Defaults to 6 hours.
289315
290316
Returns:
291317
bool: Whether an exception was raised.
@@ -299,6 +325,7 @@ def rmg_runner(rmg_input_file_path: str,
299325
rmg_exception_encountered = run_rmg_incore(rmg_input_file_path=rmg_input_file_path,
300326
verbose=verbose,
301327
max_iterations=max_iterations,
328+
walltime=walltime,
302329
)
303330
return rmg_exception_encountered
304331
elif rmg_execution_type == 'local':

0 commit comments

Comments
 (0)