@@ -161,9 +161,24 @@ def rmg_job_converged(project_directory: str) -> Tuple[bool, Optional[str]]:
161161 return rmg_converged , error
162162
163163
# Fallback wall-clock limit, in seconds, for an incore RMG run (6 hours).
_DEFAULT_RMG_TIMEOUT_S = 6 * 60 * 60

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
167+
168+
def _parse_walltime_to_seconds(walltime: str) -> int:
    """
    Convert a 'DD:HH:MM:SS' walltime string into a total number of seconds.

    Malformed input never raises: it yields 0, the same value returned for
    '00:00:00:00', so a caller can treat any non-positive result as "no usable limit".

    Args:
        walltime (str): The walltime in 'DD:HH:MM:SS' format.

    Returns:
        int: The total number of seconds, or 0 if ``walltime`` is not a string,
             does not have exactly four ':'-separated fields, or has a field
             that is not a non-negative integer.
    """
    if not isinstance(walltime, str):
        return 0
    parts = walltime.split(':')
    if len(parts) != 4:
        return 0
    try:
        days, hours, minutes, seconds = (int(part) for part in parts)
    except ValueError:
        # Non-numeric or empty field: handle it like a wrong field count instead of crashing.
        return 0
    if min(days, hours, minutes, seconds) < 0:
        # A negative field could silently offset the others into a bogus positive total.
        return 0
    return days * 86400 + hours * 3600 + minutes * 60 + seconds
176+
177+
164178def run_rmg_incore (rmg_input_file_path : str ,
165179 verbose : Optional [int ] = None ,
166180 max_iterations : Optional [int ] = None ,
181+ walltime : Optional [str ] = None ,
167182 ) -> bool :
168183 """
169184 Run RMG incore under the rmg_env.
@@ -172,10 +187,14 @@ def run_rmg_incore(rmg_input_file_path: str,
172187 rmg_input_file_path (str): The path to the RMG input file.
173188 max_iterations (int, optional): Max RMG iterations.
174189 verbose (int, optional): Level of verbosity.
190+ walltime (str, optional): Max walltime in 'DD:HH:MM:SS' format. Defaults to 6 hours.
175191
176192 Returns:
177193 bool: Whether an exception was raised.
178194 """
195+ timeout_s = _parse_walltime_to_seconds (walltime ) if walltime else 0
196+ if timeout_s <= 0 :
197+ timeout_s = _DEFAULT_RMG_TIMEOUT_S
179198 project_directory = os .path .abspath (os .path .dirname (rmg_input_file_path ))
180199 verbose = f' -v { verbose } ' if verbose is not None else ''
181200 max_iterations = f' -m { max_iterations } ' if max_iterations is not None else ''
@@ -192,8 +211,13 @@ def run_rmg_incore(rmg_input_file_path: str,
192211 echo "Micromamba/Mamba/Conda required" >&2
193212 exit 1
194213fi' '''
195- stdout , stderr = execute_command (shell_script , shell = True , no_fail = True , executable = '/bin/bash' )
196- stderr_text = '' .join (stderr ) if isinstance (stderr , list ) else (stderr or '' )
214+ try :
215+ result = subprocess .run (shell_script , shell = True , executable = '/bin/bash' ,
216+ capture_output = True , text = True , timeout = timeout_s )
217+ stderr_text = result .stderr or ''
218+ except subprocess .TimeoutExpired :
219+ logger .error (f'RMG incore timed out after { timeout_s } s' )
220+ return True
197221 if 'RMG threw an exception and did not converge.' in stderr_text :
198222 return True
199223 return False
@@ -271,6 +295,7 @@ def rmg_runner(rmg_input_file_path: str,
271295 t3_project_name : Optional [str ] = None ,
272296 rmg_execution_type : Optional [str ] = None ,
273297 restart_rmg : bool = False ,
298+ walltime : Optional [str ] = None ,
274299 ) -> bool :
275300 """
276301 Run an RMG job as a subprocess under the rmg_env.
@@ -286,6 +311,7 @@ def rmg_runner(rmg_input_file_path: str,
286311 t3_project_name (str, optional): The T3 project name, used for setting a job name on the server for the RMG run.
287312 rmg_execution_type (str, optional): The RMG execution type (incore or local). Also set via settings.py.
288313 restart_rmg (bool, optional): Whether to restart RMG from seed.
314+ walltime (str, optional): Max walltime in 'DD:HH:MM:SS' format. Defaults to 6 hours.
289315
290316 Returns:
291317 bool: Whether an exception was raised.
@@ -299,6 +325,7 @@ def rmg_runner(rmg_input_file_path: str,
299325 rmg_exception_encountered = run_rmg_incore (rmg_input_file_path = rmg_input_file_path ,
300326 verbose = verbose ,
301327 max_iterations = max_iterations ,
328+ walltime = walltime ,
302329 )
303330 return rmg_exception_encountered
304331 elif rmg_execution_type == 'local' :
0 commit comments