Skip to content

Commit cfebdfb

Browse files
authored
Updated ARC PBS to recognise Zeus submission error (#815)
Due to changes in Zeus, unless the user submits ARC via the n170 host, they are not informed that a job submission error has occurred, and ARC will continuously report that it cannot find the output files (even though it was never actually able to submit the ESS jobs). This PR therefore makes ARC raise an error when it receives such a job submission error message.
2 parents 43d59cf + 2fc8211 commit cfebdfb

2 files changed

Lines changed: 16 additions & 0 deletions

File tree

arc/job/local.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,11 @@ def submit_job(path: str,
267267
)
268268
elif cluster_soft.lower() == 'pbs' and any('qsub: Illegal attribute or resource value' in err_line for err_line in stderr):
269269
raise ValueError(f'Got the following error when trying to submit job:\n{stderr}. Please check your submit script')
270+
elif cluster_soft.lower() == 'pbs' and (
271+
any('Please do NOT submit jobs on compute nodes' in err_line for err_line in stderr)
272+
or any('Jobs should be submitted on login server' in err_line for err_line in stderr)
273+
):
274+
raise ValueError('PBS job submission attempted from a compute node. Submit jobs from the login server.')
270275
if not len(stdout) or recursion:
271276
return None, None
272277
if len(stderr) > 0 or len(stdout) == 0:

arc/job/local_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os
1010
import shutil
1111
import unittest
12+
from unittest.mock import patch
1213

1314
import arc.job.local as local
1415
from arc.common import ARC_PATH
@@ -96,6 +97,16 @@ def test_parse_running_jobs_ids(self):
9697
running_job_ids = local.parse_running_jobs_ids(stdout, cluster_soft='htcondor')
9798
self.assertEqual(running_job_ids, ['11224', '11225', '11226', '11227', '11228', '11229', '11230', '11231'])
9899

100+
def test_submit_job_pbs_compute_node_error(self):
101+
"""Test submit_job() error handling for PBS compute node submissions."""
102+
stderr = ['qsub: Unauthorized Request: Please do NOT submit jobs on compute nodes!',
103+
'Jobs should be submitted on login server, i.e. ZEUS.']
104+
with patch('arc.job.local.execute_command', side_effect=[([], stderr), ([], stderr)]):
105+
with patch('time.sleep', return_value=None):
106+
with self.assertRaises(ValueError) as cm:
107+
local.submit_job(path='.', cluster_soft='pbs', submit_cmd='qsub', submit_filename='submit.sh')
108+
self.assertIn('compute node', str(cm.exception))
109+
99110

100111
if __name__ == '__main__':
101112
unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

0 commit comments

Comments
 (0)