Skip to content

Commit 1086a1e

Browse files
committed
fix(wms): JobAgent endlessly looping in checkSubmittedJobs
1 parent 616492d commit 1086a1e

2 files changed

Lines changed: 32 additions & 1 deletion

File tree

src/DIRAC/WorkloadManagementSystem/Agent/JobAgent.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,13 @@ def _checkSubmittedJobs(self):
706706
# Here we iterate over a copy of the keys because we are modifying the dictionary within the loop
707707
for jobID in list(self.jobs.keys()):
708708
taskID = self.jobs[jobID].get("TaskID")
709-
if taskID is None or taskID not in self.computingElement.taskResults:
709+
if taskID is None:
710+
# This generally means that an error occurred before the submission,
711+
# so the TaskID was never set and never will be: drop the job.
712+
self.log.info("No taskID found for job", jobID)
713+
del self.jobs[jobID]
714+
continue
715+
if taskID not in self.computingElement.taskResults:
710716
continue
711717

712718
result = self.computingElement.taskResults[taskID]

src/DIRAC/WorkloadManagementSystem/Agent/test/Test_Agent_JobAgent.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,3 +754,28 @@ def test_submitAndCheck2Jobs(mocker):
754754

755755
# From here, taskResults should be empty
756756
assert len(jobAgent.computingElement.taskResults) == 0
757+
758+
759+
def test_failureBeforeSubmission(mocker):
760+
"""Test that a failure before job submission is handled correctly.
761+
762+
We want to make sure that _checkSubmittedJobs drops a job that has no TaskID, so that there is no endless loop in the finalize method.
763+
"""
764+
# Mock the JobAgent
765+
mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobAgent.AgentModule.__init__")
766+
mocker.patch("DIRAC.Core.Security.X509Chain.X509Chain.dumpAllToString", return_value=S_OK())
767+
768+
jobAgent = JobAgent("JobAgent", "Test")
769+
jobAgent.log = gLogger.getSubLogger("JobAgent")
770+
771+
# Here we simulate a failure before the job submission: no TaskID
772+
jobID = "123"
773+
jobAgent.jobs[jobID] = {}
774+
jobAgent.jobs[jobID]["JobReport"] = JobReport(jobID)
775+
776+
# Make sure that the job is removed from jobAgent.jobs
777+
result = jobAgent._checkSubmittedJobs()
778+
assert result["OK"]
779+
assert result["Value"] == ([], [])
780+
781+
assert jobAgent.jobs == {}

0 commit comments

Comments
 (0)