From f1ca8c7d49a4e11143018fb9e59254975db7e722 Mon Sep 17 00:00:00 2001
From: Julian Squires
Date: Mon, 23 Mar 2026 12:07:30 -0230
Subject: [PATCH 1/2] Handle flag cases of slurm job state

We had a user report a 500 error, caused by indexing off the end of
the status list in this code.  When I took a look at the database, I
saw a job state value of 1024, which prompted me to check slurm's
source for how state values are handled.

See the following files in slurm's source:
  src/common/slurm_protocol_defs.c
  slurm/slurm.h

This code follows the inconsistent order in which slurm's own code
handles these flags, all of which seem to preempt the JOB_STATE_BASE
values.

Note that there are some flags defined which aren't handled;
presumably the base states take priority in these cases, in terms of
reporting a single string to summarize the state.
---
 slurm/models.py | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/slurm/models.py b/slurm/models.py
index 2ff45d6..d0aff55 100644
--- a/slurm/models.py
+++ b/slurm/models.py
@@ -311,16 +311,38 @@ def timelimit_display(self):
             return '{:.1f}m'.format(self.timelimit)
 
     def status(self):
-        status = ['Pending', 'Running', 'Suspended', 'Complete', 'Cancelled',
+        flags = [('Completing', 1 << 15), ('Stage Out', 1 << 23),
+                 ('Configuring', 1 << 14), ('Expediting', 1 << 24),
+                 ('Resizing', 1 << 13), ('Requeue', 1 << 10),
+                 ('Requeue Federation', 1 << 20),
+                 ('Requeue Hold', 1 << 11), ('Special Exit', 1 << 12),
+                 ('Stopped', 1 << 16), ('Revoked', 1 << 19),
+                 ('Held', 1 << 21), ('Signaling', 1 << 22)]
+        states = ['Pending', 'Running', 'Suspended', 'Complete', 'Cancelled',
                   'Failed', 'Timeout', 'Node failed', 'Preempted',
                   'Boot failed', 'End', 'OOM']
-        return status[self.state]
+        # process this the same way as slurm does, per
+        # src/common/slurm_protocol_defs.c:job_state_string()
+        for (s, bit) in flags:
+            if (self.state & bit) != 0:
+                return s
+        base = self.state & 0xff
+        if 0 <= base < len(states):
+            return states[base]
+        return 'Unknown'
 
     def status_badge(self):
-        status = ['info', 'primary', 'warning', 'success', 'danger',
-                  'danger', 'danger', 'danger', 'warning', 'danger',
-                  'sucess', 'danger']
-        return '{}'.format(status[self.state])
+        match self.status():
+            case 'Pending':
+                return 'info'
+            case 'Running':
+                return 'primary'
+            case 'Complete' | 'End':
+                return 'success'
+            case ('Cancelled' | 'Failed' | 'Timeout' | 'Node failed' | 'Boot failed' | 'OOM'):
+                return 'danger'
+            case 'Suspended' | 'Preempted' | _:
+                return 'warning'
 
     def gpu_count(self):
         __gpu_count = 0

From b60183556f6e75c2b7a20608734c55755b589a28 Mon Sep 17 00:00:00 2001
From: Julian Squires
Date: Fri, 24 Apr 2026 11:05:23 -0230
Subject: [PATCH 2/2] Provide job state display that handles flags

It's not clear to me that there's a way to write this using `choices`
that can express everything going on with slurm job state, so we
provide get_state_display() ourselves.

Keep StatesJob, however, as its constants are used elsewhere in the
code (somewhat validly, though ideally we'd mask such tests by 255).
---
 slurm/models.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/slurm/models.py b/slurm/models.py
index d0aff55..e34b5a4 100644
--- a/slurm/models.py
+++ b/slurm/models.py
@@ -199,6 +199,9 @@ class StatesJob(models.IntegerChoices):
         END = 10
         OOM = 11
 
+    def get_state_display(self):
+        return self.status()
+
     job_db_inx = models.BigAutoField(primary_key=True)
     mod_time = models.PositiveBigIntegerField()
     deleted = models.IntegerField()
@@ -233,7 +236,7 @@ class StatesJob(models.IntegerChoices):
     node_inx = models.TextField(blank=True, null=True)
     partition = models.TextField()
     priority = models.PositiveIntegerField()
-    state = models.PositiveIntegerField(choices=StatesJob.choices)
+    state = models.PositiveIntegerField()
     timelimit = models.PositiveIntegerField()
     time_submit = models.PositiveBigIntegerField()
     time_eligible = models.PositiveBigIntegerField()