From 0f2370b18c54848859cf07dee4164cfd747dcb05 Mon Sep 17 00:00:00 2001 From: Adam Rajfer Date: Fri, 17 Apr 2026 09:16:41 +0200 Subject: [PATCH] fix(slurm): request zero GPUs when export falls back to GPU partition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #901 introduced `export_partition = cpu_partition or cfg.execution.partition`. When cpu_partition is null (existing configs), export falls back to execution.partition — typically a GPU partition like 'batch' — with no --gpus-per-node in the sbatch header, and schedulers that enforce GPU specification on non-CPU partitions reject the submission: sbatch: error: Cannot find GPU specification, you may not submit a job not requesting GPUs in a non-CPU partition, partition: batch The existing `--gpus 0` on the inner srun only applies to the step, not to the allocation. Declare `--gpus-per-node=0` at the sbatch level when falling back so the scheduler accepts an allocation with no GPUs. Signed-off-by: Adam Rajfer --- .../src/nemo_evaluator_launcher/executors/slurm/executor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/slurm/executor.py b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/slurm/executor.py index 0bc6e1365..812774059 100644 --- a/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/slurm/executor.py +++ b/packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/slurm/executor.py @@ -1181,6 +1181,8 @@ def _generate_auto_export_section( export_sbatch += "#SBATCH --time=00:30:00\n" export_sbatch += f"#SBATCH --account {cfg.execution.account}\n" export_sbatch += f"#SBATCH --partition {export_partition}\n" + if not cpu_partition: + export_sbatch += "#SBATCH --gpus-per-node=0\n" export_sbatch += "#SBATCH --no-requeue\n" export_sbatch += ( f"#SBATCH --output {remote_task_subdir / 'logs' / 'export-%A.log'}\n"