55default :
66 @ just --list
77
# Resolve the SLURM time limit for a partition and print it on stdout.
# Uses the limit reported by `sinfo`; when `sinfo` fails, prints nothing, or
# reports "infinite", falls back to built-in defaults:
#   short|interactive = 12h, medium = 2d, long = 5d, anything else = 7d.
_slurm-time-limit partition:
    #!/usr/bin/env bash
    set -euo pipefail
    # `|| true` keeps the fallback reachable: with `set -e` + `pipefail`, a
    # failing sinfo (missing binary, SIGPIPE from `head`) would otherwise
    # abort the script right here, before any default could be chosen.
    time_limit=$(sinfo -p "{{partition}}" -h -o "%l" | head -1 || true)
    # Strip any padding so the empty / "infinite" checks compare exact values.
    time_limit="${time_limit//[[:space:]]/}"
    if [[ -z "$time_limit" || "$time_limit" == "infinite" ]]; then
        case "{{partition}}" in
            short|interactive)
                time_limit="12:00:00"
                ;;
            medium)
                time_limit="2-00:00:00"
                ;;
            long)
                time_limit="5-00:00:00"
                ;;
            *)
                time_limit="7-00:00:00"
                ;;
        esac
    fi
    echo "$time_limit"
30+
831# ============================================================================
932# Setup & Data
1033# ============================================================================
@@ -89,31 +112,17 @@ tensorboard-run experiment timestamp port='6006':
89112# just slurm short 8 4 "python scripts/main.py --config tutorials/lucchi.yaml"
90113# just slurm short 8 4 "just train lucchi++" "" "64G" # override memory
91114# Time limits: short=12h, medium=2d, long=5d
# For CPU-only single-task jobs, pass num_gpu=0:
#   just slurm short 8 0 "python scripts/downsample_nisb.py --splits train"
92117slurm partition num_cpu num_gpu cmd constraint = ' ' mem = ' 32G':
93118 #!/usr/bin/env bash
94119 constraint_flag=" "
95120 if [ -n " {{ constraint}} " ]; then
96121 constraint_flag=" --constraint={{ constraint}} "
97122 fi
98123
99- # Set time limit to partition maximum
100- time_limit=$(sinfo -p {{partition}} -h -o " %l" | head -1)
101- if [ -z " $time_limit" ] || [ " $time_limit" = " infinite" ]; then
102- case " {{ partition}} " in
103- short|interactive)
104- time_limit=" 12:00:00"
105- ;;
106- medium)
107- time_limit=" 2-00:00:00"
108- ;;
109- long)
110- time_limit=" 5-00:00:00"
111- ;;
112- *)
113- time_limit=" 7-00:00:00"
114- ;;
115- esac
116- fi
124+ # Resolve partition time limit (with fallback defaults)
125+ time_limit=$(just _slurm-time-limit {{partition}})
117126
118127 # Run the command exactly as provided (no auto "just" wrapping).
119128 sbatch --job-name=" pytc_{{ cmd}} " \
@@ -129,9 +138,57 @@ slurm partition num_cpu num_gpu cmd constraint='' mem='32G':
129138 $constraint_flag \
130139 - -wrap=" mkdir -p \$ HOME/.just && export JUST_TEMPDIR=\$ HOME/.just TMPDIR=\$ HOME/.just NCCL_SOCKET_FAMILY=AF_INET && source /projects/weilab/weidf/lib/miniconda3/bin/activate pytc && cd $PWD && srun --ntasks=1 --gpus-per-task={{ num_gpu}} --cpus-per-task={{ num_cpu}} {{ cmd}} "
131140
132- # Alias for slurm (kept for backward compatibility)
133- slurm-sh partition num_cpu num_gpu cmd constraint = ' ' mem = ' 32G':
134- just slurm {{ partition}} {{ num_cpu}} {{ num_gpu}} {{ cmd}} {{ constraint}} {{ mem}}
# Generic CPU-only multi-task launcher (single node, no GPU).
# Submits one sbatch job whose wrapped script runs `cmd` under
# `srun --ntasks=<num_tasks>` via `bash -c`, so shell variables in `cmd`
# (e.g. $SLURM_PROCID) are expanded inside the job, not at submission time.
# Job stdout/stderr are written to slurm_outputs/slurm-<jobid>.out / .err.
# `cmd` is embedded in single quotes, so it must not itself contain single quotes.
# Exits with status 2 (usage printed on stderr) when `cmd` is empty.
# Example:
#   just slurm-cpu-parallel short 7 1 "python scripts/downsample_nisb.py --task \$SLURM_PROCID"
slurm-cpu-parallel partition num_tasks='7' cpu_per_task='4' cmd='' constraint='' mem='64G':
    #!/usr/bin/env bash
    set -euo pipefail

    # Validate first so a usage error has no side effects; diagnostics go to stderr.
    cmd_value='{{cmd}}'
    if [ -z "$cmd_value" ]; then
        echo "Error: cmd must be provided. Usage:" >&2
        echo "  just slurm-cpu-parallel <partition> <num_tasks> <cpu_per_task> \"<command>\" [constraint] [mem]" >&2
        exit 2
    fi

    mkdir -p slurm_outputs

    constraint_value='{{constraint}}'

    # Resolve partition time limit (with fallback defaults)
    time_limit=$(just _slurm-time-limit "{{partition}}")

    # The :+ expansion emits the --constraint flag as a single word only when a
    # constraint was given, and emits nothing at all otherwise.
    sbatch --job-name="pytc_cpu_{{num_tasks}}t" \
        --partition={{partition}} \
        --output=slurm_outputs/slurm-%j.out \
        --error=slurm_outputs/slurm-%j.err \
        --nodes=1 \
        --ntasks={{num_tasks}} \
        --gpus-per-task=0 \
        --cpus-per-task={{cpu_per_task}} \
        --mem={{mem}} \
        --time="$time_limit" \
        ${constraint_value:+--constraint="$constraint_value"} \
        --wrap="mkdir -p \$HOME/.just && export JUST_TEMPDIR=\$HOME/.just TMPDIR=\$HOME/.just && source /projects/weilab/weidf/lib/miniconda3/bin/activate pytc && cd \"$PWD\" && srun --ntasks={{num_tasks}} --gpus-per-task=0 --cpus-per-task={{cpu_per_task}} bash -c '$cmd_value'"
176+
# Generic CPU-only multi-task launcher for sharded scripts.
# Delegates to `slurm-cpu-parallel`, forwarding every argument unchanged
# except `cmd`, to which it automatically appends:
#   --num-shards $SLURM_NTASKS --shard-index $SLURM_PROCID
# `cmd` is embedded in single quotes, so it must not itself contain single quotes.
# Exits with status 2 (usage printed on stderr) when `cmd` is empty.
# Example:
#   just slurm-cpu-sharded short 7 1 "python scripts/downsample_nisb.py"
slurm-cpu-sharded partition num_tasks='7' cpu_per_task='4' cmd='' constraint='' mem='64G':
    #!/usr/bin/env bash
    set -euo pipefail
    cmd_value='{{cmd}}'
    if [ -z "$cmd_value" ]; then
        echo "Error: cmd must be provided. Usage:" >&2
        echo "  just slurm-cpu-sharded <partition> <num_tasks> <cpu_per_task> \"<command>\" [constraint] [mem]" >&2
        exit 2
    fi
    # Forward the single-quoted copy of the command: it keeps any "$", backtick
    # or double quote in the user's command literal, so expansion happens inside
    # the job rather than in this submitting shell (where `set -u` would abort
    # on an unset variable). The escaped \$SLURM_* names stay literal as well.
    just slurm-cpu-parallel "{{partition}}" "{{num_tasks}}" "{{cpu_per_task}}" "$cmd_value --num-shards \$SLURM_NTASKS --shard-index \$SLURM_PROCID" "{{constraint}}" "{{mem}}"
135192
136193# Launch parameter sweep from config (e.g., just sweep tutorials/sweep_example.yaml)
137194sweep config :
0 commit comments