Skip to content

Commit 91f3932

Browse files
authored
Merge pull request #1625 from virtualcell/dan-ss-results3
Using the default singularity on batch run node
2 parents d125203 + fd9d4e7 commit 91f3932

4 files changed

Lines changed: 24 additions & 39 deletions

File tree

docker/build/Dockerfile-submit-dev

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \
8787
slurm_singularity_cachedir="slurm_singularity_cachedir-not-set" \
8888
slurm_singularity_pullfolder="slurm_singularity_pullfolder-not-set" \
8989
slurm_singularity_module_name="slurm_singularity_module_name-not-set" \
90+
slurm_langevin_timeoutPerTaskSeconds="slurm_langevin_timeoutPerTaskSeconds-not-set" \
91+
slurm_langevin_batchMemoryLimitPerTaskMB="slurm_langevin_batchMemoryLimitPerTaskMB-not-set" \
92+
slurm_langevin_memoryBlockSizeMB="slurm_langevin_memoryBlockSizeMB-not-set" \
9093
jmsblob_minsize=100000 \
9194
vcell_ssh_cmd_cmdtimeout="cmdSrvcSshCmdTimeoutMS-not-set" \
9295
vcell_ssh_cmd_restoretimeout="cmdSrvcSshCmdRestoreTimeoutFactor-not-set" \
@@ -143,6 +146,9 @@ ENTRYPOINT java \
143146
-Dvcell.slurm.singularity.cachedir="${slurm_singularity_cachedir}" \
144147
-Dvcell.slurm.singularity.pullfolder="${slurm_singularity_pullfolder}" \
145148
-Dvcell.slurm.singularity.module.name="${slurm_singularity_module_name}" \
149+
-Dvcell.slurm.langevin.timeoutPerTaskSeconds="${slurm_langevin_timeoutPerTaskSeconds}" \
150+
-Dvcell.slurm.langevin.batchMemoryLimitPerTaskMB="${slurm_langevin_batchMemoryLimitPerTaskMB}" \
151+
-Dvcell.slurm.langevin.memoryBlockSizeMB="${slurm_langevin_memoryBlockSizeMB}" \
146152
-Dvcell.htc.vcellfvsolver.docker.name="${htc_vcellfvsolver_docker_name}" \
147153
-Dvcell.htc.vcellfvsolver.solver.list="${htc_vcellfvsolver_solver_list}" \
148154
-Dvcell.htc.vcellsolvers.docker.name="${htc_vcellsolvers_docker_name}" \

vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ public static void setConfigProvider(VCellConfigProvider configProvider) {
115115
public static final String MPI_HOME_EXTERNAL = record("vcell.htc.mpi.home",ValueType.GEN);
116116
public static final String nativeSolverDir_External = record("vcell.nativesolverdir.external",ValueType.GEN);
117117

118+
// specific to langevin solver when running in batch mode
119+
public static final String slurm_langevin_timeoutPerTaskSeconds = record("vcell.slurm.langevin.timeoutPerTaskSeconds",ValueType.GEN);
120+
public static final String slurm_langevin_batchMemoryLimitPerTaskMB = record("vcell.slurm.langevin.batchMemoryLimitPerTaskMB",ValueType.GEN);
121+
public static final String slurm_langevin_memoryBlockSizeMB = record("vcell.slurm.langevin.memoryBlockSizeMB",ValueType.GEN);
122+
123+
118124
// public static final String finiteVolumeExecutableProperty = record("vcell.finitevolume.executable",ValueType.EXE);
119125
//
120126
// //

vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -523,38 +523,23 @@ private void writeSingularitySetup(LineStringBuilder lsb) {
523523
String singularityPullfolder = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_pullfolder);
524524
String singularityModuleName = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_module_name);
525525

526-
lsb.write("echo \"=== Singularity check BEFORE module load ===\"");
526+
lsb.write("echo \"=== Singularity check ===\"");
527527
lsb.write("if command -v singularity >/dev/null 2>&1; then");
528528
lsb.write(" echo \"Singularity found at: $(command -v singularity)\"");
529529
lsb.write(" singularity --version");
530530
lsb.write("else");
531-
lsb.write(" echo \"Singularity not found before module load\"");
531+
lsb.write(" echo \"Singularity not found\"");
532+
lsb.write(" exit 127");
532533
lsb.write("fi");
533534
lsb.write("");
534535

535536
lsb.write("TMPDIR=" + slurmTmpDir);
536537
lsb.write("if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi");
537538
lsb.write("echo `hostname`");
538539
lsb.write("export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles");
539-
lsb.write("if [ -f /usr/share/modules/init/bash ]; then");
540-
lsb.write(" source /usr/share/modules/init/bash");
541-
lsb.write(" module load " + singularityModuleName);
542-
lsb.write("else");
543-
lsb.write(" echo \"[Warning] Module init script not found - skipping module setup\"");
544-
lsb.write("fi");
545540
lsb.write("export SINGULARITY_CACHEDIR=" + singularityCachedir);
546541
lsb.write("export SINGULARITY_PULLFOLDER=" + singularityPullfolder);
547542
lsb.write("");
548-
549-
lsb.write("echo \"=== Singularity check AFTER module load ===\"");
550-
lsb.write("if command -v singularity >/dev/null 2>&1; then");
551-
lsb.write(" echo \"Singularity found at: $(command -v singularity)\"");
552-
lsb.write(" singularity --version");
553-
lsb.write("else");
554-
lsb.write(" echo \"Singularity not found after module load\"");
555-
lsb.write(" exit 127");
556-
lsb.write("fi");
557-
lsb.write("");
558543
}
559544
private void writeSlurmJobMetadata(LineStringBuilder lsb) {
560545
lsb.write("# Compute memory per task and per job");
@@ -661,10 +646,12 @@ String generateLangevinBatchScript(String jobName, ExecutableCommand.Container
661646
SolverDescription solverDescription = std.getSolverDescription();
662647
MemLimitResults memoryMBAllowed = HtcProxy.getMemoryLimit(vcellUserid, simID, solverDescription, memSizeMB, simTask.isPowerUser());
663648

664-
// TODO: do we hardcode these? Should it be part of LangevinSimulationOptions? Or, even better, properties?
665-
int timeoutPerTaskSeconds = 86400; // seconds. 24 hours
666-
long hardbBtchMemoryLimitPerTask = 1024; // MB. we hard limit mem to 1G for langevin batch jobs
667-
int blockSizeMB = 256; // MB. SLURM memory allocation granularity
649+
String sTimeoutPerTaskSeconds = PropertyLoader.getProperty(PropertyLoader.slurm_langevin_timeoutPerTaskSeconds, "86400");
650+
String sHardbBtchMemoryLimitPerTask = PropertyLoader.getProperty(PropertyLoader.slurm_langevin_batchMemoryLimitPerTaskMB, "1024");
651+
String sBlockSizeMB = PropertyLoader.getProperty(PropertyLoader.slurm_langevin_memoryBlockSizeMB, "256");
652+
int timeoutPerTaskSeconds = Integer.parseInt(sTimeoutPerTaskSeconds); // seconds. configurable via property; defaults to 86400 (24 hours)
653+
long hardbBtchMemoryLimitPerTask = Long.parseLong(sHardbBtchMemoryLimitPerTask); // MB. hard memory cap per task for langevin batch jobs; configurable via property, defaults to 1024 MB (1G)
654+
int blockSizeMB = Integer.parseInt(sBlockSizeMB); // MB. SLURM memory allocation granularity
668655
String slurmJobTimeout = computeSlurmTimeLimit(totalNumberOfJobs, numberOfConcurrentTasks, timeoutPerTaskSeconds);
669656
long batchMemoryLimitPerTask = memoryMBAllowed.getMemLimit();
670657
batchMemoryLimitPerTask = Math.min(batchMemoryLimitPerTask, hardbBtchMemoryLimitPerTask);

vcell-server/src/test/resources/slurm_fixtures/langevin/V_TEST2_999999999_0_0.slurm.sub

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,36 +36,22 @@ rm -f /share/apps/vcell3/users/${USERID}/SimID_${SIM_ID}_0_.functions
3636
rm -f /share/apps/vcell3/users/${USERID}/SimID_${SIM_ID}_0_.langevinInput
3737
rm -f /share/apps/vcell3/users/${USERID}/SimID_${SIM_ID}_0_.langevinMessagingConfig
3838

39-
echo "=== Singularity check BEFORE module load ==="
39+
echo "=== Singularity check ==="
4040
if command -v singularity >/dev/null 2>&1; then
4141
echo "Singularity found at: $(command -v singularity)"
4242
singularity --version
4343
else
44-
echo "Singularity not found before module load"
44+
echo "Singularity not found"
45+
exit 127
4546
fi
4647

4748
TMPDIR=/scratch/vcell
4849
if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
4950
echo `hostname`
5051
export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
51-
if [ -f /usr/share/modules/init/bash ]; then
52-
source /usr/share/modules/init/bash
53-
module load singularity/vcell-3.10.0
54-
else
55-
echo "[Warning] Module init script not found - skipping module setup"
56-
fi
5752
export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
5853
export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
5954

60-
echo "=== Singularity check AFTER module load ==="
61-
if command -v singularity >/dev/null 2>&1; then
62-
echo "Singularity found at: $(command -v singularity)"
63-
singularity --version
64-
else
65-
echo "Singularity not found after module load"
66-
exit 127
67-
fi
68-
6955
# Compute memory per task and per job
7056
MEM_TASK=$(( SLURM_MEM_PER_CPU * SLURM_CPUS_PER_TASK ))
7157
MEM_JOB=$(( MEM_TASK * SLURM_NTASKS ))

0 commit comments

Comments
 (0)