|
# Launch-timeout test: put a fake `srun` that stalls for 15 seconds at the front
# of PATH, then check that `Distributed.addprocs` with a 2-second `launch_timeout`
# throws the expected exception.
mktempdir() do tmpdir
    fake_bindir = joinpath(tmpdir, "bin")
    fake_srun = joinpath(fake_bindir, "srun")
    mkpath(fake_bindir)

    # Contents of the fake `srun`, one script line per entry.
    srun_script_lines = [
        "#!/usr/bin/env bash",
        "set -euf -o pipefail",
        # "set -x",  # uncomment to trace the script while debugging

        # Before sleeping, write to stderr only. Printing to stdout here would
        # send us down a different error path than the one this test targets.
        "echo [stderr] fake-srun: sleeping for 15 seconds... >&2",

        # Stall for 15 seconds.
        "sleep 15",

        "echo [stdout] fake-srun: INTENTIONALLY ERROR-ING",
        "echo [stderr] fake-srun: INTENTIONALLY ERROR-ING >&2",
        "exit 1",
    ]
    open(fake_srun, "w") do io
        for script_line in srun_script_lines
            println(io, script_line)
        end
    end
    chmod(fake_srun, 0o700) # chmod +x

    # Separator between entries of the PATH variable (not between path components).
    path_list_separator = Sys.iswindows() ? ';' : ':'
    inherited_path = get(ENV, "PATH", nothing)
    patched_path = if inherited_path === nothing
        fake_bindir
    else
        fake_bindir * path_list_separator * inherited_path
    end

    # Environment applied only for the duration of the `withenv` block below.
    patched_env = Dict{String, String}(
        "SLURM_NTASKS" => "8",
        "SLURM_JOB_ID" => "1234",
        "PATH" => patched_path,
    )

    @info "with old PATH" Sys.which("srun")
    withenv(patched_env...) do
        @info "with new PATH" Sys.which("srun")

        # The outer exception type depends on the Julia version; the inner
        # exception is the same on every version.
        outer_ex_type = Base.VERSION >= v"1.2-" ? TaskFailedException : ErrorException
        inner_ex = ErrorException("launch_timeout exceeded")

        manager = SlurmClusterManager.SlurmManager(; launch_timeout = 2.0)
        throws_result = @test_throws outer_ex_type Distributed.addprocs(manager)

        check_config = ConfigForTestingTaskFailedException(;
            expected_outer_ex_T=outer_ex_type,
            expected_inner_ex_INSTANCE=inner_ex,
        )
        test_task_failed_exception(throws_result, check_config)
    end
end
0 commit comments