Skip to content

Commit 08e0cbc

Browse files
author
Caspar van Leeuwen
committed
Improve message
1 parent 05a69e4 commit 08e0cbc

1 file changed

Lines changed: 7 additions & 2 deletions

File tree

create_lmodsitepackage.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,13 @@
192192
if not cudaVersion or cudaVersion == "" then
193193
local eessi_prefix = os.getenv("EESSI_PREFIX")
194194
local script = pathJoin(eessi_prefix, 'scripts', 'gpu_support', 'nvidia', 'get_cuda_driver_version.sh')
195-
-- Check return code first. We don't want source_sh to raise an LmodError, we just print
196-
-- an LmodWarning stating we couldn't do a proper version compatibility check
195+
-- We cannot immediately use source_sh, since lmod has no way of catching a potential error
196+
-- and we don't want this to raise an LmodError just because nvidia-smi doesn't exist or
197+
-- doesn't print the right output (happens on a node with nvidia-smi but no driver installed).
198+
-- The only way to catch this is to source the script first with os.execute and make sure it
199+
-- returns with a zero exit code. Unfortunately, this means we have to run nvidia-smi twice, which
200+
-- is a bit slow. Since the result is then cached in the EESSI_CUDA_DRIVER_VERSION environment
201+
-- variable, this is probably acceptable.
197202
local r1, r2, r3 = os.execute("bash -c 'source " .. script .. "'")
198203
local exit_code = 0
199204
if type(r1) == "number" then

0 commit comments

Comments
 (0)