Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 38 additions & 19 deletions create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@
else
cudaDriverDir = eessi_eprefix .. "/lib/nvidia"
end
local cudaVersionFile = cudaDriverDir .. "/cuda_version.txt"
local cudaDriverFile = cudaDriverDir .. "/libcuda.so"
local cudaDriverExists = isFile(cudaDriverFile)
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
Expand All @@ -189,28 +188,48 @@
else
-- CUDA driver exists, now we check its version to see if an update is needed
if cudaDriverExists then
local cudaVersion = read_file(cudaVersionFile)
if not cudaVersion then
LmodError("No CUDA version file\\n" .. cudaVersionFile .. "\\nfound. " .. refer_to_docs)
-- Determine the CUDA version supported by the installed driver.
-- Prefer a value already present in the environment; otherwise try to obtain it by
-- sourcing the helper script get_cuda_driver_version.sh located under EESSI_PREFIX.
local cudaVersion = os.getenv("EESSI_CUDA_DRIVER_VERSION")
if not cudaVersion or cudaVersion == "" then
    local eessi_prefix = os.getenv("EESSI_PREFIX")
    -- Without EESSI_PREFIX the helper script cannot be located; skip detection so that
    -- the later check can report the missing version instead of failing here.
    if eessi_prefix and eessi_prefix ~= "" then
        local script = pathJoin(eessi_prefix, 'scripts', 'gpu_support', 'nvidia', 'get_cuda_driver_version.sh')
        -- Check return code first. We don't want source_sh to raise an LmodError, we just print
        -- an LmodWarning stating we couldn't do a proper version compatibility check
        local rc = os.execute("bash -c 'source " .. script .. "'")
        -- Lua 5.1 returns the numeric exit status, whereas Lua 5.2 and later return
        -- true as first result on success (nil on failure); accept both conventions.
        if rc == 0 or rc == true then
            source_sh("bash", script)
        end
    end
end
-- Re-read the variable: it may have been set by the sourced script above
cudaVersion = os.getenv("EESSI_CUDA_DRIVER_VERSION")
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
-- driver CUDA versions don't give a patch version for CUDA
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
local driver_libs_need_update = false
if major < major_req then
driver_libs_need_update = true
elseif major == major_req then
if minor < minor_req then
if not cudaVersion or cudaVersion == "" then
-- The driver CUDA version could not be determined: warn (but do not fail) unless the
-- user explicitly opted out by exporting the suppression variable with value 1.
local suppress_var = "EESSI_CUDA_DRIVER_VERSION_SUPPRESS_WARNING"
local warn = "Environment variable EESSI_CUDA_DRIVER_VERSION not found. "
warn = warn .. "Cannot ensure that driver version is new enough for CUDA toolkit version: '"
-- cudaVersion_req comes from the environment and may be unset; avoid concatenating nil
warn = warn .. (cudaVersion_req or "unknown") .. "'. This module will still be loaded, but may not function "
warn = warn .. "as expected. Export " .. suppress_var .. "=1 to suppress this warning."
-- os.getenv returns a string (or nil when unset), so compare against the string "1";
-- comparing against the number 1 can never be true in Lua.
local suppress_warn = os.getenv(suppress_var)
if suppress_warn ~= "1" then
    LmodWarning(warn)
end
else
-- Compare the CUDA version reported for the driver (cudaVersion) with the version
-- required by the module being loaded (cudaVersion_req) and raise an LmodError when
-- the driver version is older in its major, or in its minor for an equal major, component.
-- driver CUDA versions don't give a patch version for CUDA
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
-- patch_req is captured but not used in the comparison below
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
-- NOTE(review): if either string does not match its pattern the captures are nil and the
-- comparisons below would raise an error; confirm both values are always well-formed.
local driver_libs_need_update = false
-- string.match captures are strings, hence the tonumber conversion before comparing numerically
if tonumber(major) < tonumber(major_req) then
driver_libs_need_update = true
elseif tonumber(major) == tonumber(major_req) then
if tonumber(minor) < tonumber(minor_req) then
driver_libs_need_update = true
end
end
if driver_libs_need_update == true then
-- Build the second half of the error message; refer_to_docs is defined outside this excerpt
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
advice = advice .. "Please update your CUDA driver libraries and then "
advice = advice .. "let EESSI know about the update.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
end
end
if driver_libs_need_update == true then
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
advice = advice .. "Please update your CUDA driver libraries and then "
advice = advice .. "let EESSI know about the update.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
end
end
end
Expand Down
4 changes: 4 additions & 0 deletions scripts/gpu_support/nvidia/get_cuda_driver_version.sh
Comment thread
bedroge marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This can be leveraged by the source_sh() feature of Lmod
# Sets and exports EESSI_CUDA_DRIVER_VERSION to the CUDA version reported by nvidia-smi.
# The script uses `return` rather than `exit`, so it is meant to be sourced, not executed.

# Make the pipeline below fail if any of its commands fails (e.g. nvidia-smi is missing
# or errors out), instead of only reflecting the exit status of the final grep
set -o pipefail
# Keep only the version number that follows "CUDA Version :" in the nvidia-smi query output
# (\K drops the matched label from the result); on failure, hand the pipeline's exit status
# back to the sourcing shell and skip the export
EESSI_CUDA_DRIVER_VERSION=$(nvidia-smi --query | grep -oP 'CUDA Version\s*:\s*\K[0-9.]+') || return $?
export EESSI_CUDA_DRIVER_VERSION
Loading