Skip to content

Commit e010c2d

Browse files
author
Caspar van Leeuwen
committed
Initial attempt at dynamically determining CUDA version
1 parent f453fe9 commit e010c2d

2 files changed

Lines changed: 22 additions & 7 deletions

File tree

create_lmodsitepackage.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,6 @@
172172
else
173173
cudaDriverDir = eessi_eprefix .. "/lib/nvidia"
174174
end
175-
local cudaVersionFile = cudaDriverDir .. "/cuda_version.txt"
176175
local cudaDriverFile = cudaDriverDir .. "/libcuda.so"
177176
local cudaDriverExists = isFile(cudaDriverFile)
178177
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
@@ -189,19 +188,31 @@
189188
else
190189
-- CUDA driver exists, now we check its version to see if an update is needed
191190
if cudaDriverExists then
192-
local cudaVersion = read_file(cudaVersionFile)
193-
if not cudaVersion then
194-
LmodError("No CUDA version file\\n" .. cudaVersionFile .. "\\nfound. " .. refer_to_docs)
191+
LmodMessage("EESSI_CUDA_DRIVER_VERSION initial: " .. os.getenv("EESSI_CUDA_DRIVER_VERSION"))
192+
local cudaVersion = os.getenv("EESSI_CUDA_DRIVER_VERSION")
193+
if not cudaVersion or cudaVersion == "" then
194+
-- TODO: path hardcoded for local testing only; restore the os.getenv("EESSI_PREFIX") lookup (commented out below)
195+
-- local eessi_prefix = os.getenv("EESSI_PREFIX")
196+
local eessi_prefix = pathJoin('/home', 'casparl', 'EESSI', 'software-layer-scripts')
197+
local script = pathJoin(eessi_prefix, 'scripts', 'gpu_support', 'nvidia', 'get_cuda_driver_version.sh')
198+
LmodMessage("Getting version")
199+
source_sh("bash", script)
200+
end
201+
cudaVersion = os.getenv("EESSI_CUDA_DRIVER_VERSION")
202+
LmodMessage("CUDA VERSION" .. cudaVersion)
203+
if not cudaVersion or cudaVersion == "" then
204+
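-- TODO: change to a warning? NOTE(review): the LmodMessage call above concatenates cudaVersion before this check, so a nil value raises a Lua error before this branch is reached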
205+
LmodError("Environment variable EESSI_CUDA_DRIVER_VERSION not found. " .. refer_to_docs)
195206
end
196207
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
197208
-- the driver reports its CUDA version as major.minor only (no patch component)
198209
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
199210
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
200211
local driver_libs_need_update = false
201-
if major < major_req then
212+
if tonumber(major) < tonumber(major_req) then
202213
driver_libs_need_update = true
203-
elseif major == major_req then
204-
if minor < minor_req then
214+
elseif tonumber(major) == tonumber(major_req) then
215+
if tonumber(minor) < tonumber(minor_req) then
205216
driver_libs_need_update = true
206217
end
207218
end
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Meant to be sourced (e.g. via Lmod's source_sh()): exports EESSI_CUDA_DRIVER_VERSION as the major.minor CUDA version parsed from nvidia-smi output
2+
set -o pipefail
3+
EESSI_CUDA_DRIVER_VERSION=$(nvidia-smi | grep -oP 'CUDA Version:\s*\K[0-9]+\.[0-9]+') || return $?
4+
export EESSI_CUDA_DRIVER_VERSION

0 commit comments

Comments
 (0)