Skip to content

Commit 8865385

Browse files
authored
Merge pull request EESSI#185 from ocaisa/correct_cuda_checks
Directly check the location of `libcuda.so` where the linker expects to find it
2 parents 1e810da + 687fcd4 commit 8865385

1 file changed

Lines changed: 21 additions & 7 deletions

File tree

create_lmodsitepackage.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@
124124
if packagesList[simpleName] then
125125
-- simpleName is a module in packagesList
126126
-- first, check the old host_injections path prior to https://github.com/EESSI/software-layer-scripts/pull/59
127-
-- If that exists, print a more targetted, explanatory warning
127+
-- If that exists, print a more targeted, explanatory warning
128128
local previousHostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
129129
local previousPackageEasyBuildDir = previousHostInjections .. "/software/" .. t.modFullName .. "/easybuild"
130130
local previousPackageDirExists = isDir(previousPackageEasyBuildDir)
@@ -161,24 +161,38 @@
161161
local checkGpu = mt:haveProperty(simpleName,"arch","gpu")
162162
local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK")
163163
if checkGpu and (overrideGpuCheck == nil) then
164-
local arch = os.getenv("EESSI_CPU_FAMILY") or ""
165-
local cvmfs_repo = os.getenv("EESSI_CVMFS_REPO") or ""
166-
local cudaVersionFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
167-
local cudaDriverFile = cvmfs_repo .. "/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
164+
local eessi_version = os.getenv('EESSI_VERSION') or ""
165+
local eessi_eprefix = os.getenv("EESSI_EPREFIX") or ""
166+
if eessi_eprefix == nil or eessi_version == nil then
167+
LmodError("EESSI_VERSION and EESSI_EPREFIX must be defined for GPU driver check to work\\n")
168+
end
169+
local cudaDriverDir = nil
170+
if eessi_version == "2023.06" then
171+
cudaDriverDir = string.gsub(eessi_eprefix, 'versions', 'host_injections') .. "/lib"
172+
else
173+
cudaDriverDir = eessi_eprefix .. "/lib/nvidia"
174+
end
175+
local cudaVersionFile = cudaDriverDir .. "/cuda_version.txt"
176+
local cudaDriverFile = cudaDriverDir .. "/libcuda.so"
168177
local cudaDriverExists = isFile(cudaDriverFile)
169178
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
170179
if not (cudaDriverExists or singularityCudaExists) then
171180
local advice = "which relies on the CUDA runtime environment and driver libraries. "
172181
advice = advice .. "In order to be able to use the module, you will need "
173-
advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system. You can "
174-
advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but "
182+
advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system. "
183+
advice = advice .. "The file being checked for on your system is \\n" .. cudaDriverFile .. "\\n"
184+
advice = advice .. "You can override this check by setting the environment variable "
185+
advice = advice .. "EESSI_OVERRIDE_GPU_CHECK but "
175186
advice = advice .. "the loaded application will not be able to execute on your system.\\n"
176187
advice = advice .. refer_to_docs
177188
LmodError("\\nYou requested to load ", simpleName, " ", advice)
178189
else
179190
-- CUDA driver exists, now we check its version to see if an update is needed
180191
if cudaDriverExists then
181192
local cudaVersion = read_file(cudaVersionFile)
193+
if not cudaVersion then
194+
LmodError("No CUDA version file\\n" .. cudaVersionFile .. "\\nfound. " .. refer_to_docs)
195+
end
182196
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
183197
-- driver CUDA versions don't give a patch version for CUDA
184198
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")

0 commit comments

Comments
 (0)