|
210 | 210 | end |
211 | 211 | end |
212 | 212 |
|
-- Report whether an EESSI version 2023.06 accelerator stack is in use.
--
-- Splits $MODULEPATH on ':' and looks for an entry that starts with
-- /cvmfs/software.eessi.io/versions/2023.06 and contains accel/nvidia/ccNN
-- (NN being two digits).
--
-- Returns true when such an entry is found, false otherwise (including when
-- $MODULEPATH is unset or empty).
local function using_eessi_accel_stack ()
    local modulepath = os.getenv("MODULEPATH") or ""
    local eessi_prefix = "/cvmfs/software.eessi.io/versions/2023.06"

    -- Walk every non-empty ':'-separated entry. Matching '[^:]+' instead of
    -- '(.-):' makes sure the final entry is inspected as well when
    -- $MODULEPATH does not end with a colon.
    for path in string.gmatch(modulepath, '[^:]+') do
        if string.sub(path, 1, #eessi_prefix) == eessi_prefix
            and string.find(path, "accel/nvidia/cc%d%d") then
            return true
        end
    end
    return false
end
| 229 | +
|
-- Abort a `load` command that names a module which has been removed from, or
-- relocated within, the EESSI software stack.
--
-- usrCmd: the user command passed to the startup hook; only 'load' is checked.
--
-- Every positional argument in masterTbl().pargs is compared, by exact name,
-- against two lookup tables. If at least one argument matches, LmodError is
-- called with one explanatory line per match. The CUDA relocation entries are
-- only considered when using_eessi_accel_stack() returns false. Setting
-- $EESSI_SKIP_REMOVED_MODULES_CHECK disables the check entirely.
--
-- NOTE(review): the doubled backslashes in '\\n' presumably exist because this
-- Lua code is embedded in a Python string - confirm before changing them.
local function eessi_removed_module_warning_startup_hook(usrCmd)
    -- Nothing to do for other commands, or when the user opted out.
    if usrCmd ~= 'load' or os.getenv("EESSI_SKIP_REMOVED_MODULES_CHECK") then
        return
    end

    local CUDA_RELOCATION_MSG = [[All CUDA installations and modules depending on CUDA have been relocated to GPU-specific stacks.
 Please see https://www.eessi.io/docs/site_specific_config/gpu/ for more information.]]

    -- Both the bare module name and the full name/version are listed, since
    -- only exact matches against the requested name are detected.
    local RELOCATED_CUDA_MODULES = {
        ['NCCL'] = CUDA_RELOCATION_MSG,
        ['NCCL/2.18.3-GCCcore-12.3.0-CUDA-12.1.1'] = CUDA_RELOCATION_MSG,
        ['UCX-CUDA'] = CUDA_RELOCATION_MSG,
        ['UCX-CUDA/1.14.1-GCCcore-12.3.0-CUDA-12.1.1'] = CUDA_RELOCATION_MSG,
        -- we also have non-CUDA versions of OSU Micro Benchmarks, so only match the CUDA version
        ['OSU-Micro-Benchmarks/7.2-gompi-2023a-CUDA-12.1.1'] = CUDA_RELOCATION_MSG,
        ['UCC-CUDA'] = CUDA_RELOCATION_MSG,
        ['UCC-CUDA/1.2.0-GCCcore-12.3.0-CUDA-12.1.1'] = CUDA_RELOCATION_MSG,
        ['CUDA'] = CUDA_RELOCATION_MSG,
        ['CUDA/12.1.1'] = CUDA_RELOCATION_MSG,
        ['CUDA-Samples'] = CUDA_RELOCATION_MSG,
        ['CUDA-Samples/12.1-GCC-12.3.0-CUDA-12.1.1'] = CUDA_RELOCATION_MSG,
    }

    local REMOVED_MODULES = {
        ['ipympl/0.9.3-foss-2023a'] = 'This module has been replaced by ipympl/0.9.3-gfbf-2023a',
    }

    local requested_modules = masterTbl().pargs
    local report = ""

    -- Append "<module>: <message>" for every requested module found in `lookup`.
    local function collect(lookup)
        for _, requested in pairs(requested_modules) do
            local message = lookup[requested]
            if message ~= nil then
                report = report .. requested .. ': ' .. message .. '\\n\\n'
            end
        end
    end

    -- The CUDA messages should only be shown if the accelerator stack is NOT being used
    if not using_eessi_accel_stack() then
        collect(RELOCATED_CUDA_MODULES)
    end
    collect(REMOVED_MODULES)

    if report ~= "" then
        LmodError('\\n' .. report .. 'If you know what you are doing and you want to ignore this check for removed/relocated modules, set $EESSI_SKIP_REMOVED_MODULES_CHECK to any value.')
    end
end
| 274 | +
|
-- Startup hook entry point: forwards the user command to the
-- removed/relocated-module check.
--
-- usrCmd: the user command handed to the hook.
function eessi_startup_hook(usrCmd)
    eessi_removed_module_warning_startup_hook(usrCmd)
end
| 278 | +
|
-- Register the EESSI hook functions under the "startup" and "load" hook names.
hook.register("startup", eessi_startup_hook)
hook.register("load", eessi_load_hook)
214 | 281 |
|
215 | 282 | """ |
|
0 commit comments