Skip to content

Commit dbbcf6b

Browse files
authored
Merge branch 'main' into change_120f_to_120_for_cuda_1280
2 parents fc98f1e + c7ff662 commit dbbcf6b

7 files changed

Lines changed: 118 additions & 28 deletions

File tree

.github/workflows/scripts/test_init_scripts.sh

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,17 +154,30 @@ for shell in ${SHELLS[@]}; do
154154

155155
# Optional test 11, check if the prompt has been updated
156156
if [ "$shell" = "bash" ] || [ "$shell" = "ksh" ] || [ "$shell" = "zsh" ] || [ "$shell" = "sh" ]; then
157+
# Let's configure things to use the EESSI module within the PR
158+
sed 's|export MODULEPATH=.*|export MODULEPATH=init/modules|' init/lmod/sh >init/lmod/sh.test
159+
ln -srf init/lmod/sh.test init/lmod/bash.test
160+
ln -srf init/lmod/sh.test init/lmod/ksh.test
161+
ln -srf init/lmod/sh.test init/lmod/zsh.test
157162
# Typically this is a non-interactive shell, so manually unset PS1 and reset to a non-exported variable when testing
158-
TEST_EESSI_PS1_UPDATE=$($shell -c "unset PS1 ; PS1='$ ' ; . init/lmod/$shell 2>/dev/null ; echo \"\$PS1\"")
159-
TEST_EESSI_NO_PS1_UPDATE=$($shell -c "unset PS1 ; . init/lmod/$shell 2>/dev/null ; echo \"\$PS1\"")
163+
TEST_EESSI_PS1_UPDATE=$($shell -c "unset PS1 ; PS1='$ ' ; . init/lmod/$shell.test 2>/dev/null ; echo \"\$PS1\"")
164+
TEST_EESSI_NO_PS1_UPDATE=$($shell -c "unset PS1 ; . init/lmod/$shell.test 2>/dev/null ; echo \"\$PS1\"")
160165
pattern="{EESSI/${EESSI_VERSION}} "
161166
assert_raises 'echo "$TEST_EESSI_PS1_UPDATE" | grep "$pattern"'
162167
assert_raises 'echo "$TEST_EESSI_NO_PS1_UPDATE" | grep "$pattern"' 1
163168
# Also check when we explicitly ask for it not to be updated
164-
TEST_EESSI_EXPLICIT_NO_PS1_UPDATE=$($shell -c "unset PS1 ; PS1='test> ' ; export EESSI_MODULE_UPDATE_PS1=0 ; . init/lmod/$shell 2>/dev/null ; echo \"\$PS1\"")
165-
TEST_EESSI_EXPLICIT_NO_PS1_UPDATE_CALLED_TWICE=$($shell -c "unset PS1 ; PS1='$ ' ; export EESSI_MODULE_UPDATE_PS1=0 ; . init/lmod/$shell 2>/dev/null ; . init/lmod/$shell 2>/dev/null ; echo \"\$PS1\"")
169+
TEST_EESSI_EXPLICIT_NO_PS1_UPDATE=$($shell -c "unset PS1 ; PS1='test> ' ; export EESSI_MODULE_UPDATE_PS1=0 ; . init/lmod/$shell.test 2>/dev/null ; echo \"\$PS1\"")
170+
TEST_EESSI_EXPLICIT_NO_PS1_UPDATE_CALLED_TWICE=$($shell -c "unset PS1 ; PS1='$ ' ; export EESSI_MODULE_UPDATE_PS1=0 ; . init/lmod/$shell.test 2>/dev/null ; . init/lmod/$shell.test 2>/dev/null ; echo \"\$PS1\"")
166171
assert_raises 'echo "$TEST_EESSI_EXPLICIT_NO_PS1_UPDATE" | grep "$pattern"' 1
167172
assert_raises 'echo "$TEST_EESSI_EXPLICIT_NO_PS1_UPDATE_CALLED_TWICE" | grep "$pattern"' 1
173+
# Also check complex prompts, and unloading/purging the EESSI module
174+
prompt="\$(echo '\['✘) $ "
175+
promptstr="\[✘ $ "
176+
updated_promptstr="{EESSI/${EESSI_VERSION}} \[✘ $ "
177+
TEST_EESSI_PS1_UPDATE=$($shell -c "unset PS1 ; PS1=\"$prompt\" ; . init/lmod/$shell.test >/dev/null ; echo \"\$PS1\"")
178+
TEST_EESSI_PS1_REVERT=$($shell -c "unset PS1 ; PS1=\"$prompt\" ; . init/lmod/$shell.test >/dev/null ; module purge; echo \"\$PS1\"")
179+
assert 'echo "$TEST_EESSI_PS1_UPDATE"' "$updated_promptstr"
180+
assert 'echo "$TEST_EESSI_PS1_REVERT"' "$promptstr"
168181
fi
169182

170183
# End Test Suite

create_lmodsitepackage.py

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,6 @@
172172
else
173173
cudaDriverDir = eessi_eprefix .. "/lib/nvidia"
174174
end
175-
local cudaVersionFile = cudaDriverDir .. "/cuda_version.txt"
176175
local cudaDriverFile = cudaDriverDir .. "/libcuda.so"
177176
local cudaDriverExists = isFile(cudaDriverFile)
178177
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
@@ -189,28 +188,46 @@
189188
else
190189
-- CUDA driver exists, now we check its version to see if an update is needed
191190
if cudaDriverExists then
192-
local cudaVersion = read_file(cudaVersionFile)
193-
if not cudaVersion then
194-
LmodError("No CUDA version file\\n" .. cudaVersionFile .. "\\nfound. " .. refer_to_docs)
191+
local cudaVersion = os.getenv("EESSI_CUDA_DRIVER_VERSION")
192+
if not cudaVersion or cudaVersion == "" then
193+
local eessi_prefix = os.getenv("EESSI_PREFIX")
194+
local script = pathJoin(eessi_prefix, 'scripts', 'gpu_support', 'nvidia', 'get_cuda_driver_version.sh')
195+
source_sh("bash", script)
195196
end
197+
cudaVersion = os.getenv("EESSI_CUDA_DRIVER_VERSION")
196198
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
197-
-- driver CUDA versions don't give a patch version for CUDA
198-
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
199-
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
200-
local driver_libs_need_update = false
201-
if major < major_req then
202-
driver_libs_need_update = true
203-
elseif major == major_req then
204-
if minor < minor_req then
199+
-- Account for the fact that the script sourced above was designed to never return a non-zero exit
200+
-- even if it fails to set EESSI_CUDA_DRIVER_VERSION
201+
-- Essentially, we handle that case here by raising a warning, which can be suppressed
202+
if not cudaVersion or cudaVersion == "" then
203+
local suppress_var = "EESSI_CUDA_DRIVER_VERSION_SUPPRESS_WARNING"  -- name of the user-facing opt-out variable
204+
local warn = "Environment variable EESSI_CUDA_DRIVER_VERSION not found. "
205+
warn = warn .. "Cannot ensure that driver version is new enough for CUDA toolkit version: '"
206+
warn = warn .. cudaVersion_req .. "'. This module will still be loaded, but may not function "
207+
warn = warn .. "as expected. Export " .. suppress_var .. "=1 to suppress this warning."
208+
local suppress_warn = os.getenv(suppress_var)  -- os.getenv yields a string, or nil when unset
209+
if suppress_warn ~= "1" then  -- warn unless explicitly opted out; compare with the string "1", never the number 1
210+
LmodWarning(warn)
211+
end
212+
else
213+
-- driver CUDA versions don't give a patch version for CUDA
214+
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
215+
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
216+
local driver_libs_need_update = false
217+
if tonumber(major) < tonumber(major_req) then
205218
driver_libs_need_update = true
219+
elseif tonumber(major) == tonumber(major_req) then
220+
if tonumber(minor) < tonumber(minor_req) then
221+
driver_libs_need_update = true
222+
end
223+
end
224+
if driver_libs_need_update == true then
225+
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
226+
advice = advice .. "Please update your CUDA driver libraries and then "
227+
advice = advice .. "let EESSI know about the update.\\n"
228+
advice = advice .. refer_to_docs
229+
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
206230
end
207-
end
208-
if driver_libs_need_update == true then
209-
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
210-
advice = advice .. "Please update your CUDA driver libraries and then "
211-
advice = advice .. "let EESSI know about the update.\\n"
212-
advice = advice .. refer_to_docs
213-
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
214231
end
215232
end
216233
end

eb_hooks.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,31 @@ def pre_configure_hook_score_p(self, *args, **kwargs):
11271127
raise EasyBuildError("Score-P-specific hook triggered for non-Score-P easyconfig?!")
11281128

11291129

1130+
def pre_configure_hook_dyninst(self, *args, **kwargs):
1131+
"""
1132+
Pre-configure hook for Dyninst
1133+
- specify correct path to binutils (in compat layer)
1134+
"""
1135+
if self.name == 'Dyninst':
1136+
1137+
# determine path to Prefix installation in compat layer via $EPREFIX
1138+
eprefix = get_eessi_envvar('EPREFIX')
1139+
1140+
binutils_lib_path_glob_pattern = os.path.join(eprefix, 'usr', 'lib*', 'binutils', '*-linux-gnu', '2.*')
1141+
binutils_lib_path = glob.glob(binutils_lib_path_glob_pattern)
1142+
if len(binutils_lib_path) == 1:
1143+
print_msg("Defining LibIberty variables for Dyninst...")
1144+
self.cfg.update('configopts', '-DLibIberty_ROOT_DIR=' + binutils_lib_path[0])
1145+
self.cfg.update('configopts', '-DLibIberty_INCLUDE_DIRS=' + os.path.join(binutils_lib_path[0], 'include'))
1146+
self.cfg.update('configopts', '-DLibIberty_LIBRARIES=' + os.path.join(binutils_lib_path[0], 'libiberty.a'))
1147+
else:
1148+
raise EasyBuildError("Failed to isolate path for binutils libraries using %s, got %s",
1149+
binutils_lib_path_glob_pattern, binutils_lib_path)
1150+
1151+
else:
1152+
raise EasyBuildError("Dyninst-specific hook triggered for non-Dyninst easyconfig?!")
1153+
1154+
11301155
def pre_configure_hook_extrae(self, *args, **kwargs):
11311156
"""
11321157
Pre-configure hook for Extrae
@@ -2004,6 +2029,7 @@ def pre_run_shell_cmd_hook(cmd, work_dir=None, **kwargs):
20042029
'WRF': pre_configure_hook_wrf_aarch64,
20052030
'LAMMPS': pre_configure_hook_LAMMPS_zen4_and_aarch64_cuda,
20062031
'Score-P': pre_configure_hook_score_p,
2032+
'Dyninst': pre_configure_hook_dyninst,
20072033
'CMake': pre_configure_hook_cmake_system,
20082034
}
20092035

eessi_container.sh

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ display_help() {
113113
echo " when a directory is provided, the format of the"
114114
echo " tarball's name will be {REPO_ID}-{TIMESTAMP}.tgz"
115115
echo " [default: not set]"
116+
echo " -S | --sandbox - use sandbox mode (i.e. convert .sif image to sandbox and then run"
117+
echo " it instead)"
118+
echo " [default: not set]"
116119
echo " -v | --verbose - display more information [default: false]"
117120
echo " -x | --http-proxy URL - provides URL for the env variable http_proxy"
118121
echo " [default: not set]; uses env var \$http_proxy if set"
@@ -275,6 +278,10 @@ while [[ $# -gt 0 ]]; do
275278
SAVE="$2"
276279
shift 2
277280
;;
281+
-S|--sandbox)
282+
SANDBOX=1
283+
shift 1
284+
;;
278285
-u|--resume)
279286
RESUME="$2"
280287
shift 2
@@ -1039,10 +1046,23 @@ for arg in "${PASS_THROUGH[@]}"; do
10391046
ADDITIONAL_CONTAINER_OPTIONS+=(${arg})
10401047
done
10411048

1042-
echo "Launching container with command (next line):"
1043-
echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@"
1044-
singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@"
1045-
exit_code=$?
1049+
# EESSI_SINGULARITY_SANDBOX is an environment variable (typically set in site_config.sh, if needed)
1050+
if [[ -n "${EESSI_SINGULARITY_SANDBOX}" || ${SANDBOX} -eq 1 ]]; then
1051+
# using a sandbox image mode is more robust at the cleanup phase at the end
1052+
CONTAINER_SANDBOX="${CONTAINER%.sif}.sandbox"
1053+
echo "Building a sandbox image with command (next line):"
1054+
echo "singularity build --sandbox --force ${CONTAINER_SANDBOX} ${CONTAINER}"
1055+
singularity build --sandbox --force ${CONTAINER_SANDBOX} ${CONTAINER}
1056+
echo "Launching sandbox container with command (next line):"
1057+
echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER_SANDBOX} $@"
1058+
singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER_SANDBOX} "$@"
1059+
exit_code=$?
1060+
else
1061+
echo "Launching container with command (next line):"
1062+
echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@"
1063+
singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@"
1064+
exit_code=$?
1065+
fi
10461066

10471067
# 6. save tmp if requested (arg -s|--save)
10481068
if [[ ! -z ${SAVE} ]]; then

init/modules/EESSI/2023.06.lua

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,14 @@ local quiet_load = false
222222
if os.getenv("EESSI_MODULE_UPDATE_PS1") then
223223
local prompt = os.getenv("PS1")
224224
if prompt then
225-
pushenv("PS1", "{EESSI/" .. eessi_version .. "} " .. prompt)
225+
local prefix = "{EESSI/" .. eessi_version .. "} "
226+
if mode() == "load" then
227+
-- Prepend prefix to PS1 without evaluating its contents
228+
execute{cmd="PS1=\"" .. prefix .. "$PS1\"", modeA={"load"}}
229+
elseif mode() == "unload" then
230+
-- Strip the prefix from beginning of PS1
231+
execute{cmd="PS1=\"${PS1#\"" .. prefix .. "\"}\"", modeA={"unload"}}
232+
end
226233
end
227234
end
228235

install_scripts.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ nvidia_files=(
211211
install_cuda_and_libraries.sh
212212
install_cuda_host_injections.sh
213213
link_nvidia_host_libraries.sh
214+
get_cuda_driver_version.sh
214215
)
215216
copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}"
216217

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# This can be leveraged by the source_sh() feature of Lmod
2+
# Because we want to source this without immediately raising an LmodError upon failure, this script
3+
# is designed to ALWAYS return a 0 exit code
4+
EESSI_CUDA_DRIVER_VERSION=$(nvidia-smi --query | grep -oP 'CUDA Version\s*:\s*\K[0-9.]+') || return 0
5+
# The || return 0 shouldn't be needed, but just to be overly sure that this script always returns 0
6+
export EESSI_CUDA_DRIVER_VERSION || return 0

0 commit comments

Comments
 (0)