Skip to content

Commit 6639df6

Browse files
authored
Merge branch 'main' into extrae_5.0.0
2 parents 91b0ed5 + 28af25e commit 6639df6

4 files changed

Lines changed: 34 additions & 19 deletions

File tree

EESSI-extend-easybuild.eb

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -93,27 +93,31 @@ working_dir = os.getenv("WORKING_DIR") or pathJoin("/tmp", os.getenv("USER"))
9393
-- Gather the EPREFIX to use as a sysroot
9494
sysroot = os.getenv("EESSI_EPREFIX")
9595
96-
-- Check if we have GPU capabilities and configure CUDA compute capabilities
96+
-- Check if we have GPU capabilities and configure AMD/CUDA compute capabilities
9797
eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET")
9898
if (eessi_accelerator_target ~= nil) then
9999
cuda_compute_capability = string.match(eessi_accelerator_target, "^accel/nvidia/cc([0-9]+)$")
100+
amd_compute_capability = string.match(eessi_accelerator_target, "^accel/amd/(gfx[0-9a-f]+)$")
101+
100102
if (cuda_compute_capability ~= nil) then
101103
-- The last digit is the minor version, so insert a dot just before the last digit
102104
major_version = cuda_compute_capability:sub(1, #cuda_compute_capability - 1)
103105
minor_version = cuda_compute_capability:sub(#cuda_compute_capability)
104106
easybuild_cuda_compute_capabilities = string.format("%s.%s", major_version, minor_version)
107+
108+
-- If architectures are 9.0, 10.0 or 12.0, enable architecture or family-specific optimizations
109+
if easybuild_cuda_compute_capabilities == '9.0' then
110+
easybuild_cuda_compute_capabilities = '9.0a'
111+
elseif easybuild_cuda_compute_capabilities == '10.0' then
112+
easybuild_cuda_compute_capabilities = '10.0f'
113+
elseif easybuild_cuda_compute_capabilities == '12.0' then
114+
easybuild_cuda_compute_capabilities = '12.0f'
115+
end
116+
elseif (amd_compute_capability ~= nil) then
117+
easybuild_amdgcn_compute_capabilities = amd_compute_capability
105118
else
106119
LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target)
107120
end
108-
109-
-- If architectures are 9.0, 10.0 or 12.0, enable architecture or family-specific optimizations
110-
if easybuild_cuda_compute_capabilities == '9.0' then
111-
easybuild_cuda_compute_capabilities = '9.0a'
112-
elseif easybuild_cuda_compute_capabilities == '10.0' then
113-
easybuild_cuda_compute_capabilities = '10.0f'
114-
elseif easybuild_cuda_compute_capabilities == '12.0' then
115-
easybuild_cuda_compute_capabilities = '12.0f'
116-
end
117121
end
118122
119123
-- Some environment variables affect behaviour, let's gather them once
@@ -130,7 +134,7 @@ if eessi_cvmfs_install then
130134
end
131135
easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH")
132136
-- enforce accelerator subdirectory usage for CVMFS installs (only if an accelerator install is requested)
133-
if (eessi_accelerator_target ~= nil) and (cuda_compute_capability ~= nil) and (os.getenv("EESSI_ACCELERATOR_INSTALL") ~= nil) then
137+
if (eessi_accelerator_target ~= nil) and (cuda_compute_capability ~= nil or amd_compute_capability ~= nil) and (os.getenv("EESSI_ACCELERATOR_INSTALL") ~= nil) then
134138
easybuild_installpath = pathJoin(easybuild_installpath, eessi_accelerator_target)
135139
end
136140
elseif eessi_site_install then
@@ -202,6 +206,11 @@ if (easybuild_cuda_compute_capabilities ~= nil) then
202206
setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities)
203207
end
204208
209+
-- Set environment variables if building for AMDGCN compute capabilities
210+
if (easybuild_amdgcn_compute_capabilities ~= nil) then
211+
setenv ("EASYBUILD_AMDGCN_CAPABILITIES", easybuild_amdgcn_compute_capabilities)
212+
end
213+
205214
-- Set all related environment variables if we have project or user installations (including extending MODULEPATH)
206215
if (user_modulepath ~= nil) then
207216
-- Use a more restrictive umask for this case

eb_hooks.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -717,13 +717,18 @@ def pre_fetch_hook(self, *args, **kwargs):
717717
def pre_fetch_hook_check_installation_path(self, *args, **kwargs):
718718
# When we know the accelerator status (e.g. CUDA or ROCm), we will need to verify the installation path
719719
# if we are doing an EESSI or host_injections installation
720-
accelerator_deps = ['CUDA']
720+
accelerator_deps = ['CUDA', 'ROCm-LLVM']
721+
accelerator_toolchains = ['rocm-compilers', 'rompi', 'rfbf', 'rfoss']
721722
strict_eessi_installation = (
722723
bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir)) or
723724
self.installdir.startswith(HOST_INJECTIONS_LOCATION))
724725
if strict_eessi_installation and not os.getenv("EESSI_OVERRIDE_STRICT_INSTALLPATH_CHECK"):
725726
dependency_names = self.cfg.dependency_names()
726-
if self.cfg.name in accelerator_deps or any(dep in dependency_names for dep in accelerator_deps):
727+
if (
728+
self.cfg.name in accelerator_deps
729+
or any(dep in dependency_names for dep in accelerator_deps)
730+
or self.toolchain.name in accelerator_toolchains
731+
):
727732
# Make sure the path is an accelerator location
728733
if "/accel/" not in self.installdir:
729734
raise EasyBuildError(

init/eessi_archdetect.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,13 +179,14 @@ accelpath() {
179179
# If EESSI_ACCELERATOR_TARGET_OVERRIDE is set, use it
180180
log "DEBUG" "accelpath: Override variable set as '$EESSI_ACCELERATOR_TARGET_OVERRIDE' "
181181
if [ ! -z $EESSI_ACCELERATOR_TARGET_OVERRIDE ]; then
182-
if [[ "$EESSI_ACCELERATOR_TARGET_OVERRIDE" =~ ^accel/nvidia/cc[0-9]+$ ]]; then
183-
echo ${EESSI_ACCELERATOR_TARGET_OVERRIDE}
182+
# Regex that allows both NVIDIA and AMD overrides
183+
if [[ "$EESSI_ACCELERATOR_TARGET_OVERRIDE" =~ ^accel/(nvidia/cc[0-9]+|amd/gfx[0-9a-f]+)$ ]]; then
184+
echo "$EESSI_ACCELERATOR_TARGET_OVERRIDE"
184185
return 0
185186
else
186-
log "ERROR" "Value of \$EESSI_ACCELERATOR_TARGET_OVERRIDE should match 'accel/nvidia/cc[0-9]+', but it does not: '$EESSI_ACCELERATOR_TARGET_OVERRIDE'"
187+
log "ERROR" "Value of \$EESSI_ACCELERATOR_TARGET_OVERRIDE should match 'accel/nvidia/cc[0-9]+' or 'accel/amd/gfx[0-9a-f]+', but it does not: '$EESSI_ACCELERATOR_TARGET_OVERRIDE'"
188+
return 1
187189
fi
188-
return 0
189190
fi
190191

191192
# check for NVIDIA GPUs via nvidia-smi command

scripts/gpu_support/nvidia/install_cuda_and_libraries.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ echo "Created temporary directory '${tmpdir}'"
9393
SAVE_MODULEPATH=${MODULEPATH}
9494

9595
for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
96-
echo -e "Processing easystack file ${easystack_file}...\n\n"
96+
echo -e "Processing easystack file ${EASYSTACK_FILE}...\n\n"
9797

9898
# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
9999
eb_version=$(echo ${EASYSTACK_FILE} | sed 's/.*eb-\([0-9.]*\).*.yml/\1/g')
@@ -104,7 +104,7 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
104104
if [[ $? -eq 0 ]]; then
105105
echo_green ">> Found an EasyBuild/${eb_version} module"
106106
else
107-
echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${easystack_file} (see output in ${module_avail_out})"
107+
echo_yellow ">> No EasyBuild/${eb_version} module found: skipping step to install easystack file ${EASYSTACK_FILE} (see output in ${module_avail_out})"
108108
continue
109109
fi
110110
# Safer to unload EESSI-extend before loading an EasyBuild version, in case unload behavior ever becomes dependent on EasyBuild version

0 commit comments

Comments
 (0)