Skip to content

Commit a2f5544

Browse files
author
Caspar van Leeuwen
committed
Try to change the subdir in which the CUDA toolkit is installed so that it also doesn't include the CPU microarchitecture
1 parent 41f3775 commit a2f5544

3 files changed

Lines changed: 39 additions & 7 deletions

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# We'll build all CUDA software, for various reasons
2+
# 1. We now have a proper CUDA sanity check, and if anything was 'wrong' with our current CUDA installs, we'd like
3+
# to know about it
4+
# 2. The PR implementing a CI to check for differences between officially supported CUDA Compute Capabilities shows
5+
# that there are a lot of missing installations https://github.com/EESSI/software-layer/pull/1087 . A rebuild PR like
6+
# this will have the convenient side effect of filling all those holes
7+
easyconfigs:
8+
- CUDA-12.1.1.eb:
9+
options:
10+
accept-eula-for: CUDA

eb_hooks.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def parse_list_of_dicts_env(var_name):
151151
if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', var_name):
152152
raise ValueError(f"Invalid environment variable name: {var_name}")
153153
list_string = os.getenv(var_name, '[]')
154-
154+
155155
list_of_dicts = []
156156
try:
157157
# Try JSON format first
@@ -162,7 +162,7 @@ def parse_list_of_dicts_env(var_name):
162162
list_of_dicts = ast.literal_eval(list_string)
163163
except (ValueError, SyntaxError):
164164
raise ValueError(f"Environment variable '{var_name}' does not contain a valid list of dictionaries.")
165-
165+
166166
return list_of_dicts
167167

168168

@@ -211,7 +211,7 @@ def post_ready_hook(self, *args, **kwargs):
211211
parallel = self.parallel
212212
else:
213213
parallel = self.cfg['parallel']
214-
214+
215215
if parallel == 1:
216216
return # no need to limit if already using 1 core
217217

@@ -733,7 +733,7 @@ def pre_configure_hook_score_p(self, *args, **kwargs):
733733
def pre_configure_hook_vsearch(self, *args, **kwargs):
734734
"""
735735
Pre-configure hook for VSEARCH
736-
- Workaround for a Zlib macro being renamed in Gentoo, see https://bugs.gentoo.org/383179
736+
- Workaround for a Zlib macro being renamed in Gentoo, see https://bugs.gentoo.org/383179
737737
(solves "expected initializer before 'OF'" errors)
738738
"""
739739
if self.name == 'VSEARCH':
@@ -1301,13 +1301,28 @@ def replace_non_distributable_files_with_symlinks(log, install_dir, pkg_name, al
13011301
log.debug("%s is not found in allowlist, so replacing it with symlink: %s",
13021302
print_name, full_path)
13031303
# the host_injections path is under a fixed repo/location for CUDA or cuDNN
1304+
# full_path is something similar to
1305+
# /cvmfs/software.eessi.io/version/.../x86_64/amd/zen4/accel/nvidia/cc90/.../CUDA/bin/nvcc
1306+
# host_inj_path will then be
1307+
# /cvmfs/software.eessi.io/host_injections/.../x86_64/amd/zen4/accel/nvidia/cc90/.../CUDA/bin/nvcc
13041308
host_inj_path = re.sub(EESSI_INSTALLATION_REGEX, HOST_INJECTIONS_LOCATION, full_path)
13051309
# CUDA and cu* libraries themselves don't care about compute capability so remove this
13061310
# duplication from under host_injections (symlink to a single CUDA or cu* library
13071311
# installation for all compute capabilities)
13081312
accel_subdir = get_eessi_envvar("EESSI_ACCELERATOR_TARGET")
1313+
software_subdir = get_eessi_envvar("EESSI_SOFTWARE_SUBDIR")
1314+
cpu_family = get_eessi_envvar("EESSI_CPU_FAMILY")
1315+
# If accel_subdir is defined, remove it from the full path
1316+
# After removal of accel_subdir, host_inj_path will be something like
1317+
# /cvmfs/software.eessi.io/host_injections/.../x86_64/amd/zen4/.../CUDA/bin/nvcc
13091318
if accel_subdir:
1310-
host_inj_path = host_inj_path.replace("/accel/%s" % accel_subdir, '')
1319+
host_inj_path = host_inj_path.replace(accel_subdir, '')
1320+
# /cvmfs/software.eessi.io/host_injections/.../x86_64/amd/zen4/.../CUDA/bin/nvcc
1321+
# If software_subdir is defined (it should always be...), replace it by only the cpu_family
1322+
# After this substitution, host_inj_path will be something like
1323+
# /cvmfs/software.eessi.io/host_injections/.../x86_64/.../CUDA/bin/nvcc
1324+
if software_subdir and cpu_family:
1325+
host_inj_path.replace(software_subdir, cpu_family)
13111326
# make sure source and target of symlink are not the same
13121327
if full_path == host_inj_path:
13131328
raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you "

scripts/gpu_support/nvidia/install_cuda_and_libraries.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,15 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do
132132

133133
# If there is a GPU on the node, the installation path will by default have an
134134
# accelerator subdirectory. For CUDA and cu*, these are binary installations and
135-
# don't care about the target compute capability. Our hooks are aware of this and
136-
# therefore expect CUDA to be available under EESSI_SITE_SOFTWARE_PATH
135+
# don't care about the target compute capability nor the CPU microarchitecture.
136+
# Our hooks are aware of this and therefore expect CUDA to be available under
137+
# something like EESSI_SITE_SOFTWARE_PATH, but then with the CPU micro-architecture
138+
# stripped
139+
# This sed command will capture everything from the EESSI_SITE_SOFTWARE_PATH up until
140+
# the EESSI_SOFTWARE_SUBDIR in a capture group. It will then replace that with the content
141+
# of the capture group and then have the EESSI_CPU_FAMILY appended
142+
# Thus EESSI_SITE_CPU_FAMILY_PATH is something like /cvmfs/software.eessi.io/host_injections/.../x86_64
143+
EESSI_SITE_CPU_FAMILY_PATH=$(echo "$EESSI_SITE_SOFTWARE_PATH" | sed 's/\(.*\)'"$EESSI_SOFTWARE_SUBDIR"'/\1'"$EESSI_CPU_FAMILY"'/')
137144
export EASYBUILD_INSTALLPATH=$EESSI_SITE_SOFTWARE_PATH
138145

139146
# Install modules in hidden .modules dir to keep track of what was installed before

0 commit comments

Comments
 (0)