Skip to content

Commit 079e04c

Browse files
author
Caspar van Leeuwen
committed
Create env var to skip CC toolkit compatibility check completely
1 parent d1ccc89 commit 079e04c

1 file changed

Lines changed: 33 additions & 29 deletions

File tree

eb_hooks.py

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,13 @@ def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version):
196196

197197
# Raise informative error if `toolkit_version` is not yet covered in CUDA_SUPPORTED_CCS
198198
if not toolkit_version in CUDA_SUPPORTED_CCS:
199-
msg = f"Trying to determine compatibility between requested CUDA Compute Capability ({cuda_cc})"
200-
msg +=f" and CUDA toolkit version {toolkit_version} failed: support for CUDA Compute Capabilities"
201-
msg +=" not known for this toolkit version. Please install the toolkit version manually, run"
202-
msg +=" 'nvcc --list-gpu-arch' to determine he supported CUDA Compute Capabilities, and then add these"
203-
msg +=f" to the CUDA_SUPPORTED_CCS table in the EasyBuild hooks ({build_option('hooks')})"
199+
msg = f"Trying to determine compatibility between requested CUDA Compute Capability ({cuda_cc}) "
200+
msg +=f"and CUDA toolkit version {toolkit_version} failed: support for CUDA Compute Capabilities "
201+
msg +="not known for this toolkit version. Please install the toolkit version manually, run "
202+
msg +="'nvcc --list-gpu-arch' to determine he supported CUDA Compute Capabilities, and then add these "
203+
msg +=f"to the CUDA_SUPPORTED_CCS table in the EasyBuild hooks ({build_option('hooks')}). "
204+
msg += "Alternatively, you can skip the compatiblity check alltogether by setting the "
205+
msg += "EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK environment variable."
204206
raise EasyBuildError(msg)
205207

206208
if cuda_cc in CUDA_SUPPORTED_CCS[toolkit_version]:
@@ -739,30 +741,32 @@ def is_unsupported_module(self):
739741

740742
# If the CUDA toolkit is a dependency, check that it supports (all) requested CUDA Compute Capabilities
741743
# Otherwise, mark this as unsupported
742-
cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True)
743-
if cudaver:
744-
# cuda_ccs_string is e.g. "8.0,9.0"
745-
cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False)
746-
# cuda_ccs is empty if none are defined
747-
if cuda_ccs_string:
748-
# cuda_ccs is a comma-seperated string. Convert to list for easier handling
749-
cuda_ccs = cuda_ccs_string.split(',')
750-
# Check if any of the CUDA CCs is unsupported. If so, append the error
751-
if any(
752-
[not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs]
753-
):
754-
msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA "
755-
msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' "
756-
msg += "and injectiong an LmodError into the modulefile."
757-
print_warning(msg)
758-
# Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas
759-
cuda_ccs_string = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_')
760-
# Also replace periods, those are not officially supported in environment variable names
761-
var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_')
762-
errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute "
763-
errmsg +=f"Capabilities: {cuda_ccs}.\\n"
764-
setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg))
765-
return True
744+
if not os.getenv("EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK"):
745+
cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True)
746+
if cudaver:
747+
# cuda_ccs_string is e.g. "8.0,9.0"
748+
cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False)
749+
# cuda_ccs_string is empty if none are defined
750+
if cuda_ccs_string:
751+
# cuda_ccs_string is a comma-separated string. Convert to list for easier handling
752+
cuda_ccs = cuda_ccs_string.split(',')
753+
# Check if any of the CUDA CCs is unsupported. If so, append the error
754+
if any(
755+
[not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs]
756+
):
757+
msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA "
758+
msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' "
759+
msg += "and injectiong an LmodError into the modulefile. You can override this behaviour by "
760+
msg += "setting the EESSI_OVERRIDE_CUDA_CC_TOOLKIT_CHECK environment variable."
761+
print_warning(msg)
762+
# Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas
763+
cuda_ccs_string = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_')
764+
# Also replace periods, those are not officially supported in environment variable names
765+
var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_')
766+
errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute "
767+
errmsg +=f"Capabilities: {cuda_ccs}.\\n"
768+
setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg))
769+
return True
766770

767771
# If all the above logic passed, this module is supported
768772
setattr(self, EESSI_SUPPORTED_MODULE_ATTR, True)

0 commit comments

Comments
 (0)