Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
3c9618b
Add some initial changes to the hooks to make sure to install with --…
Dec 24, 2025
d096160
Make mechanism to generate modules that print LmodErrors for unsuppor…
Jan 7, 2026
b5fa942
Remove some variables that have become obsolete, and make sure get_cu…
Jan 7, 2026
74351d4
Remove the now obsolete zen4 parse hook - we now inject the lmodfoote…
Jan 7, 2026
2d2cdff
Remove zen4-specific pre and post prepare hooks, as these were replac…
Jan 7, 2026
e5f5cd2
Remove the prepare_hooks that were specific to zen4, as they were rep…
Jan 7, 2026
0d40193
Actually implement is_cuda_cc_supported_by_toolkit. Also, make sure e…
Jan 8, 2026
5a2256b
Move import to the top
Jan 8, 2026
0d745e7
Fix description for 'is_supported_module' as it no longer returns an …
Jan 8, 2026
a6d52c6
Add hook to handle stripping suffix from --cuda-compute-capabilities …
Jan 12, 2026
e69b665
Use eb_hooks.py from the cloned software-layer-scripts, rather than t…
Jan 12, 2026
04a7f4e
Add more debugging output
Jan 12, 2026
791dcdd
Set later
Jan 12, 2026
c68c495
Clarify messageing
Jan 12, 2026
8292fa3
Use EB 5.2.0 instead to install the CUDA host injections, so that the…
Jan 12, 2026
5a6b43c
Use the GPU install scripts from the cloned software-layer-scripts so…
Jan 13, 2026
2422804
Apply suggestion from @casparvl
casparvl Jan 13, 2026
26425be
Revert changes that were now split off to https://github.com/EESSI/so…
Jan 13, 2026
2e8b965
Merge branch 'CUDA_cuDNN_hooks_202506' of github.com:casparvl/softwar…
Jan 13, 2026
ffed74a
Apply suggestions from code review
casparvl Jan 13, 2026
d1ccc89
Raise EB error if the toolkit version isn't defined (yet) in the look…
Jan 13, 2026
e719983
Apply suggestion from @casparvl
bedroge Jan 13, 2026
15324ec
Apply suggestion from @casparvl
bedroge Jan 13, 2026
079e04c
Create env var to skip CC toolkit compatibility check completely
Jan 13, 2026
e274fbe
Merge branch 'CUDA_cuDNN_hooks_202506' of github.com:casparvl/softwar…
Jan 13, 2026
451931c
Apply suggestions from code review
casparvl Jan 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 206 additions & 61 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import os
import re
from typing import NamedTuple

import easybuild.tools.environment as env
from easybuild.easyblocks.generic.configuremake import obtain_config_guess
Expand Down Expand Up @@ -43,6 +44,8 @@
EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs'
EESSI_MODULE_ONLY_ATTR = 'orig_module_only'
EESSI_FORCE_ATTR = 'orig_force'
EESSI_SUPPORTED_MODULE_ATTR = 'eessi_supported_module'
EESSI_UNSUPPORTED_MODULE_ATTR = 'eessi_unsupported_module'

SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0]

Expand Down Expand Up @@ -71,6 +74,40 @@
{'name': 'lfoss', 'version': '2025b'}
)

# Supported compute capabilities by CUDA toolkit version
# Obtained by installing all CUDAs from 12.0.0 to 13.1.0, then using:

# #!/bin/bash
Copy link
Copy Markdown
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's worth leaving this here as a breadcrumb to future contributors, since we'll have to update this list occasionally and doing it manually is silly - especially if you want to add compatibility for a range of toolkit versions

#
# CUDA_VERS=(12.0.0 12.1.0 12.1.1 12.2.0 12.2.2 12.3.0 12.3.2 12.4.0 12.5.0 12.6.0 12.8.0 12.9.0 12.9.1 13.0.0 13.0.1 13.0.2 13.1.0)
#
# for ver in ${CUDA_VERS[@]}; do
# module load CUDA/${ver}
# ccs=$(nvcc --list-gpu-arch)
# ccs=$(echo ${ccs} | sed "s/ /', /g" | sed "s/compute_/'/g")
# echo " '${ver}': [${ccs}'],"
# module unload CUDA
# done

# Lookup table: CUDA toolkit version string -> list of compute capabilities supported by that toolkit.
# Compute capabilities are stored as strings without the dot and without any suffix (e.g. '90', '100'),
# so a requested value such as '9.0a' has to be normalized to '90' before it can be compared against this table.
# Toolkit versions that are not listed here have no entry, so a plain dictionary lookup raises a KeyError for them.
CUDA_SUPPORTED_CCS = {
'12.0.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.1.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.1.1': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.2.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.2.2': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.3.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.3.2': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.4.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.5.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.6.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90'],
'12.8.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90', '100', '101', '120'],
'12.9.0': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90', '100', '101', '103', '120', '121'],
'12.9.1': ['50', '52', '53', '60', '61', '62', '70', '72', '75', '80', '86', '87', '89', '90', '100', '101', '103', '120', '121'],
'13.0.0': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'],
'13.0.1': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'],
'13.0.2': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'],
'13.1.0': ['75', '80', '86', '87', '88', '89', '90', '100', '110', '103', '120', '121'],
}

# Ensure that we don't print any messages in --terse mode
# Note that --terse was introduced in EB 4.9.1
Expand Down Expand Up @@ -114,6 +151,55 @@ def is_gcccore_1220_based(**kwargs):
)



def get_cuda_version(ec, check_deps=True, check_builddeps=True):
"""
Returns the CUDA version that this EasyConfig (ec) uses as a (build)dependency.
If (ec) is simply CUDA itself, it will return the version.
If no CUDA is used as (build)dependency, this function returns None.
"""
cudaver = None
Comment thread
bedroge marked this conversation as resolved.
ec_dict = ec.asdict()

# Is this CUDA itself?
if ec.name == 'CUDA':
cudaver = ec.version

# At this point, CUDA should be a builddependency due to inject_gpu_property
# changing any CUDA dep to a builddependency. But, for robustness, just check both
deps = []
if check_deps:
deps = deps + ec_dict['dependencies'][:]
if check_builddeps:
deps = deps + ec_dict['builddependencies'][:]

# Provide default
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem relevant here? (or I'm just misunderstanding the comment)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, that applied to the cuda_ver = None before I moved that...

Comment thread
bedroge marked this conversation as resolved.
Outdated
for dep in deps:
if dep['name'] == 'CUDA':
cudaver = dep['version']

return cudaver


def is_cuda_cc_supported_by_toolkit(cuda_cc, toolkit_version):
"""
Checks if the CUDA Compute Capability passed in cuda_cc is supported by the CUDA toolkit version toolkit_version
Returns True if supported or False if not supported
"""
# Clean cuda_cc of any suffixes like the 'a' in '9.0a'
# The regex expects one or more digits, a dot, one or more digits, and then optionally any number of letters
# It strips the suffix letters by only returning the first capture group (the digits and dot)
cuda_cc = re.sub(r'^(\d+\.\d+)[a-zA-Z]*$', r'\1', cuda_cc)
Copy link
Copy Markdown
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lookup table contains CCs in the format of 90, 100, etc, so no periods, and no suffixes. The CUDA compute capabilities passed to EasyBuild contain periods (for sure) and can contain suffixes. So to compare, we need to strip the suffix from EB's CUDA CC, and remove the '.'.


# Strip the dot
cuda_cc = cuda_cc.replace('.', '')

if cuda_cc in CUDA_SUPPORTED_CCS[toolkit_version]:
Comment thread
casparvl marked this conversation as resolved.
return True
else:
return False


def get_eessi_envvar(eessi_envvar):
"""Get an EESSI environment variable from the environment"""

Expand Down Expand Up @@ -155,11 +241,6 @@ def parse_hook(ec, *args, **kwargs):
if ec.name in PARSE_HOOKS:
PARSE_HOOKS[ec.name](ec, eprefix)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
parse_hook_zen4_module_only(ec, eprefix)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now handled in the pre_module_hook_unsupported_modules.


# inject the GPU property (if required)
ec = inject_gpu_property(ec)

Expand Down Expand Up @@ -288,6 +369,22 @@ def post_ready_hook(self, *args, **kwargs):
print_msg(msg % (new_parallel, curr_parallel, session_parallel, self.name, cpu_target), log=self.log)


def pre_prepare_hook_unsupported_modules(self, *args, **kwargs):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaces the specific pre_prepare_hook_ignore_zen4_gcccore1220_error we had before.

"""Set env var to ignore specific LmodErrors from dependencies if this module is know to be unsupported"""
if is_unsupported_module(self):
unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
print_msg(f"Setting {unsup_mod.envvar} to allow loading dependencies that otherwise throw an LmodError")
os.environ[unsup_mod.envvar] = "1"


def post_prepare_hook_unsupported_modules(self, *args, **kwargs):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaces the post_prepare_hook_ignore_zen4_gcccore1220_error hook we had before

"""Unset env var to ignore specific LmodErrors from dependencies if this module is know to be unsupported"""
if is_unsupported_module(self):
unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
print_msg(f"Unsetting {unsup_mod.envvar}")
del os.environ[unsup_mod.envvar]


def pre_prepare_hook(self, *args, **kwargs):
"""Main pre-prepare hook: trigger custom functions."""

Expand Down Expand Up @@ -318,10 +415,8 @@ def pre_prepare_hook(self, *args, **kwargs):
if self.name in PRE_PREPARE_HOOKS:
PRE_PREPARE_HOOKS[self.name](self, *args, **kwargs)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)
# Always trigger this, regardless of ec.name
pre_prepare_hook_unsupported_modules(self, *args, **kwargs)
Copy link
Copy Markdown
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run the new hook instead of the old one. All the logic to check if something is an unsupported module is now contained within is_unsupported_module, so no more use for checking the cpu_target.



def post_prepare_hook_gcc_prefixed_ld_rpath_wrapper(self, *args, **kwargs):
Expand Down Expand Up @@ -387,10 +482,8 @@ def post_prepare_hook(self, *args, **kwargs):
if self.name in POST_PREPARE_HOOKS:
POST_PREPARE_HOOKS[self.name](self, *args, **kwargs)

# Always trigger this one, regardless of ec.name
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4:
post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs)
# Always trigger this, regardless of ec.name
post_prepare_hook_unsupported_modules(self, *args, **kwargs)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run the new hook instead of the old one. All the logic to check if something is an unsupported module is now contained within is_unsupported_module, so no more use for checking the cpu_target.



def parse_hook_casacore_disable_vectorize(ec, eprefix):
Expand Down Expand Up @@ -556,24 +649,6 @@ def parse_hook_freeimage_aarch64(ec, *args, **kwargs):
print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts'])


def parse_hook_zen4_module_only(ec, eprefix):
Copy link
Copy Markdown
Contributor Author

@casparvl casparvl Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the LmodError to the modluafooter is now done in the generic pre_module_hook_unsupported_module hook

"""
Use --force --module-only if building a foss-2022b-based EasyConfig for Zen4.
This toolchain will not be supported on Zen4, so we will generate a modulefile
and have it print an LmodError.
"""
if is_gcccore_1220_based(ecname=ec['name'], ecversion=ec['version'], tcname=ec['toolchain']['name'],
tcversion=ec['toolchain']['version']):
env_varname = EESSI_IGNORE_ZEN4_GCC1220_ENVVAR
# TODO: create a docs page to which we can refer for more info here
# TODO: then update the link to the known issues page to the _specific_ issue
# Need to escape the newline character so that the newline character actually ends up in the module file
# (otherwise, it splits the string, and a 2-line string ends up in the modulefile, resulting in syntax error)
errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n"
errmsg += "See https://www.eessi.io/docs/known_issues/eessi-<EESSI_VERSION>/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture"
ec['modluafooter'] = 'if (not os.getenv("%s")) then LmodError("%s") end' % (env_varname, errmsg)


def pre_fetch_hook(self, *args, **kwargs):
"""Main pre fetch hook: trigger custom functions based on software name."""
if self.name in PRE_FETCH_HOOKS:
Expand Down Expand Up @@ -614,17 +689,74 @@ def pre_fetch_hook_check_installation_path(self, *args, **kwargs):
)


def is_unsupported_module(ec):
class UnsupportedModule(NamedTuple):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a named tuple so that we can have access to the environment variable name and error message through clearly named attributes. That's less sensitive to messing up compared to a regular tuple, where you'd have to remember what is stored in the first and what is stored in the second element of the tuple.

"""
Environment variable and error message for an unsupported module.
envvar: the name of the environment variable that needs to be set to ignore the LmodError
that this unsupported module would otherwise generate
errmsg: the actual LmodError message that should be printed
"""
envvar: str
errmsg: str


def is_unsupported_module(self):
"""
Determine if the given module is unsupported in EESSI, and hence if a dummy module needs to be built that just prints an LmodError.
Returns True if the module is unsupported; the environment variable that can be used to ignore that particular LmodError
is still required to actually build the module itself (EasyBuild will load/test the module).
Otherwise, it returns False.
If a module is unsupported, this function will set the EESSI_UNSUPPORTED_MODULE_ATTR attribute on `self`,
and assign an `UnsupportedModule` NamedTuple to it.
If a module is supported, this function will set the EESSI_SUPPORTED_MODULE_ATTR attribute on `self`
(and set it to True).
"""
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')

if cpu_target == CPU_TARGET_ZEN4 and is_gcccore_1220_based(ecname=ec.name, ecversion=ec.version, tcname=ec.toolchain.name, tcversion=ec.toolchain.version):
return EESSI_IGNORE_ZEN4_GCC1220_ENVVAR
# If this function was already called by an earlier hook, evaluation of whether this is an unsupported module was
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point in time, the is_unsupported_module function is called 6 or 7 times. Since it may become quite lengthy with lots of logic if we keep adding cases for modules that are unsupported, we want an early return for optimization in case this has already been evaluated before. We can easily do that by checking if either the EESSI_SUPPORTED_MODULE_ATTR or EESSI_UNSUPPORTED_MODULE_ATTR have been set.

If neither has been set, this is the first time we are evaluating this function and we should go through the full logic.

# already done. No need to redo it: save time and return early
if hasattr(self, EESSI_SUPPORTED_MODULE_ATTR):
return False
elif hasattr(self, EESSI_UNSUPPORTED_MODULE_ATTR):
return True

# Foss-2022b is not supported on Zen4
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Next time we have unsupported modules, this function is the only one that needs changing: we simply add a case to it. A case typically has:

  • Logic (if statements) to determine if this is an unsupported module
  • Print a warning message to stdout to make it clear we're doing something out-of-the-ordinary in this installation
  • Define the LmodError message that should be embedded in the modulefile
  • Define the environment variable name that can be used to suppress the LmodError

cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if cpu_target == CPU_TARGET_ZEN4 and is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name, tcversion=self.toolchain.version):
msg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported on Zen4 architectures. "
msg += "Building with '--module-only --force' and injecting an LmodError into the modulefile."
print_warning(msg)
errmsg = "EasyConfigs using toolchains based on GCCcore-12.2.0 are not supported for the Zen4 architecture.\\n"
errmsg += "See https://www.eessi.io/docs/known_issues/eessi-<EESSI_VERSION>/#gcc-1220-and-foss-2022b-based-modules-cannot-be-loaded-on-zen4-architecture"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gave this a try myself, and noticed that the URL in the error message had <EESSI_VERSION>. We can use $EESSI_VERSION here, or even just hardcode it to 2023.06 (since this only applies to that version).

Comment thread
casparvl marked this conversation as resolved.
Outdated
var=EESSI_IGNORE_ZEN4_GCC1220_ENVVAR
setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var, errmsg=errmsg))
return True

# If the CUDA toolkit is a dependency, check that it supports (all) requested CUDA Compute Capabilities
# Otherwise, mark this as unsupported
cudaver = get_cuda_version(ec=self.cfg, check_deps=True, check_builddeps=True)
if cudaver:
# cuda_ccs_string is e.g. "8.0,9.0"
cuda_ccs_string = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False)
# cuda_ccs_string is empty if none are defined
if cuda_ccs_string:
# cuda_ccs_string is a comma-separated string. Convert to list for easier handling
cuda_ccs = cuda_ccs_string.split(',')
# Check if any of the CUDA CCs is unsupported. If so, append the error
if any(
[not is_cuda_cc_supported_by_toolkit(cuda_cc=cuda_cc, toolkit_version=cudaver) for cuda_cc in cuda_ccs]
):
msg = f"Requested a CUDA Compute Capability ({cuda_ccs}) that is not supported by the CUDA "
msg += f"toolkit version ({cudaver}) used by this software. Switching to '--module-only --force' "
msg += "and injectiong an LmodError into the modulefile."
print_warning(msg)
# Use a normalized variable name for the CUDA ccs: strip any suffix, and replace commas
cuda_ccs_string = re.sub(r'[a-zA-Z]', '', cuda_ccs_string).replace(',', '_')
# Also replace periods, those are not officially supported in environment variable names
var=f"EESSI_IGNORE_CUDA_{cudaver}_CC_{cuda_ccs_string}".replace('.', '_')
errmsg = f"EasyConfigs using CUDA {cudaver} or older are not supported for (all) requested Compute "
errmsg +=f"Capabilities: {cuda_ccs}.\\n"
setattr(self, EESSI_UNSUPPORTED_MODULE_ATTR, UnsupportedModule(envvar=var,errmsg=errmsg))
return True

# If all the above logic passed, this module is supported
setattr(self, EESSI_SUPPORTED_MODULE_ATTR, True)
return False


Expand All @@ -651,18 +783,21 @@ def pre_fetch_hook_unsupported_modules(self, *args, **kwargs):

def pre_module_hook_unsupported_module(self, *args, **kwargs):
"""Make module load-able during module step"""
ignore_lmoderror_envvar = is_unsupported_module(self)
if ignore_lmoderror_envvar:
if is_unsupported_module(self):
unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get the UnsupportedModule tuple, so we can use it to set the environment variable that suppresses the LmodError.

if hasattr(self, 'initial_environ'):
# Allow the module to be loaded in the module step (which uses initial environment)
print_msg(f"Setting {ignore_lmoderror_envvar} in initial environment")
self.initial_environ[ignore_lmoderror_envvar] = "1"
print_msg(f"Setting {unsup_mod.envvar} in initial environment")
self.initial_environ[unsup_mod.envvar] = "1"
extra_footer='if (not os.getenv("%s")) then LmodError("%s") end' % (unsup_mod.envvar, unsup_mod.errmsg)
# Append extra_footer if a modluafooter already exists. Otherwise, simply assign
self.cfg['modluafooter'] = self.cfg['modluafooter'] + '\n' + extra_footer if self.cfg['modluafooter'] else extra_footer


def post_module_hook_unsupported_module(self, *args, **kwargs):
"""Revert changes from pre_fetch_hook_unsupported_modules"""
ignore_lmoderror_envvar = is_unsupported_module(self)
if ignore_lmoderror_envvar:
if is_unsupported_module(self):
unsup_mod = getattr(self, EESSI_UNSUPPORTED_MODULE_ATTR)
if hasattr(self, EESSI_MODULE_ONLY_ATTR):
update_build_option('module_only', getattr(self, EESSI_MODULE_ONLY_ATTR))
print_msg("Restored original build option 'module_only' to %s" % getattr(self, EESSI_MODULE_ONLY_ATTR))
Expand All @@ -679,9 +814,9 @@ def post_module_hook_unsupported_module(self, *args, **kwargs):

# If the variable to allow loading is set, remove it
if hasattr(self, 'initial_environ'):
if self.initial_environ.get(ignore_lmoderror_envvar, False):
print_msg(f"Removing {ignore_lmoderror_envvar} in initial environment")
del self.initial_environ[ignore_lmoderror_envvar]
if self.initial_environ.get(unsup_mod.envvar, False):
print_msg(f"Removing {unsup_mod.envvar} in initial environment")
del self.initial_environ[unsup_mod.envvar]


def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs):
Expand All @@ -698,21 +833,30 @@ def post_easyblock_hook_copy_easybuild_subdir(self, *args, **kwargs):
copy_dir(app_easybuild_dir, app_reprod_dir)


# Modules for dependencies are loaded in the prepare step. Thus, that's where we need this variable to be set
# so that the modules can be succesfully loaded without printing the error (so that we can create a module
# _with_ the warning for the current software being installed)
def pre_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced by generic pre_prepare_hook_unsupported_modules

"""Set environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase"""
if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name,
tcversion=self.toolchain.version):
os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR] = "1"

def pre_prepare_hook_cudnn(self, *args, **kwargs):
"""
cuDNN is a binary install, that doesn't always have the device code for the suffixed CUDA
Compute Capabilities such as 9.0a, 10.0f, 12.0f etc. This hook strips the suffixes for
cuDNN versions that don't have suffix-specific device code embedded in (all) their files,
as retaining the suffixes would lead to the EasyBuild CUDA sanity check failing.
"""

def post_prepare_hook_ignore_zen4_gcccore1220_error(self, *args, **kwargs):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced by generic post_prepare_hook_unsupported_modules

"""Unset environment variable to ignore the LmodError from parse_hook_zen4_module_only during build phase"""
if is_gcccore_1220_based(ecname=self.name, ecversion=self.version, tcname=self.toolchain.name,
tcversion=self.toolchain.version):
del os.environ[EESSI_IGNORE_ZEN4_GCC1220_ENVVAR]
if self.name == 'cuDNN':
    # The build option is only read (and possibly rewritten) for the cuDNN versions handled below;
    # for any other version the requested CUDA compute capabilities are left untouched.
    # cuDNN 9.5.0.50 doesn't have support for 9.0a in all binaries
    if self.version == "9.5.0.50":
        cuda_cc = build_option('cuda_compute_capabilities')
        if cuda_cc and '9.0a' in cuda_cc:
            updated_cuda_cc = [v.replace('9.0a', '9.0') for v in cuda_cc]
            update_build_option('cuda_compute_capabilities', updated_cuda_cc)
    # cuDNN 9.10.1.4 doesn't have support for 10.0f and 12.0f in all binaries
    elif self.version == "9.10.1.4":
        cuda_cc = build_option('cuda_compute_capabilities')
        # Both suffixed compute capabilities can be requested at the same time, so strip them in one pass.
        # Handling them in mutually exclusive branches would leave '12.0f' in place whenever '10.0f' is present.
        if cuda_cc and ('10.0f' in cuda_cc or '12.0f' in cuda_cc):
            updated_cuda_cc = [v.replace('10.0f', '10.0').replace('12.0f', '12.0') for v in cuda_cc]
            update_build_option('cuda_compute_capabilities', updated_cuda_cc)


def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs):
Expand Down Expand Up @@ -1640,6 +1784,7 @@ def post_easyblock_hook(self, *args, **kwargs):
PRE_FETCH_HOOKS = {}

PRE_PREPARE_HOOKS = {
'cuDNN': pre_prepare_hook_cudnn,
'Highway': pre_prepare_hook_highway_handle_test_compilation_issues,
'LLVM': pre_prepare_hook_llvm_a64fx,
'Rust': pre_prepare_hook_llvm_a64fx,
Expand Down
Loading