Adapt arch target map #312
Changes from 11 commits
@@ -306,19 +306,78 @@ signing =

[architecturetargets]
# defines for which architectures the bot will build and what job submission
# parameters shall be used to allocate a compute node with the correct
|
Contributor: Suggested change
# The keys of the arch_target_map are virtual partition names. They don't have any meaning in the bot code,
|
Contributor (suggested change): "virtual" and "partition" carry some meaning which could be misleading.
# and can thus be chosen as desired.
|
Contributor: Suggested change
# Note that you are responsible that ANY bot:build command ONLY matches a single virtual partition!
|
Contributor: As of now this is the case, but with #310, which only supports exact matches, this wouldn't be an issue any longer.
Contributor (suggested change): Actually, this is a little off. Maybe it would be better to explain that (without #310) if one has defined the keys
# If multiple partitions match the same bot:build command, a failure will be triggered in the job dir preparation
|
Contributor: This triggering of a failure is a new feature?
Contributor (author): Well, it's not really a feature :P I think it is just something that emerges since you can now have multiple items in the
arch_target_map = {
|
Contributor: Maybe it would be worthwhile to explain the structure of the entries in the map and what the key/value pairs are used for?
Contributor: What does it mean when one uses
    "linux/x86_64/generic": "--partition x86-64-generic-node",
    "linux/x86_64/amd/zen2": "--partition x86-64-amd-zen2-node" }
# This is a CPU-based partition. We do not specify an "accel" property explicitly. In this case, invoking the bot
# with ANY accelerator command will trigger a build on this partition, as long as the CPU type matches. E.g.
# bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen2 accel:nvidia/cc90
# will cause the event_filter to mark this virtual partition as a valid match, and will use it to start building
# for zen2 + nvidia/cc90. Thus, by not specifying an "accel" property, this partition may be used for
# cross-compilation for any accelerator.
    "cpu_zen2": {
        "os": "linux",
        "cpu_subdir": "x86_64/amd/zen2",
        "slurm_params": "-p rome --nodes 1 --ntasks-per-node 16 --cpus-per-task 1",
        "repo_targets": ["eessi.io-2023.06-compat", "eessi.io-2023.06-software"]
    },
# This is a CPU partition. We specify an explicit "accel": "None" property. Thus, this partition will only be
# used if the bot build command does NOT contain an accel argument, e.g.
# bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4
# will cause the event_filter to mark this virtual partition as a valid match, and will use it to start building
# for zen4.
|
Contributor: Hmm, I don't think "will use it to start building for zen4" really captures what is going on. The bot does not know anything about architectures, it will just submit jobs for any matches and uses the
# When invoking the bot with an accelerator command, such as
# bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90
# the event_filter will NOT mark this virtual partition as a valid match. This is intentional, as this particular
# (example) cluster has a native zen4+cc90 partition (gpu_h100) and we want this command to trigger a native build
# on that partition, rather than cross-compiling on this cpu_zen4 partition.
|
Contributor: Is that really the intention? It changes the meaning of [...] Plus the selection of the build node is now sometimes the result of only [...]
Contributor (author): It's always been a bit strange to me why [...] Anyway, I think this should be seen differently. If a partition defines [...] The build command then defines what accelerator we want to build for (e.g. it determines the prefix in which we'll install software). It is the job of the matching logic to then select a partition that can facilitate that request. This is pretty equivalent to how ReFrame does things with partition
# One could still allow cross-compilation for other accelerator architectures, e.g. cc70 and cc80, by defining
# "accel": ["nvidia/cc70", "nvidia/cc80"]
    "cpu_zen4": {
        "os": "linux",
        "cpu_subdir": "x86_64/amd/zen4",
        "accel": ["None"],
        "slurm_params": "-p genoa --nodes 1 --ntasks-per-node 24 --cpus-per-task 1",
        "repo_targets": ["eessi.io-2023.06-compat", "eessi.io-2023.06-software"]
    },
# This is a GPU partition. We specify an explicit "accel" property. Thus, only if the bot build command
# specifies that explicit accelerator in combination with the relevant CPU type,
# bot: build instance:xyz repo:eessi.io-2023.06-software arch:icelake accel:nvidia/cc80
# will a build be triggered on this partition.
# If you want to use this partition also for CPU-only builds, you can alter the "accel" property to
# "accel": ["None", "nvidia/cc80"]
    "gpu_a100": {
        "os": "linux",
        "cpu_subdir": "x86_64/intel/icelake",
        "accel": ["nvidia/cc80"],
        "slurm_params": "-p gpu_a100 --nodes 1 --tasks-per-node 18 --cpus-per-task 1 --gpus-per-node 1",
        "repo_targets": ["eessi.io-2023.06-compat", "eessi.io-2023.06-software"]
    },
# This is a GPU partition. We specify an explicit "accel" property. Thus, only if the bot build command
# specifies that explicit accelerator in combination with the relevant CPU type,
# bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90
# will a build be triggered on this partition.
# If you want to use this partition also for cross-compiling for cc70 and cc80 architectures, you can alter
# the "accel" property to
# "accel": ["nvidia/cc70", "nvidia/cc80", "nvidia/cc90"]
# Note that setting:
# "accel": ["None", "nvidia/cc90"]
# is invalid here, since it would lead to both the cpu_zen4 and the gpu_h100 partitions matching the build command
|
Contributor: "Invalid" in the sense the bot checks and refuses to even start, or "invalid" in the sense it is not recommended because it's ambiguous?
Contributor (author): Invalid in the sense that it will lead to an error (the bot will fail to prepare the job dir for the gpu_h100 partition because that job dir was already prepared for the same job on cpu_zen4).
Contributor: Which would never happen if partitions have unique names and a user targets a partition by its name. It would also be easier to understand what is going on.
# bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90
# This would cause the same job dir to be prepared twice, for different virtual partitions, which will lead
# to an error in the job preparation step
    "gpu_h100": {
        "os": "linux",
        "cpu_subdir": "x86_64/amd/zen4",
        "accel": ["nvidia/cc90"],
        "slurm_params": "-p gpu_h100 --nodes 1 --tasks-per-node 16 --cpus-per-task 1 --gpus-per-node 1",
        "repo_targets": ["eessi.io-2023.06-compat", "eessi.io-2023.06-software"]
    }}
[repo_targets]
# defines for which repositories an arch_target should be built
#
# EESSI/2023.06 and EESSI/2025.06
repo_target_map = {
    "linux/x86_64/amd/zen2": ["eessi.io-2023.06-software", "eessi.io-2025.06-software"] }
# points to definition of repositories (default repository defined by build container)
repos_cfg_dir = PATH_TO_SHARED_DIRECTORY/repos
@@ -29,8 +29,8 @@
# Local application imports (anything from EESSI/eessi-bot-software-layer)
from connections import github
from tasks.build import check_build_permission, get_architecture_targets, get_repo_cfg, \
    request_bot_build_issue_comments, submit_build_jobs
from tasks.build import check_build_permission, get_architecture_targets, request_bot_build_issue_comments, \
    submit_build_jobs
from tasks.deploy import deploy_built_artefacts, determine_job_dirs
from tasks.clean_up import move_to_trash_bin
from tools import config
@@ -104,7 +104,6 @@
    config.SECTION_JOB_MANAGER: [
        config.JOB_MANAGER_SETTING_POLL_INTERVAL],  # required
    config.SECTION_REPO_TARGETS: [
        config.REPO_TARGETS_SETTING_REPO_TARGET_MAP,  # required
        config.REPO_TARGETS_SETTING_REPOS_CFG_DIR],  # required
    config.SECTION_SUBMITTED_JOB_COMMENTS: [
        config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT,  # required
@@ -412,22 +411,22 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev
        # TODO check if PR already has a comment with arch targets and
        # repositories
        arch_map = get_architecture_targets(self.cfg)
        repo_cfg = get_repo_cfg(self.cfg)

        comment = f"Instance `{app_name}` is configured to build for:"
        architectures = ['/'.join(arch.split('/')[1:]) for arch in arch_map.keys()]
        comment += "\n- architectures: "
        if len(architectures) > 0:
            comment += f"{', '.join([f'`{arch}`' for arch in architectures])}"
        else:
            comment += "none"
        repositories = list(set([repo_id for repo_ids in repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP].values()
                                 for repo_id in repo_ids]))
        comment += "\n- repositories: "
        if len(repositories) > 0:
            comment += f"{', '.join([f'`{repo_id}`' for repo_id in repositories])}"
        else:
            comment += "none"
        for partition_num, arch in enumerate(arch_map):
            # Do not print virtual partition names, a bot admin may not want to share those
            # Instead, just number them
            comment += f"\n- Partition {partition_num+1}:"
|
Contributor: How would one be able to know the names if they are not shown?
Contributor (author): You don't need to know. They could be named
            current_partition = arch_map[arch]
            if "os" in current_partition:
                comment += f"\n - OS: {current_partition['os']}"
            if "cpu_subdir" in current_partition:
                comment += f"\n - CPU architecture: {current_partition['cpu_subdir']}"
            if "repo_targets" in current_partition:
                comment += f"\n - Repositories: {current_partition['repo_targets']}"
            if "accel" in current_partition:
                comment += f"\n - Accelerators: {current_partition['accel']}"
            comment += "\n"
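For illustration, the new per-partition rendering can be exercised standalone. The helper name and the one-entry map below are hypothetical, but the loop body mirrors the diff above:

```python
# Standalone sketch of the new PR comment rendering (helper name is hypothetical)
def render_build_comment(app_name, arch_map):
    comment = f"Instance `{app_name}` is configured to build for:"
    for partition_num, arch in enumerate(arch_map):
        # Virtual partition names are intentionally not printed; number them instead
        comment += f"\n- Partition {partition_num+1}:"
        current_partition = arch_map[arch]
        if "os" in current_partition:
            comment += f"\n - OS: {current_partition['os']}"
        if "cpu_subdir" in current_partition:
            comment += f"\n - CPU architecture: {current_partition['cpu_subdir']}"
        if "repo_targets" in current_partition:
            comment += f"\n - Repositories: {current_partition['repo_targets']}"
        if "accel" in current_partition:
            comment += f"\n - Accelerators: {current_partition['accel']}"
        comment += "\n"
    return comment

example_map = {
    "cpu_zen4": {
        "os": "linux",
        "cpu_subdir": "x86_64/amd/zen4",
        "accel": ["None"],
        "repo_targets": ["eessi.io-2023.06-software"],
    },
}
print(render_build_comment("xyz", example_map))
```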
        self.log(f"PR opened: comment '{comment}'")
@@ -241,8 +241,6 @@ def get_repo_cfg(cfg):
    Returns:
        (dict): dictionary containing repository settings as follows
        - {config.REPO_TARGETS_SETTING_REPOS_CFG_DIR: path to repository config directory as defined in 'app.cfg'}
        - {config.REPO_TARGETS_SETTING_REPO_TARGET_MAP: json of
          config.REPO_TARGETS_SETTING_REPO_TARGET_MAP value as defined in 'app.cfg'}
        - for all sections [repo_id] defined in config.REPO_TARGETS_SETTING_REPOS_CFG_DIR/repos.cfg add a
          mapping {repo_id: dictionary containing settings of that section}
    """
@@ -259,21 +257,6 @@
    settings_repos_cfg_dir = config.REPO_TARGETS_SETTING_REPOS_CFG_DIR
    repo_cfg[settings_repos_cfg_dir] = repo_cfg_org.get(settings_repos_cfg_dir, None)

    repo_map = {}
    try:
        repo_map_str = repo_cfg_org.get(config.REPO_TARGETS_SETTING_REPO_TARGET_MAP)
        log(f"{fn}(): repo_map '{repo_map_str}'")

        if repo_map_str is not None:
            repo_map = json.loads(repo_map_str)

        log(f"{fn}(): repo_map '{json.dumps(repo_map)}'")
    except json.JSONDecodeError as err:
        print(err)
        error(f"{fn}(): Value for repo_map ({repo_map_str}) could not be decoded.")

    repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP] = repo_map

    if repo_cfg[config.REPO_TARGETS_SETTING_REPOS_CFG_DIR] is None:
        return repo_cfg
@@ -627,13 +610,34 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
        return []

    jobs = []
    for arch, slurm_opt in arch_map.items():
        arch_dir = arch.replace('/', '_')
        # check if repo_target_map contains an entry for {arch}
        if arch not in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP]:
            log(f"{fn}(): skipping arch {arch} because repo target map does not define repositories to build for")
    # This loop assumes the following structure for arch_target_map
    # Note that 'accel' is a list, to easily allow a single CPU partition to be used for cross compilation
    # for a lot of accelerator targets
    # arch_target_map = {
|
bedroge marked this conversation as resolved. (Outdated)
    #     'virtual_partition_name': {
    #         'os': 'linux',
    #         'cpu_subdir': 'x86_64/amd/zen4',
    #         'accel': ['nvidia/cc90'],
    #         'slurm_params': '-p genoa <etc>',
    #         'repo_targets': ["eessi.io-2023.06-compat", "eessi.io-2023.06-software"],
    #     },
    #     'virtual_partition_name2': {
    #         ... etc
|
Contributor: This could be used at the start of the explanation in
Contributor (author): Good point, I'll move it.
    for virtual_partition_name, partition_info in arch_map.items():
        log(f"{fn}(): virtual_partition_name is {virtual_partition_name}, partition_info is {partition_info}")
        # Unpack for convenience
        arch_dir = partition_info['cpu_subdir']
        if 'accel' in partition_info and accelerator is not None:
            # Use the accelerator as defined by the action_filter. We check if this is valid for the current
            # virtual partition later
            arch_dir += accelerator
|
Contributor (author): You mean: the slash after zen4 is missing? That's indeed a mistake.
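The exchange above points at two pitfalls in the `arch_dir` construction: the missing `/` separator before the accelerator, and the fact that `str.replace` returns a new string rather than modifying in place (so a bare `arch_dir.replace('/', '_')` discards its result). A corrected sketch, with a hypothetical helper name:

```python
def build_arch_dir(cpu_subdir, accelerator=None):
    # Join CPU subdir and accelerator with an explicit '/' separator
    arch_dir = cpu_subdir
    if accelerator is not None:
        arch_dir += '/' + accelerator
    # Flatten to a directory-safe name; str.replace is not in-place,
    # so its result must be returned (or reassigned), not discarded
    return arch_dir.replace('/', '_')

print(build_arch_dir('x86_64/amd/zen4', 'nvidia/cc90'))  # x86_64_amd_zen4_nvidia_cc90
```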
        arch_dir.replace('/', '_')
        # check if repo_targets is defined for this virtual partition
        if 'repo_targets' not in partition_info:
            log(f"{fn}(): skipping arch {virtual_partition_name}, "
                "because no repo_targets were defined for this (virtual) partition")
            continue
        for repo_id in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP][arch]:
        for repo_id in partition_info['repo_targets']:
            # ensure repocfg contains information about the repository repo_id if repo_id != EESSI
            # Note, EESSI is a bad/misleading name, it should be more like AS_IN_CONTAINER
            if (repo_id != "EESSI" and repo_id != "EESSI-pilot") and repo_id not in repocfg:
@@ -646,13 +650,40 @@
| # false --> log & continue to next iteration of for loop | ||
| if action_filter: | ||
| log(f"{fn}(): checking filter {action_filter.to_string()}") | ||
| context = {"architecture": arch, "repository": repo_id, "instance": app_name} | ||
| log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") | ||
| if not action_filter.check_filters(context): | ||
| log(f"{fn}(): context does NOT satisfy filter(s), skipping") | ||
| continue | ||
| context = { | ||
| "architecture": partition_info['cpu_subdir'], | ||
| "repository": repo_id, | ||
| "instance": app_name | ||
| } | ||
| # Optionally add accelerator to the context | ||
| if 'accel' in partition_info: | ||
| match = False | ||
| # Create a context for each accelerator defined in app.cfg, then | ||
| # check if _any_ of them is valid (one is enough to continue) | ||
| for accel in partition_info['accel']: | ||
| context['accelerator'] = accel | ||
| log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") | ||
| if not action_filter.check_filters(context): | ||
| log(f"{fn}(): context does NOT satisfy filter(s), skipping") | ||
| continue | ||
| # check = check | action_filter.check_filters(context) | ||
| else: | ||
| log(f"{fn}(): context DOES satisfy filter(s), going on with job") | ||
| match = True | ||
| # Break as soon as we have found a valid context, it means the build args are valid | ||
| # for at least one of the accelerators in this virtual partition, that's enough | ||
| break | ||
| # If we get to this point, and none of the contexts matched the filter, we should continue to the | ||
| # next iteration of the partition_info['repo_targets'] loop | ||
| if not match: | ||
| continue | ||
| else: | ||
| log(f"{fn}(): context DOES satisfy filter(s), going on with job") | ||
| log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") | ||
| if not action_filter.check_filters(context): | ||
| log(f"{fn}(): context does NOT satisfy filter(s), skipping") | ||
| continue | ||
| else: | ||
| log(f"{fn}(): context DOES satisfy filter(s), going on with job") | ||
| # we reached this point when the filter matched (otherwise we | ||
| # 'continue' with the next repository) | ||
| # for each match of the filter we create a specific job directory | ||
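The accelerator loop above accepts a partition as soon as any of its configured accelerators satisfies the filter. The same "one match is enough" check can be sketched compactly with `any()`; the helper name and the stub predicate below are illustrative, standing in for `action_filter.check_filters`:

```python
def accel_context_matches(base_context, accel_list, check_filters):
    # True if the filter accepts the context for at least one of the
    # accelerators configured for this virtual partition
    return any(
        check_filters({**base_context, "accelerator": accel})
        for accel in accel_list
    )

# Stub check standing in for action_filter.check_filters: the build
# command requested nvidia/cc90 specifically
check = lambda ctx: ctx.get("accelerator") == "nvidia/cc90"
base = {"architecture": "x86_64/amd/zen4", "repository": "eessi.io-2023.06-software"}
print(accel_context_matches(base, ["None", "nvidia/cc90"], check))  # True
print(accel_context_matches(base, ["None"], check))                 # False
```

Note that the real code also keeps the matching accelerator in the context for later use, which this boolean-only sketch omits.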
@@ -673,19 +704,23 @@
            )
            comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage)
            # prepare job configuration file 'job.cfg' in directory <job_dir>/cfg
            cpu_target = '/'.join(arch.split('/')[1:])
            os_type = arch.split('/')[0]

            log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'"
                f", accelerator = '{accelerator}'")
            msg = f"{fn}(): virtual partition = '{virtual_partition_name}' => "
            msg += f"configured cpu_target = '{partition_info['cpu_subdir']}' , "
            msg += f"configured os = '{partition_info['os']}', "
            if 'accel' in partition_info:
                msg += f"configured accelerator(s) = '{partition_info['accel']}', "
            msg += f"requested accelerator = '{accelerator}'"
            log(msg)

            prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator)
            prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, partition_info['cpu_subdir'],
                            partition_info['os'], accelerator)

            if exportvars:
                prepare_export_vars_file(job_dir, exportvars)

            # enlist jobs to proceed
            job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id, accelerator)
            job = Job(job_dir, partition_info['cpu_subdir'], repo_id, partition_info['slurm_params'], year_month,
                      pr_id, accelerator)
            jobs.append(job)

    log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list")