From 4415afc3ac65aba3df5cb9f3cebc9ef72dc227ef Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 17 Apr 2025 16:42:10 +0200 Subject: [PATCH 01/47] Make a first attempt to change the expected arch_target_map. Make sure keys don't have meaning, and all meaning is in the values --- tasks/build.py | 70 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 7a0b1c83..bba65e76 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -600,13 +600,35 @@ def prepare_jobs(pr, cfg, event_info, action_filter): return [] jobs = [] - for arch, slurm_opt in arch_map.items(): - arch_dir = arch.replace('/', '_') + # This loop assumes the following structure for arch_target_map: + # arch_target_map = { + # 'virtual_partition_name': { + # 'os': 'linux', + # 'cpu_subdir': 'x86_64/amd/zen4', + # 'accel': ['nvidia/cc90'], # Make this a list, so that we can easily cross compile for a large list with one defined virtual partition + # 'slurm_params': '-p genoa ', + # 'repo_targets': ["eessi.io-2023.06-compat","eessi.io-2023.06-software"], + # }, + # 'virtual_partition_name2': { + # ... etc + for virtual_partition_name, partition_info in arch_map.items(): + # Unpack for convenience + arch_dir = partition_info['cpu_subdir'] + if partition_info['accel']: + # Use the accelerator as defined by the action_filter. 
We check if this is valid for the current + # virtual partition later + arch_dir += accelerator + arch_dir.replace('/', '_') + # check if repo_targets is defined for this virtual partition + if not 'repo_targets' in partition_info: + log(f"{fn}(): skipping arch {virtual_partition_name}, " + "because no repo_targets were defined for this (virtual) partition") + continue # check if repo_target_map contains an entry for {arch} if arch not in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP]: log(f"{fn}(): skipping arch {arch} because repo target map does not define repositories to build for") continue - for repo_id in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP][arch]: + for repo_id in partition_info['repo_targets']: # ensure repocfg contains information about the repository repo_id if repo_id != EESSI # Note, EESSI is a bad/misleading name, it should be more like AS_IN_CONTAINER if (repo_id != "EESSI" and repo_id != "EESSI-pilot") and repo_id not in repocfg: @@ -619,13 +641,30 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # false --> log & continue to next iteration of for loop if action_filter: log(f"{fn}(): checking filter {action_filter.to_string()}") - context = {"architecture": arch, "repository": repo_id, "instance": app_name} - log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - if not action_filter.check_filters(context): - log(f"{fn}(): context does NOT satisfy filter(s), skipping") - continue + context = { + "architecture": partition_info['cpu_subdir'], + "repository": repo_id, + "instance": app_name + } + # Optionally add accelerator to the context + check = False + if partition_info['accel']: + # Create a context for each accelerator, check if _any_ of them is valid + # (one is enough to continue) + for accel in partition_info['accel']: + context['accelerator'] = accel + log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") + check = check | action_filter.check_filters(context) + if not check: + log(f"{fn}(): none 
of the contexts satisfy filter(s), skipping") + continue else: - log(f"{fn}(): context DOES satisfy filter(s), going on with job") + log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") + if not action_filter.check_filters(context): + log(f"{fn}(): context does NOT satisfy filter(s), skipping") + continue + else: + log(f"{fn}(): context DOES satisfy filter(s), going on with job") # we reached this point when the filter matched (otherwise we # 'continue' with the next repository) # for each match of the filter we create a specific job directory @@ -645,19 +684,18 @@ def prepare_jobs(pr, cfg, event_info, action_filter): ) comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage) # prepare job configuration file 'job.cfg' in directory /cfg - cpu_target = '/'.join(arch.split('/')[1:]) - os_type = arch.split('/')[0] - - log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'" - f", accelerator = '{accelerator}'") + log(f"{fn}(): arch = '{arch}' => cpu_target = '{partition_info['cpu_subdir']}' , " + f"os_type = '{partition_info['os']}', accelerator = '{accelerator}'") - prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator) + prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, partition_info['cpu_subdir'], + partition_info['os'], accelerator) if exportvars: prepare_export_vars_file(job_dir, exportvars) # enlist jobs to proceed - job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id, accelerator) + job = Job(job_dir, partition_info['cpu_subdir'], repo_id, partition_info['slurm_params'], year_month, + pr_id, accelerator) jobs.append(job) log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list") From d3240e4738fcfe4884b32c1c41d22350447a3680 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 17 Apr 2025 16:45:07 +0200 Subject: [PATCH 02/47] Remove old code that was replaced --- tasks/build.py | 4 ---- 1 file changed, 4 
deletions(-) diff --git a/tasks/build.py b/tasks/build.py index bba65e76..e4e9dfbc 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -624,10 +624,6 @@ def prepare_jobs(pr, cfg, event_info, action_filter): log(f"{fn}(): skipping arch {virtual_partition_name}, " "because no repo_targets were defined for this (virtual) partition") continue - # check if repo_target_map contains an entry for {arch} - if arch not in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP]: - log(f"{fn}(): skipping arch {arch} because repo target map does not define repositories to build for") - continue for repo_id in partition_info['repo_targets']: # ensure repocfg contains information about the repository repo_id if repo_id != EESSI # Note, EESSI is a bad/misleading name, it should be more like AS_IN_CONTAINER From b54fdf4d19695d4a7052d29a9e289b5a2fa6f71e Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 30 Jun 2025 13:57:19 +0200 Subject: [PATCH 03/47] Fix xome small issues with non-existing keys --- tasks/build.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 6fdf2fac..168e3246 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -638,10 +638,14 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # }, # 'virtual_partition_name2': { # ... etc + # DEBUG LOGGING + log(f"arch_map: {arch_map})") for virtual_partition_name, partition_info in arch_map.items(): + # DEBUG LOGGING + log(f"virtual_partition_name: {virtual_partition_name}, partition_info={partition_info}") # Unpack for convenience arch_dir = partition_info['cpu_subdir'] - if partition_info['accel']: + if 'accel' in partition_info: # Use the accelerator as defined by the action_filter. 
We check if this is valid for the current # virtual partition later arch_dir += accelerator @@ -671,7 +675,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): } # Optionally add accelerator to the context check = False - if partition_info['accel']: + if 'accel' in partition_info: # Create a context for each accelerator, check if _any_ of them is valid # (one is enough to continue) for accel in partition_info['accel']: @@ -708,7 +712,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): ) comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage) # prepare job configuration file 'job.cfg' in directory /cfg - log(f"{fn}(): arch = '{arch}' => cpu_target = '{partition_info['cpu_subdir']}' , " + log(f"{fn}(): virtual partition = '{virtual_partition_name}' => cpu_target = '{partition_info['cpu_subdir']}' , " f"os_type = '{partition_info['os']}', accelerator = '{accelerator}'") prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, partition_info['cpu_subdir'], From 1e6c3d9f2b2eee991b27e1e8c8c7199ce27e7912 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 30 Jun 2025 17:32:24 +0200 Subject: [PATCH 04/47] Avoid doing string += None for the arch_dir if accelerator = None. Also, make sure to print the same information to the logs, regardless of whether this is about a partition that has accelerators defined or not. 
Finally, make sure that if we have not hit a match by the end of the loop over all accelerators, we continue to the next iteration of the loop over the repo-targets, so that the job-dir preparation is skipped for the current one --- tasks/build.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 168e3246..757423bd 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -645,7 +645,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): log(f"virtual_partition_name: {virtual_partition_name}, partition_info={partition_info}") # Unpack for convenience arch_dir = partition_info['cpu_subdir'] - if 'accel' in partition_info: + if 'accel' in partition_info and accelerator is not None: # Use the accelerator as defined by the action_filter. We check if this is valid for the current # virtual partition later arch_dir += accelerator @@ -676,14 +676,26 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # Optionally add accelerator to the context check = False if 'accel' in partition_info: - # Create a context for each accelerator, check if _any_ of them is valid - # (one is enough to continue) + match = False + # Create a context for each accelerator defined in app.cfg, then + # check if _any_ of them is valid (one is enough to continue) for accel in partition_info['accel']: context['accelerator'] = accel log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - check = check | action_filter.check_filters(context) - if not check: - log(f"{fn}(): none of the contexts satisfy filter(s), skipping") + # TODO: it seems the check_filters does not enforce the accelerator to be present in the context - that should be implemented + if not action_filter.check_filters(context): + log(f"{fn}(): context does NOT satisfy filter(s), skipping") + continue + # check = check | action_filter.check_filters(context) + else: + log(f"{fn}(): context DOES satisfy filter(s), going on with job") + match = True 
+ # Break as soon as we have found a valid context, it means the build args are valid + # for at least one of the accelerators in this virtual partition, that's enough + break + # If we get to this point, and none of the contexts matched the filter, we should continue to the + # next iteration of the partition_info['repo_targets'] loop + if not match: continue else: log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") @@ -713,7 +725,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage) # prepare job configuration file 'job.cfg' in directory /cfg log(f"{fn}(): virtual partition = '{virtual_partition_name}' => cpu_target = '{partition_info['cpu_subdir']}' , " - f"os_type = '{partition_info['os']}', accelerator = '{accelerator}'") + f"os_type = '{partition_info['os']}', requested accelerator = '{accelerator}'") prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, partition_info['cpu_subdir'], partition_info['os'], accelerator) From e000c91768dbdde73e0025985edf21cf0b2a9d70 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 30 Jun 2025 17:33:54 +0200 Subject: [PATCH 05/47] Make sure that if the context (i.e. app.cfg) defines AN accelerator, AND if the build command does NOT, we explicitely check that the defined accelerator is 'None'. This allows skipping CPU-only builds on accelerated partitions. --- tools/filter.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/filter.py b/tools/filter.py index 0caa2af8..9665c7c5 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -303,4 +303,16 @@ def check_filters(self, context): else: check = False break + + # If the context declares an accelerator, but the build command did not (i.e. 
no action filter is defined for the accelerator component) + # then the check should only return True if "None" was the accelerator defined in the context + # This ensures that on partitions that no CPU-only builds are done on accelerated partitions, unless these partitions are + # explicitly configured with "None" as _one_ of the valid accelerators in their `accel:` list in app.cfg + if ( + FILTER_COMPONENT_ACCEL in context and not + any(af.component == FILTER_COMPONENT_ACCEL for af in self.action_filters) + ): + if not context[FILTER_COMPONENT_ACCEL] == "None": + check = False + return check From 40ceefe938a0f831f7122c0c06865ee69e16626b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 30 Jun 2025 17:35:07 +0200 Subject: [PATCH 06/47] Some cleanup --- tasks/build.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 757423bd..30cfcabc 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -638,11 +638,8 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # }, # 'virtual_partition_name2': { # ... 
etc - # DEBUG LOGGING - log(f"arch_map: {arch_map})") for virtual_partition_name, partition_info in arch_map.items(): - # DEBUG LOGGING - log(f"virtual_partition_name: {virtual_partition_name}, partition_info={partition_info}") + log(f"{fn}(): virtual_partition_name is {virtual_partition_name}, partition_info is {partition_info}") # Unpack for convenience arch_dir = partition_info['cpu_subdir'] if 'accel' in partition_info and accelerator is not None: @@ -682,7 +679,6 @@ def prepare_jobs(pr, cfg, event_info, action_filter): for accel in partition_info['accel']: context['accelerator'] = accel log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - # TODO: it seems the check_filters does not enforce the accelerator to be present in the context - that should be implemented if not action_filter.check_filters(context): log(f"{fn}(): context does NOT satisfy filter(s), skipping") continue From fad4f47babc490cb8e8711ec565f993e22ae8931 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 1 Jul 2025 16:54:50 +0200 Subject: [PATCH 07/47] Remove repo_target_map from config, and all occurences that import it, as it is now replaced by the repo list in the arch_target_map. Also, fix hound issues --- app.cfg.example | 79 +++++++++++++++++++++++++++++++++----- eessi_bot_event_handler.py | 26 ++++++------- tasks/build.py | 37 +++++++----------- tools/config.py | 1 - tools/filter.py | 9 +++-- 5 files changed, 98 insertions(+), 54 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index 63905832..bf7911b5 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -306,19 +306,78 @@ signing = [architecturetargets] # defines for which architectures the bot will build and what job submission -# parameters shall be used to allocate a compute node with the correct +# parameters shall be used to allocate a compute node with the correct +# The keys of the arch_target_map are virtual partition names. 
They don't have any meaning in the bot code, +# and can thus be chosen as desired. +# Note that you are responsible that ANY bot:build command ONLY matches a single virtual partition! +# If multiple partitions match the same bot:build command, a failure will be triggered in the job dir preparation arch_target_map = { - "linux/x86_64/generic": "--partition x86-64-generic-node", - "linux/x86_64/amd/zen2": "--partition x86-64-amd-zen2-node" } - + # This is a CPU-based partition. We do not specify an "accel" property explicitly. In this case, invoking the bot + # with ANY accelerator command will trigger a build on this partition, as long as the CPU type matches. E.g. + # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen2 accel:nvidia/cc90 + # will cause the event_filter to mark this virtual partition as a valid match, and will use it to start building + # for zen2 + nvidia/cc90. Thus, by not specifying an "accel" property, this partition may be used for + # cross-compilation for any accelerator. + "cpu_zen2": { + "os": "linux", + "cpu_subdir": "x86_64/amd/zen2", + "slurm_params": "-p rome --nodes 1 --ntasks-per-node 16 --cpus-per-task 1", + "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] + }, + # This is a CPU partition. We specify an explicit "accel": "None" property. Thus, this partition will only be + # used if the bot build command does NOT contain an accel argument, e.g. + # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 + # will cause the event_filter to mark this virtual partition as a valid match, and will use it to start building + # for zen4. + # When invoking the bot with an accelerator command, such as + # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90 + # the event_filter will NOT mark this virtual partition as a valid match. 
This is intentional, as this particular + # (example) cluster has a native zen4+cc90 partition (gpu_h100) and we want this command to trigger a native build + # on that partition, rather than cross-compiling on this cpu_zen4 partition. + # One could still allow cross-compilation for other accelerator architectures, e.g. cc70 and cc80 by defining + # "accel": ["nvidia/cc70", "nvidia/cc80"] + "cpu_zen4": { + "os": "linux", + "cpu_subdir": "x86_64/amd/zen4", + "accel": ["None"], + "slurm_params": "-p genoa --nodes 1 --ntasks-per-node 24 --cpus-per-task 1", + "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] + }, + # This is a GPU partition. We specify an explicit "accel" property. Thus, only if the bot build command + # specifies that explicit accelerator in combination with the relevant CPU type, + # bot: build instance:xyz repo:eessi.io-2023.06-software arch:icelake accel:nvidia/cc80 + # will a build be triggered on this partition + # If you want to use this partition also for CPU only builds, you can alter the "accel" property to + # "accel": ["None", "nvidia/cc80"] + "gpu_a100": { + "os": "linux", + "cpu_subdir": "x86_64/intel/icelake", + "accel": ["nvidia/cc80"], + "slurm_params": "-p gpu_a100 --nodes 1 --tasks-per-node 18 --cpus-per-task 1 --gpus-per-node 1", + "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] + }, + # This is a GPU partition. We specify an explicit "accel" property. 
Thus, only if the bot build command + # specifies that explicit accelerator in combination with the relevant CPU type, + # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90 + # will a build be triggered on this partition + # If you want to use this partition also for cross-compiling for cc70 and cc80 architectures, you can alter + # the "accel" property to + # "accel": ["nvidia/cc70", "nvidia/cc80", "nvidia/cc90"] + # Note that setting: + # "accel": ["None", "nvidia/cc90"] + # is invalid here, since it would lead to both the cpu_zen4 and the gpu_h100 partitions matching the build command + # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90 + # This would cause the same job dir to be prepared twice, for different virtual partitions, which will lead + # to an error in the job preparation step + "gpu_h100": { + "os": "linux", + "cpu_subdir": "x86_64/amd/zen4", + "accel": ["nvidia/cc90"], + "slurm_params": "-p gpu_h100 --nodes 1 --tasks-per-node 16 --cpus-per-task 1 --gpus-per-node 1", + "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] + }} [repo_targets] -# defines for which repository a arch_target should be build for -# -# EESSI/2023.06 and EESSI/2025.06 -repo_target_map = { - "linux/x86_64/amd/zen2" : ["eessi.io-2023.06-software","eessi.io-2025.06-software"] } - # points to definition of repositories (default repository defined by build container) repos_cfg_dir = PATH_TO_SHARED_DIRECTORY/repos diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index b1d4123a..df777914 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -104,7 +104,6 @@ config.SECTION_JOB_MANAGER: [ config.JOB_MANAGER_SETTING_POLL_INTERVAL], # required config.SECTION_REPO_TARGETS: [ - config.REPO_TARGETS_SETTING_REPO_TARGET_MAP, # required config.REPO_TARGETS_SETTING_REPOS_CFG_DIR], # required config.SECTION_SUBMITTED_JOB_COMMENTS: [ 
config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, # required @@ -412,22 +411,19 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev # TODO check if PR already has a comment with arch targets and # repositories arch_map = get_architecture_targets(self.cfg) - repo_cfg = get_repo_cfg(self.cfg) comment = f"Instance `{app_name}` is configured to build for:" - architectures = ['/'.join(arch.split('/')[1:]) for arch in arch_map.keys()] - comment += "\n- architectures: " - if len(architectures) > 0: - comment += f"{', '.join([f'`{arch}`' for arch in architectures])}" - else: - comment += "none" - repositories = list(set([repo_id for repo_ids in repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP].values() - for repo_id in repo_ids])) - comment += "\n- repositories: " - if len(repositories) > 0: - comment += f"{', '.join([f'`{repo_id}`' for repo_id in repositories])}" - else: - comment += "none" + for partition_num, arch in enumerate(arch_map): + comment += f"\n- partition {partition_num+1}:" + if "os" in arch: + comment += f"\n - os: {arch[os]}" + if "cpu_subdir" in arch: + comment += f"\n - architecture: {arch[cpu_subdir]}" + if "repo_targets" in arch: + comment += f"\n - repositories: {arch[repo_targets]}" + if "accel" in arch: + comment += f"\n - accelerators: {arch[accel]}" + comment += "\n" self.log(f"PR opened: comment '{comment}'") diff --git a/tasks/build.py b/tasks/build.py index 30cfcabc..bc5e5fb3 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -241,8 +241,6 @@ def get_repo_cfg(cfg): Returns: (dict): dictionary containing repository settings as follows - {config.REPO_TARGETS_SETTING_REPOS_CFG_DIR: path to repository config directory as defined in 'app.cfg'} - - {config.REPO_TARGETS_SETTING_REPO_TARGET_MAP: json of - config.REPO_TARGETS_SETTING_REPO_TARGET_MAP value as defined in 'app.cfg'} - for all sections [repo_id] defined in config.REPO_TARGETS_SETTING_REPOS_CFG_DIR/repos.cfg add a mapping {repo_id: dictionary 
containing settings of that section} """ @@ -259,21 +257,6 @@ def get_repo_cfg(cfg): settings_repos_cfg_dir = config.REPO_TARGETS_SETTING_REPOS_CFG_DIR repo_cfg[settings_repos_cfg_dir] = repo_cfg_org.get(settings_repos_cfg_dir, None) - repo_map = {} - try: - repo_map_str = repo_cfg_org.get(config.REPO_TARGETS_SETTING_REPO_TARGET_MAP) - log(f"{fn}(): repo_map '{repo_map_str}'") - - if repo_map_str is not None: - repo_map = json.loads(repo_map_str) - - log(f"{fn}(): repo_map '{json.dumps(repo_map)}'") - except json.JSONDecodeError as err: - print(err) - error(f"{fn}(): Value for repo_map ({repo_map_str}) could not be decoded.") - - repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP] = repo_map - if repo_cfg[config.REPO_TARGETS_SETTING_REPOS_CFG_DIR] is None: return repo_cfg @@ -627,12 +610,14 @@ def prepare_jobs(pr, cfg, event_info, action_filter): return [] jobs = [] - # This loop assumes the following structure for arch_target_map: + # This loop assumes the following structure for arch_target_map + # Note that 'accel' is a list, to easily allow a single CPU partition to be used for cross compilation + # for a lot of accelerator targets # arch_target_map = { # 'virtual_partition_name': { # 'os': 'linux', # 'cpu_subdir': 'x86_64/amd/zen4', - # 'accel': ['nvidia/cc90'], # Make this a list, so that we can easily cross compile for a large list with one defined virtual partition + # 'accel': ['nvidia/cc90'], # 'slurm_params': '-p genoa ', # 'repo_targets': ["eessi.io-2023.06-compat","eessi.io-2023.06-software"], # }, @@ -648,9 +633,9 @@ def prepare_jobs(pr, cfg, event_info, action_filter): arch_dir += accelerator arch_dir.replace('/', '_') # check if repo_targets is defined for this virtual partition - if not 'repo_targets' in partition_info: + if 'repo_targets' not in partition_info: log(f"{fn}(): skipping arch {virtual_partition_name}, " - "because no repo_targets were defined for this (virtual) partition") + "because no repo_targets were defined for this (virtual) 
partition") continue for repo_id in partition_info['repo_targets']: # ensure repocfg contains information about the repository repo_id if repo_id != EESSI @@ -671,7 +656,6 @@ def prepare_jobs(pr, cfg, event_info, action_filter): "instance": app_name } # Optionally add accelerator to the context - check = False if 'accel' in partition_info: match = False # Create a context for each accelerator defined in app.cfg, then @@ -720,8 +704,13 @@ def prepare_jobs(pr, cfg, event_info, action_filter): ) comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage) # prepare job configuration file 'job.cfg' in directory /cfg - log(f"{fn}(): virtual partition = '{virtual_partition_name}' => cpu_target = '{partition_info['cpu_subdir']}' , " - f"os_type = '{partition_info['os']}', requested accelerator = '{accelerator}'") + msg = f"{fn}(): virtual partition = '{virtual_partition_name}' => " + msg += f"configured cpu_target = '{partition_info['cpu_subdir']}' , " + msg += f"configured os = '{partition_info['os']}', " + if 'accel' in partition_info: + msg += f"configured accelerator(s) = '{partition_info['accel']}, " + msg += f"requested accelerator = '{accelerator}'" + log(msg) prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, partition_info['cpu_subdir'], partition_info['os'], accelerator) diff --git a/tools/config.py b/tools/config.py index fc790d56..d0ec6498 100644 --- a/tools/config.py +++ b/tools/config.py @@ -110,7 +110,6 @@ NEW_JOB_COMMENTS_SETTING_AWAITS_LAUNCH = 'awaits_launch' SECTION_REPO_TARGETS = 'repo_targets' -REPO_TARGETS_SETTING_REPO_TARGET_MAP = 'repo_target_map' REPO_TARGETS_SETTING_REPOS_CFG_DIR = 'repos_cfg_dir' SECTION_RUNNING_JOB_COMMENTS = 'running_job_comments' diff --git a/tools/filter.py b/tools/filter.py index 9665c7c5..2c4146bb 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -304,10 +304,11 @@ def check_filters(self, context): check = False break - # If the context declares an accelerator, but the build 
command did not (i.e. no action filter is defined for the accelerator component) - # then the check should only return True if "None" was the accelerator defined in the context - # This ensures that on partitions that no CPU-only builds are done on accelerated partitions, unless these partitions are - # explicitly configured with "None" as _one_ of the valid accelerators in their `accel:` list in app.cfg + # If the context declares an accelerator, but the build command did not (i.e. no action filter is defined + # for the accelerator component) then the check should only return True if "None" was the accelerator defined + # in the context. This ensures that no CPU-only builds are done on accelerated partitions, unless these + # partitions are explicitly configured with "None" as _one_ of the valid accelerators in their `accel:` list + # in app.cfg if ( FILTER_COMPONENT_ACCEL in context and not any(af.component == FILTER_COMPONENT_ACCEL for af in self.action_filters) From b4032a6becd6596a5377ca5049b1bc8dc40b1e5c Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 1 Jul 2025 16:56:42 +0200 Subject: [PATCH 08/47] Fix quotation of keys --- eessi_bot_event_handler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index df777914..78bb8e0c 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -416,13 +416,13 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev for partition_num, arch in enumerate(arch_map): comment += f"\n- partition {partition_num+1}:" if "os" in arch: - comment += f"\n - os: {arch[os]}" + comment += f"\n - os: {arch['os']}" if "cpu_subdir" in arch: - comment += f"\n - architecture: {arch[cpu_subdir]}" + comment += f"\n - architecture: {arch['cpu_subdir']}" if "repo_targets" in arch: - comment += f"\n - repositories: {arch[repo_targets]}" + comment += f"\n - repositories: {arch['repo_targets']}" if "accel" in 
arch: - comment += f"\n - accelerators: {arch[accel]}" + comment += f"\n - accelerators: {arch['accel']}" comment += "\n" self.log(f"PR opened: comment '{comment}'") From 3898df7a4fb25dbc45d064907bff6390ff4dde40 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 1 Jul 2025 16:58:00 +0200 Subject: [PATCH 09/47] Fix flake8 issue --- eessi_bot_event_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 78bb8e0c..10767c99 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -29,8 +29,8 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from connections import github -from tasks.build import check_build_permission, get_architecture_targets, get_repo_cfg, \ - request_bot_build_issue_comments, submit_build_jobs +from tasks.build import check_build_permission, get_architecture_targets, request_bot_build_issue_comments, \ + submit_build_jobs from tasks.deploy import deploy_built_artefacts, determine_job_dirs from tasks.clean_up import move_to_trash_bin from tools import config From ef0c43045ac2ef8f2b73e20be254f87ddd7e2e5a Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 1 Jul 2025 17:10:10 +0200 Subject: [PATCH 10/47] Unpack the actual arch_target_map by accessing it with a key to get to individual partitions when printing the config --- eessi_bot_event_handler.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 10767c99..dfefe5df 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -414,15 +414,18 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev comment = f"Instance `{app_name}` is configured to build for:" for partition_num, arch in enumerate(arch_map): - comment += f"\n- partition {partition_num+1}:" - if "os" in arch: - comment += f"\n - os: {arch['os']}" 
- if "cpu_subdir" in arch: - comment += f"\n - architecture: {arch['cpu_subdir']}" - if "repo_targets" in arch: - comment += f"\n - repositories: {arch['repo_targets']}" - if "accel" in arch: - comment += f"\n - accelerators: {arch['accel']}" + # Do not print virtual partition names, a bot admin may not want to share those + # Instead, just number them + comment += f"\n- Partition {partition_num+1}:" + current_partition = arch_map[arch] + if "os" in current_partition: + comment += f"\n - OS: {current_partition['os']}" + if "cpu_subdir" in current_partition: + comment += f"\n - CPU architecture: {current_partition['cpu_subdir']}" + if "repo_targets" in current_partition: + comment += f"\n - Repositories: {current_partition['repo_targets']}" + if "accel" in current_partition: + comment += f"\n - Accelerators: {current_partition['accel']}" comment += "\n" self.log(f"PR opened: comment '{comment}'") From e3df69044a3eff188da8b3a8c67dfa08b0500bc7 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 8 Jul 2025 15:40:34 +0200 Subject: [PATCH 11/47] Fix mistake in build path --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index bc5e5fb3..3031901e 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -630,7 +630,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): if 'accel' in partition_info and accelerator is not None: # Use the accelerator as defined by the action_filter. 
We check if this is valid for the current # virtual partition later - arch_dir += accelerator + arch_dir += f"/{accelerator}" arch_dir.replace('/', '_') # check if repo_targets is defined for this virtual partition if 'repo_targets' not in partition_info: From 424a0012309152c66bfbb5506db7622284d9aba2 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 9 Jul 2025 14:46:42 +0200 Subject: [PATCH 12/47] Parse on: and for: options, and pass the correct values on to the command filters --- tools/commands.py | 74 ++++++++++++++++++++++++++++++++++++++++------- tools/filter.py | 11 +++---- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/tools/commands.py b/tools/commands.py index b842cc19..18f3f228 100644 --- a/tools/commands.py +++ b/tools/commands.py @@ -85,19 +85,71 @@ def __init__(self, cmd_str): """ # TODO add function name to log messages cmd_as_list = cmd_str.split() - self.command = cmd_as_list[0] + self.command = cmd_as_list[0] # E.g. 'build' or 'help' + # TODO always init self.action_filters with empty EESSIBotActionFilter? 
if len(cmd_as_list) > 1: - arg_str = " ".join(cmd_as_list[1:]) - try: - self.action_filters = EESSIBotActionFilter(arg_str) - except EESSIBotActionFilterError as err: - log(f"ERROR: EESSIBotActionFilterError - {err.args}") - self.action_filters = None - raise EESSIBotCommandError("invalid action filter") - except Exception as err: - log(f"Unexpected err={err}, type(err)={type(err)}") - raise + + # Extract arguments for the action filters + # By default, everything that follows the 'on:' argument (until the next space) is + # considered part of the argument list for the action filters + target_args = [] + other_filter_args = [] + on_found = False + for arg in cmd_as_list[1:]: + if arg.startswith('on:'): + on_found = True + # Extract everything after 'on:' and split by comma + filter_content = arg[3:] # Remove 'on:' prefix + target_args.extend(filter_content.split(',')) + elif not arg.startswith('off:'): + # Anything that is not 'on:' or 'for:' should just be passed on as normal + # No further parsing of the value is needed + other_filter_args.extend([arg]) + + # If no 'on:' is found in the argument list, everything that follows the 'for:' argument + # (until the next space) is considered the argument list for the action filters + # Essentially, this represents a native build, i.e. the hardware we build on should be the + # hardware we build on + if not on_found: + for arg in cmd_as_list[1:]: + if arg.startswith('for:'): + # Extract everything after the 'for:' suffix and split by comma + filter_content=arg[4:] + target_args.extend(filter_content.split(',')) + + # Join the filter arguments and pass to EESSIBotActionFilter + # At this point, target_args is e.g. ["arch=amd/zen2","accel=nvidia/cc90"] + # But EESSIBotActionFilter expects e.g. 
"arch:amd/zen2 accel:nvidia/cc90" + # First, normalize to the ["arch:amd/zen2", "accel:nvidia/cc90"] format + normalized_filters = [] + if target_args: + for filter_item in target_args: + if '=' in filter_item: + component, pattern = filter_item.split('=', 1) + normalized_filters.append(f"{component}:{pattern}") + + # Add the other filter args to the normalized filters. The other_filter_args are already colon-separated + # so no special parsing needed there + log(f"Extracted filter arguments related to hardware target: {normalized_filters}") + log(f"Other extracted filter arguments: {other_filter_args}") + normalized_filters += other_filter_args + + # Finally, change into a space-separated string, as expected by EESSIBotActionFilter + # e.g "arch:amd/zen2 accel:nvidia/cc90 repo:my.repo.io" + if normalized_filters: + arg_str = " ".join(normalized_filters) + try: + log(f"Passing the following arguments to the EESSIBotActionFilter: {arg_str}") + self.action_filters = EESSIBotActionFilter(arg_str) + except EESSIBotActionFilterError as err: + log(f"ERROR: EESSIBotActionFilterError - {err.args}") + self.action_filters = None + raise EESSIBotCommandError("invalid action filter") + except Exception as err: + log(f"Unexpected err={err}, type(err)={type(err)}") + raise + # No arguments were passed to the command self.command else: self.action_filters = EESSIBotActionFilter("") diff --git a/tools/filter.py b/tools/filter.py index 2c4146bb..21984b1b 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -304,16 +304,13 @@ def check_filters(self, context): check = False break - # If the context declares an accelerator, but the build command did not (i.e. no action filter is defined - # for the accelerator component) then the check should only return True if "None" was the accelerator defined - # in the context. 
This ensures that no CPU-only builds are done on accelerated partitions, unless these - # partitions are explicitly configured with "None" as _one_ of the valid accelerators in their `accel:` list - # in app.cfg + # If the context declares an accelerator, enforce that a filter is defined for this component as well + # I.e. this enforces that a context with accelerator will only be used if an accelerator is explicitely + # requested in the build command, thus preventing CPU-only builds on GPU nodes (unless explicitely intended) if ( FILTER_COMPONENT_ACCEL in context and not any(af.component == FILTER_COMPONENT_ACCEL for af in self.action_filters) ): - if not context[FILTER_COMPONENT_ACCEL] == "None": - check = False + check = False return check From 3f00e5196dbecc726e3a9955c01a390ea44328e7 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 10 Jul 2025 12:51:35 +0200 Subject: [PATCH 13/47] Make sure that the for: arguments are used as build parameters --- eessi_bot_event_handler.py | 2 +- tasks/build.py | 21 ++++++++++++--------- tools/commands.py | 11 ++++++++--- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index dfefe5df..320d0ad8 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -531,7 +531,7 @@ def handle_bot_command_build(self, event_info, bot_command): build_msg = '' if check_build_permission(pr, event_info): # use filter from command - submitted_jobs = submit_build_jobs(pr, event_info, bot_command.action_filters) + submitted_jobs = submit_build_jobs(pr, event_info, bot_command.action_filters, bot_command.build_params) if submitted_jobs is None or len(submitted_jobs) == 0: build_msg = "\n - no jobs were submitted" else: diff --git a/tasks/build.py b/tasks/build.py index 3031901e..797e95d8 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -33,7 +33,7 @@ from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd import tools.filter 
as tools_filter from tools.pr_comments import ChatLevels, create_comment - +from tools.build_params import BUILD_PARAM_ARCH, BUILD_PARAM_ACCEL # defaults (used if not specified via, eg, 'app.cfg') DEFAULT_JOB_TIME_LIMIT = "24:00:00" @@ -551,7 +551,7 @@ def prepare_export_vars_file(job_dir, exportvars): log(f"{fn}(): created exported variables file {export_vars_path}") -def prepare_jobs(pr, cfg, event_info, action_filter): +def prepare_jobs(pr, cfg, event_info, action_filter, build_params): """ Prepare all jobs whose context matches the given filter. Preparation includes creating a working directory for a job, downloading the pull request into @@ -562,6 +562,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): cfg (ConfigParser): instance holding full configuration (typically read from 'app.cfg') event_info (dict): event received by event_handler action_filter (EESSIBotActionFilter): used to filter which jobs shall be prepared + build_params (EESSIBotBuildParams): dict that contains the build parameters for the job Returns: (list): list of the prepared jobs @@ -705,15 +706,16 @@ def prepare_jobs(pr, cfg, event_info, action_filter): comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage) # prepare job configuration file 'job.cfg' in directory /cfg msg = f"{fn}(): virtual partition = '{virtual_partition_name}' => " - msg += f"configured cpu_target = '{partition_info['cpu_subdir']}' , " + msg += f"requested cpu_target = '{partition_info['cpu_subdir']}, " + msg += f"build cpu_target = '{build_params[BUILD_PARAM_ARCH]}', " msg += f"configured os = '{partition_info['os']}', " if 'accel' in partition_info: - msg += f"configured accelerator(s) = '{partition_info['accel']}, " - msg += f"requested accelerator = '{accelerator}'" + msg += f"requested accelerator(s) = '{partition_info['accel']}, " + msg += f"build accelerator = '{build_params[BUILD_PARAM_ACCEL]}'" log(msg) - prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, 
partition_info['cpu_subdir'], - partition_info['os'], accelerator) + prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, build_params[BUILD_PARAM_ARCH], + partition_info['os'], build_params[BUILD_PARAM_ACCEL]) if exportvars: prepare_export_vars_file(job_dir, exportvars) @@ -1032,7 +1034,7 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): return None -def submit_build_jobs(pr, event_info, action_filter): +def submit_build_jobs(pr, event_info, action_filter, build_params): """ Create build jobs for a pull request by preparing jobs which match the given filters, submitting them, adding comments to the pull request on GitHub and @@ -1042,6 +1044,7 @@ def submit_build_jobs(pr, event_info, action_filter): pr (github.PullRequest.PullRequest): instance representing the pull request event_info (dict): event received by event_handler action_filter (EESSIBotActionFilter): used to filter which jobs shall be prepared + build_params (EESSIBotBuildParams): dict that contains the build parameters for the job Returns: (dict): dictionary mapping a job id to a github.IssueComment.IssueComment @@ -1054,7 +1057,7 @@ def submit_build_jobs(pr, event_info, action_filter): app_name = cfg[config.SECTION_GITHUB].get(config.GITHUB_SETTING_APP_NAME) # setup job directories (one per element in product of architecture x repositories) - jobs = prepare_jobs(pr, cfg, event_info, action_filter) + jobs = prepare_jobs(pr, cfg, event_info, action_filter, build_params) # return if there are no jobs to be submitted if not jobs: diff --git a/tools/commands.py b/tools/commands.py index 18f3f228..360279f5 100644 --- a/tools/commands.py +++ b/tools/commands.py @@ -18,7 +18,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError - +from tools.build_params import EESSIBotBuildParams def contains_any_bot_command(body): """ @@ -89,7 +89,6 @@ def __init__(self, cmd_str): # TODO always init 
self.action_filters with empty EESSIBotActionFilter? if len(cmd_as_list) > 1: - # Extract arguments for the action filters # By default, everything that follows the 'on:' argument (until the next space) is # considered part of the argument list for the action filters @@ -102,7 +101,13 @@ def __init__(self, cmd_str): # Extract everything after 'on:' and split by comma filter_content = arg[3:] # Remove 'on:' prefix target_args.extend(filter_content.split(',')) - elif not arg.startswith('off:'): + elif arg.startswith('for:'): + # Anything listed as 'for:' is build parameters + build_params = arg[4:] + # EESSIBotBuildParams is essentially a dict, but parses the input argument + # according to the expected argument format for 'for:' + self.build_params = EESSIBotBuildParams(build_params) + else: # Anything that is not 'on:' or 'for:' should just be passed on as normal # No further parsing of the value is needed other_filter_args.extend([arg]) From 2625b304a62a718f0f807cdd8a775aac43190205 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 10 Jul 2025 14:10:47 +0200 Subject: [PATCH 14/47] Change path for job dir so that it represents the 'for' architectures --- tasks/build.py | 26 +++++++++++++------------- tools/job_metadata.py | 2 ++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 797e95d8..ce563a2f 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -615,27 +615,25 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): # Note that 'accel' is a list, to easily allow a single CPU partition to be used for cross compilation # for a lot of accelerator targets # arch_target_map = { - # 'virtual_partition_name': { + # 'node_type_name': { # 'os': 'linux', # 'cpu_subdir': 'x86_64/amd/zen4', # 'accel': ['nvidia/cc90'], # 'slurm_params': '-p genoa ', # 'repo_targets': ["eessi.io-2023.06-compat","eessi.io-2023.06-software"], # }, - # 'virtual_partition_name2': { + # 'node_type_name2': { # ... 
etc - for virtual_partition_name, partition_info in arch_map.items(): - log(f"{fn}(): virtual_partition_name is {virtual_partition_name}, partition_info is {partition_info}") + for node_type_name, partition_info in arch_map.items(): + log(f"{fn}(): node_type_name is {node_type_name}, partition_info is {partition_info}") # Unpack for convenience - arch_dir = partition_info['cpu_subdir'] - if 'accel' in partition_info and accelerator is not None: - # Use the accelerator as defined by the action_filter. We check if this is valid for the current - # virtual partition later - arch_dir += f"/{accelerator}" + arch_dir = build_params[BUILD_PARAM_ARCH] + if BUILD_PARAM_ACCEL in build_params: + arch_dir += f"/{build_params[BUILD_PARAM_ACCEL]}" arch_dir.replace('/', '_') # check if repo_targets is defined for this virtual partition if 'repo_targets' not in partition_info: - log(f"{fn}(): skipping arch {virtual_partition_name}, " + log(f"{fn}(): skipping arch {node_type_name}, " "because no repo_targets were defined for this (virtual) partition") continue for repo_id in partition_info['repo_targets']: @@ -705,7 +703,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): ) comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage) # prepare job configuration file 'job.cfg' in directory /cfg - msg = f"{fn}(): virtual partition = '{virtual_partition_name}' => " + msg = f"{fn}(): node type = '{node_type_name}' => " msg += f"requested cpu_target = '{partition_info['cpu_subdir']}, " msg += f"build cpu_target = '{build_params[BUILD_PARAM_ARCH]}', " msg += f"configured os = '{partition_info['os']}', " @@ -715,7 +713,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): log(msg) prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, build_params[BUILD_PARAM_ARCH], - partition_info['os'], build_params[BUILD_PARAM_ACCEL]) + partition_info['os'], build_params[BUILD_PARAM_ACCEL], node_type_name) if exportvars: 
prepare_export_vars_file(job_dir, exportvars) @@ -732,7 +730,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): return jobs -def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type, accelerator): +def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type, accelerator, node_type_name): """ Set up job configuration file 'job.cfg' in directory /cfg @@ -744,6 +742,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, software_subdir (string): software subdirectory to build for (e.g., 'x86_64/generic') os_type (string): type of the os (e.g., 'linux') accelerator (string): defines accelerator to build for (e.g., 'nvidia/cc80') + node_type_name (string): the node type name, as configured in app.cfg Returns: None (implicitly) @@ -814,6 +813,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, job_cfg_arch_section = job_metadata.JOB_CFG_ARCHITECTURE_SECTION job_cfg[job_cfg_arch_section] = {} + job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_NODE_TYPE] = node_type_name job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR] = software_subdir job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_OS_TYPE] = os_type job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator if accelerator else '' diff --git a/tools/job_metadata.py b/tools/job_metadata.py index 7b7b8d0a..f5ee21ce 100644 --- a/tools/job_metadata.py +++ b/tools/job_metadata.py @@ -34,7 +34,9 @@ JOB_CFG_UPLOAD_STEP = "upload_step" # JWD/cfg/$JOB_CFG_FILENAME + JOB_CFG_ARCHITECTURE_SECTION = "architecture" +JOB_CFG_ARCHITECTURE_NODE_TYPE = "node_type" JOB_CFG_ARCHITECTURE_OS_TYPE = "os_type" JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR = "software_subdir" JOB_CFG_ARCHITECTURE_ACCELERATOR = "accelerator" From ffa2303cfc777151a2bef1de83e21198736a06bf Mon Sep 17 00:00:00 2001 From: Caspar van 
Leeuwen Date: Mon, 14 Jul 2025 12:38:18 +0200 Subject: [PATCH 15/47] More extensive reporting by the bot on what to build for/on --- app.cfg.example | 2 +- tasks/build.py | 48 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index bf7911b5..453cdb64 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -419,7 +419,7 @@ scontrol_command = /usr/bin/scontrol # awaits_release = job id `{job_id}` awaits release by job manager awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager -initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}` +initial_comment = New job on instance `{app_name}` for repository `{repo_id}`\nBuilding on: `{on_arch}`{on_accelerator}\nBuilding for: `{for_arch}`{for_accelerator}\nJob dir: `{symlink}` with_accelerator =  and accelerator `{accelerator}` diff --git a/tasks/build.py b/tasks/build.py index ce563a2f..006dc17a 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -20,6 +20,7 @@ # Standard library imports from collections import namedtuple import configparser +import codecs from datetime import datetime, timezone import json import os @@ -952,7 +953,7 @@ def submit_job(job, cfg): return job_id, symlink -def create_pr_comment(job, job_id, app_name, pr, symlink): +def create_pr_comment(job, job_id, app_name, pr, symlink, build_params): """ Create a comment to the pull request for a newly submitted job @@ -962,6 +963,7 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): app_name (string): name of the app pr (github.PullRequest.PullRequest): instance representing the pull request symlink (string): symlink from main pr_ dir to job dir + build_params (EESSIBotBuildParams): dict that contains the build 
parameters for the job Returns: github.IssueComment.IssueComment instance or None (note, github refers to @@ -969,16 +971,24 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): """ fn = sys._getframe().f_code.co_name - # obtain arch from job.arch_target which has the format OS/ARCH - arch_name = '-'.join(job.arch_target.split('/')[1:]) + # Obtain the architecture on which we are building from job.arch_target, which has the format OS/ARCH + on_arch = '-'.join(job.arch_target.split('/')[1:]) + + # Obtain the architecture to build for + for_arch = build_params[BUILD_PARAM_ARCH] submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS] - # set string for accelerator if job.accelerator is defined/set (e.g., not None) - accelerator_spec_str = '' + # Set string for accelerator to build on + accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" + on_accelerator_str = '' if job.accelerator: - accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" - accelerator_spec_str = accelerator_spec.format(accelerator=job.accelerator) + on_accelerator_str = accelerator_spec.format(accelerator=job.accelerator) + + # Set string for accelerator to build for + for_accelerator_str = '' + if BUILD_PARAM_ACCEL in build_params: + for_accelerator_str = accelerator_spec.format(accelerator=build_params[BUILD_PARAM_ACCEL]) # get current date and time dt = datetime.now(timezone.utc) @@ -986,6 +996,9 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): # construct initial job comment buildenv = config.read_config()[config.SECTION_BUILDENV] job_handover_protocol = buildenv.get(config.BUILDENV_SETTING_JOB_HANDOVER_PROTOCOL) + raw_comment_template = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT] + # Support using escape chars in the INITIAL_COMMENT, that means \n should be interpreted as unicode + 
initial_comment_template = codecs.decode(raw_comment_template, 'unicode_escape') if job_handover_protocol == config.JOB_HANDOVER_PROTOCOL_DELAYED_BEGIN: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] @@ -994,34 +1007,41 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): poll_interval = int(job_manager_cfg.get(config.JOB_MANAGER_SETTING_POLL_INTERVAL)) delay_factor = float(buildenv.get(config.BUILDENV_SETTING_JOB_DELAY_BEGIN_FACTOR, 2)) eligible_in_seconds = int(poll_interval * delay_factor) - job_comment = (f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]}" + job_comment = (f"{initial_comment_template}" f"\n|date|job status|comment|\n" f"|----------|----------|------------------------|\n" f"|{dt.strftime('%b %d %X %Z %Y')}|" f"submitted|" f"{release_comment_template}|").format( app_name=app_name, - arch_name=arch_name, + on_arch=on_arch, + for_arch=for_arch, symlink=symlink, repo_id=job.repo_id, job_id=job_id, delay_seconds=eligible_in_seconds, - accelerator_spec=accelerator_spec_str) + on_accelerator=on_accelerator_str, + for_accelerator=for_accelerator_str) else: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] - job_comment = (f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]}" + job_comment = (f"{initial_comment_template}" f"\n|date|job status|comment|\n" f"|----------|----------|------------------------|\n" f"|{dt.strftime('%b %d %X %Z %Y')}|" f"submitted|" f"{release_comment_template}|").format( app_name=app_name, - arch_name=arch_name, + on_arch=on_arch, + for_arch=for_arch, symlink=symlink, repo_id=job.repo_id, job_id=job_id, - accelerator_spec=accelerator_spec_str) + on_accelerator=on_accelerator_str, + for_accelerator=for_accelerator_str) + + # 
Make sure newline characters are taken as new line characters, not as literal \n + job_comment='\n'.join(job_comment.split('\n')) # create comment to pull request repo_name = pr.base.repo.full_name @@ -1072,7 +1092,7 @@ def submit_build_jobs(pr, event_info, action_filter, build_params): job_id, symlink = submit_job(job, cfg) # create pull request comment to report about the submitted job - pr_comment = create_pr_comment(job, job_id, app_name, pr, symlink) + pr_comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params) job_id_to_comment_map[job_id] = pr_comment pr_comment = pr_comments.PRComment(pr.base.repo.full_name, pr.number, pr_comment.id) From c98e7e89b5e816f17d51db2df7485d5a19633ecd Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 14 Jul 2025 12:45:00 +0200 Subject: [PATCH 16/47] This is no longer needed, as it is done with the codecs (decode) now --- tasks/build.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 006dc17a..08bafe21 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1040,9 +1040,6 @@ def create_pr_comment(job, job_id, app_name, pr, symlink, build_params): on_accelerator=on_accelerator_str, for_accelerator=for_accelerator_str) - # Make sure newline characters are taken as new line characters, not as literal \n - job_comment='\n'.join(job_comment.split('\n')) - # create comment to pull request repo_name = pr.base.repo.full_name issue_comment = create_comment(repo_name, pr.number, job_comment, ChatLevels.MINIMAL) From 5904f11c4644c39269d9fcf139b78dc1839b1354 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 14 Jul 2025 17:02:53 +0200 Subject: [PATCH 17/47] Print real arch_target_map keys when doing show_config --- app.cfg.example | 2 +- eessi_bot_event_handler.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index 453cdb64..9ee912eb 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -308,7 +308,7 @@ 
signing = # defines for which architectures the bot will build and what job submission # parameters shall be used to allocate a compute node with the correct # The keys of the arch_target_map are virtual partition names. They don't have any meaning in the bot code, -# and can thus be chosen as desired. +# and can thus be chosen as desired. They are publicly visible though if a bot:show_config command is issued. # Note that you are responsible that ANY bot:build command ONLY matches a single virtual partition! # If multiple partitions match the same bot:build command, a failure will be triggered in the job dir preparation arch_target_map = { diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 320d0ad8..ce35d362 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -413,10 +413,10 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev arch_map = get_architecture_targets(self.cfg) comment = f"Instance `{app_name}` is configured to build for:" - for partition_num, arch in enumerate(arch_map): + for arch in arch_map: # Do not print virtual partition names, a bot admin may not want to share those # Instead, just number them - comment += f"\n- Partition {partition_num+1}:" + comment += f"\n- Partition {arch}:" current_partition = arch_map[arch] if "os" in current_partition: comment += f"\n - OS: {current_partition['os']}" From 7c869f00a869150ec8d2ad0cfa1f1ea1ed1f8390 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 14 Jul 2025 17:20:18 +0200 Subject: [PATCH 18/47] Reduce number of possible accelerators per node type to one. 
Nodes with multiple types of accelerators in a single node can be configured as two separate node types if needed --- tasks/build.py | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 08bafe21..8e30cfb4 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -657,26 +657,14 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): } # Optionally add accelerator to the context if 'accel' in partition_info: - match = False - # Create a context for each accelerator defined in app.cfg, then - # check if _any_ of them is valid (one is enough to continue) - for accel in partition_info['accel']: - context['accelerator'] = accel - log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - if not action_filter.check_filters(context): - log(f"{fn}(): context does NOT satisfy filter(s), skipping") - continue - # check = check | action_filter.check_filters(context) - else: - log(f"{fn}(): context DOES satisfy filter(s), going on with job") - match = True - # Break as soon as we have found a valid context, it means the build args are valid - # for at least one of the accelerators in this virtual partition, that's enough - break - # If we get to this point, and none of the contexts matched the filter, we should continue to the - # next iteration of the partition_info['repo_targets'] loop - if not match: + context['accelerator'] = accel + log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") + if not action_filter.check_filters(context): + log(f"{fn}(): context does NOT satisfy filter(s), skipping") continue + # check = check | action_filter.check_filters(context) + else: + log(f"{fn}(): context DOES satisfy filter(s), going on with job") else: log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") if not action_filter.check_filters(context): @@ -686,13 +674,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): log(f"{fn}(): context DOES 
satisfy filter(s), going on with job") # we reached this point when the filter matched (otherwise we # 'continue' with the next repository) - # for each match of the filter we create a specific job directory - # however, matching CPU architectures works differently to handling - # accelerators; multiple CPU architectures defined in arch_target_map - # can match the (CPU) architecture component of a filter; in - # contrast, the value of the accelerator filter is just passed down - # to scripts in bot/ directory of the pull request (see function - # prepare_job_cfg and creation of Job tuple below) + # We create a specific job directory for the architecture that is going to be build 'for:' job_dir = os.path.join(run_dir, arch_dir, repo_id) os.makedirs(job_dir, exist_ok=True) log(f"{fn}(): job_dir '{job_dir}'") @@ -710,7 +692,8 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): msg += f"configured os = '{partition_info['os']}', " if 'accel' in partition_info: msg += f"requested accelerator(s) = '{partition_info['accel']}, " - msg += f"build accelerator = '{build_params[BUILD_PARAM_ACCEL]}'" + if BUILD_PARAM_ACCEL in build_params: + msg += f"build accelerator = '{build_params[BUILD_PARAM_ACCEL]}'" log(msg) prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, build_params[BUILD_PARAM_ARCH], From 179ab0ae13415042974d7822e88b4fa5da27c288 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 14 Jul 2025 17:24:22 +0200 Subject: [PATCH 19/47] Fix app.cfg for the fact that partition_info['accel'] is now a string, not a list --- app.cfg.example | 6 +++--- tasks/build.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index 9ee912eb..10f97d6b 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -339,7 +339,7 @@ arch_target_map = { "cpu_zen4": { "os": "linux", "cpu_subdir": "x86_64/amd/zen4", - "accel": ["None"], + "accel": "None", "slurm_params": "-p genoa --nodes 1 --ntasks-per-node 24 
--cpus-per-task 1", "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] }, @@ -352,7 +352,7 @@ arch_target_map = { "gpu_a100": { "os": "linux", "cpu_subdir": "x86_64/intel/icelake", - "accel": ["nvidia/cc80"], + "accel": "nvidia/cc80", "slurm_params": "-p gpu_a100 --nodes 1 --tasks-per-node 18 --cpus-per-task 1 --gpus-per-node 1", "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] }, @@ -372,7 +372,7 @@ arch_target_map = { "gpu_h100": { "os": "linux", "cpu_subdir": "x86_64/amd/zen4", - "accel": ["nvidia/cc90"], + "accel": "nvidia/cc90", "slurm_params": "-p gpu_h100 --nodes 1 --tasks-per-node 16 --cpus-per-task 1 --gpus-per-node 1", "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] }} diff --git a/tasks/build.py b/tasks/build.py index 8e30cfb4..7e41ef44 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -657,7 +657,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): } # Optionally add accelerator to the context if 'accel' in partition_info: - context['accelerator'] = accel + context['accelerator'] = partition_info['accel'] log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") if not action_filter.check_filters(context): log(f"{fn}(): context does NOT satisfy filter(s), skipping") From a9a25850b2640eda1351ede973426d87e477af9c Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 14 Jul 2025 17:38:49 +0200 Subject: [PATCH 20/47] Make sure that we don't access a dict item that doesn't exist --- tasks/build.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 7e41ef44..e69aea73 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -631,6 +631,9 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): arch_dir = build_params[BUILD_PARAM_ARCH] if BUILD_PARAM_ACCEL in build_params: arch_dir += f"/{build_params[BUILD_PARAM_ACCEL]}" + build_for_accel = build_params[BUILD_PARAM_ACCEL] + else: + 
build_for_accel = '' arch_dir.replace('/', '_') # check if repo_targets is defined for this virtual partition if 'repo_targets' not in partition_info: @@ -692,12 +695,11 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): msg += f"configured os = '{partition_info['os']}', " if 'accel' in partition_info: msg += f"requested accelerator(s) = '{partition_info['accel']}, " - if BUILD_PARAM_ACCEL in build_params: - msg += f"build accelerator = '{build_params[BUILD_PARAM_ACCEL]}'" + msg += f"build accelerator = '{build_for_accel}'" log(msg) prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, build_params[BUILD_PARAM_ARCH], - partition_info['os'], build_params[BUILD_PARAM_ACCEL], node_type_name) + partition_info['os'], build_for_accel, node_type_name) if exportvars: prepare_export_vars_file(job_dir, exportvars) From 51a9c740833746a24179e7e186e0107234d2c06d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 14 Jul 2025 18:01:36 +0200 Subject: [PATCH 21/47] Make sure a context match fails if the context doesn't provide e.g. an accelerator, but such an accelerator is requested --- tools/filter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/filter.py b/tools/filter.py index 21984b1b..ddc58352 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -303,6 +303,10 @@ def check_filters(self, context): else: check = False break + # Action filter wasn't found in the context, we won't allow this + else: + check = False + break # If the context declares an accelerator, enforce that a filter is defined for this component as well # I.e. 
this enforces that a context with accelerator will only be used if an accelerator is explicitely From b12c9114479dc81bd6baabfa9d13749ccdd672dc Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 15 Jul 2025 17:38:43 +0200 Subject: [PATCH 22/47] Make old config items invalid, rename to node_type and note_type_map, make sure we can do bot:status for the new formatted printing, etc --- app.cfg.example | 70 +++++--------- eessi_bot_event_handler.py | 17 ++-- eessi_bot_job_manager.py | 3 +- tasks/build.py | 181 ++++++++++++++++++++++++++++++------- tools/config.py | 43 ++++++++- 5 files changed, 222 insertions(+), 92 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index 10f97d6b..7da0147d 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -305,37 +305,29 @@ signing = [architecturetargets] -# defines for which architectures the bot will build and what job submission -# parameters shall be used to allocate a compute node with the correct -# The keys of the arch_target_map are virtual partition names. They don't have any meaning in the bot code, -# and can thus be chosen as desired. They are publicly visible though if a bot:show_config command is issued. -# Note that you are responsible that ANY bot:build command ONLY matches a single virtual partition! -# If multiple partitions match the same bot:build command, a failure will be triggered in the job dir preparation -arch_target_map = { - # This is a CPU-based partition. We do not specify an "accel" property explicitly. In this case, invoking the bot - # with ANY accelerator command will trigger a build on this partition, as long as the CPU type matches. E.g. - # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen2 accel:nvidia/cc90 - # will cause the event_filter to mark this virtual partition as a valid match, and will use it to start building - # for zen2 + nvidia/cc90. 
Thus, by not specifying an "accel" property, this partition may be used for - # cross-compilation for any accelerator. +# arch_target_map has been replaced by node_typ_map +# arch_target_map = { +# } + +# Each entry in node_type_map describes a node type: os, what CPU architecture it has (cpu_subdir) the SLURM parameters +# that need to be passed to submit to it, which repository targets (repo_targets) can be build for on this node type +# and (optionally) which accelerators ('accel') +# All are strings, except repo_targets, which is a list of strings +# Note that the Slurm parameters should typically be chosen such that a single type of node (with one specific type of +# CPU and one specific type of GPU) should be allocated. +# Below is an example configuration for a system that contains 4 types of nodes: zen2 CPU nodes, zen4 CPU nodes, +# GPU nodes with an icelake CPU and A100 GPU, GPu nodes with a zen4 CPU and an H100 GPU +# The 'on:' argument to the bot build command determines which node type will be allocated for the build job +# E.g. 'bot:build on:arch=zen4,accel=nvidia/cc90 for:...' will match the gpu_h100 node type below +# If no 'on:' argument is passed to the build command, the 'for:' argument is used instead +# E.g. 'bot:build for:arch=icelake,accel=nvidia/cc80' will match the gpu_a100 node type below +node_type_map = { "cpu_zen2": { "os": "linux", "cpu_subdir": "x86_64/amd/zen2", "slurm_params": "-p rome --nodes 1 --ntasks-per-node 16 --cpus-per-task 1", "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] }, - # This is a CPU partition. We specify an explicit "accel": "None" property. Thus, this partition will only be - # used if the bot build command does NOT contain an accel argument, e.g. - # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 - # will cause the event_filter to mark this virtual partition as a valid match, and will use it to start building - # for zen4. 
- # When invoking the bot with an accelerator command, such as - # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90 - # the event_filter will NOT mark this virtual partition as a valid match. This is intentional, as this particular - # (example) cluster has a native zen4+cc90 partition (gpu_h100) and we want this command to trigger a native build - # on that partition, rather than cross-compiling on this cpu_zen4 partition. - # One could still allow cross-compilation for other accelerator architectures, e.g. cc70 and cc80 by defining - # "accel": ["nvidia/cc70", "nvidia/cc80"] "cpu_zen4": { "os": "linux", "cpu_subdir": "x86_64/amd/zen4", @@ -343,12 +335,6 @@ arch_target_map = { "slurm_params": "-p genoa --nodes 1 --ntasks-per-node 24 --cpus-per-task 1", "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] }, - # This is a GPU partition. We specify an explicit "accel" property. Thus, only if the bot build command - # specifies that explicit accelerator in combination with the relevant CPU type, - # bot: build instance:xyz repo:eessi.io-2023.06-software arch:icelake accel:nvidia/cc80 - # will a build be triggered on this partition - # If you want to use this partition also for CPU only builds, you can alter the "accel" property to - # "accel": ["None", "nvidia/cc80"] "gpu_a100": { "os": "linux", "cpu_subdir": "x86_64/intel/icelake", @@ -356,19 +342,6 @@ arch_target_map = { "slurm_params": "-p gpu_a100 --nodes 1 --tasks-per-node 18 --cpus-per-task 1 --gpus-per-node 1", "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"] }, - # This is a GPU partition. We specify an explicit "accel" property. 
Thus, only if the bot build command - # specifies that explicit accelerator in combination with the relevant CPU type, - # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90 - # will a build be triggered on this partition - # If you want to use this partition also for cross-compiling for cc70 and cc80 architectures, you can alter - # the "accel" property to - # "accel": ["nvidia/cc70", "nvidia/cc80", "nvidia/cc90"] - # Note that setting: - # "accel": ["None", "nvidia/cc90"] - # is invalid here, since it would lead to both the cpu_zen4 and the gpu_h100 partitions matching the build command - # bot: build instance:xyz repo:eessi.io-2023.06-software arch:zen4 accel:nvidia/cc90 - # This would cause the same job dir to be prepared twice, for different virtual partitions, which will lead - # to an error in the job preparation step "gpu_h100": { "os": "linux", "cpu_subdir": "x86_64/amd/zen4", @@ -415,12 +388,15 @@ scontrol_command = /usr/bin/scontrol # are removed, the output (in PR comments) will lack important # information. 
[submitted_job_comments] -# awaits_release is no longer used since bot release v0.7.0 -# awaits_release = job id `{job_id}` awaits release by job manager +awaits_release = job id `{job_id}` awaits release by job manager awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager -initial_comment = New job on instance `{app_name}` for repository `{repo_id}`\nBuilding on: `{on_arch}`{on_accelerator}\nBuilding for: `{for_arch}`{for_accelerator}\nJob dir: `{symlink}` +new_job_instance_repo = New job on instance `{app_name}` for repository `{repo_id}` +build_on_arch = Building on: `{on_arch}`{on_accelerator} +build_for_arch = Building for: `{for_arch}`{for_accelerator} +jobdir = Job dir: `{symlink}` with_accelerator =  and accelerator `{accelerator}` +# initial_comment = New job on instance `{app_name}` for repository `{repo_id}`\nBuilding on: `{on_arch}`{on_accelerator}\nBuilding for: `{for_arch}`{for_accelerator}\nJob dir: `{symlink}` # no longer used [new_job_comments] diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index ce35d362..9bae8c48 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -29,7 +29,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from connections import github -from tasks.build import check_build_permission, get_architecture_targets, request_bot_build_issue_comments, \ +from tasks.build import check_build_permission, get_node_types, request_bot_build_issue_comments, \ submit_build_jobs from tasks.deploy import deploy_built_artefacts, determine_job_dirs from tasks.clean_up import move_to_trash_bin @@ -43,7 +43,7 @@ REQUIRED_CONFIG = { config.SECTION_ARCHITECTURETARGETS: [ - config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP], # required + config.NODE_TYPE_MAP], # required config.SECTION_BOT_CONTROL: [ # config.BOT_CONTROL_SETTING_CHATLEVEL, # 
optional config.BOT_CONTROL_SETTING_COMMAND_PERMISSION, # required @@ -106,7 +106,10 @@ config.SECTION_REPO_TARGETS: [ config.REPO_TARGETS_SETTING_REPOS_CFG_DIR], # required config.SECTION_SUBMITTED_JOB_COMMENTS: [ - config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, # required + config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO, # required + config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH, # required + config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH, # required + config.SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR, # required # config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE, # optional config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG, # required config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG, # required @@ -410,14 +413,14 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev app_name = self.cfg[config.SECTION_GITHUB][config.GITHUB_SETTING_APP_NAME] # TODO check if PR already has a comment with arch targets and # repositories - arch_map = get_architecture_targets(self.cfg) + node_map = get_node_types(self.cfg) comment = f"Instance `{app_name}` is configured to build for:" - for arch in arch_map: + for node in node_map: # Do not print virtual partition names, a bot admin may not want to share those # Instead, just number them comment += f"\n- Partition {arch}:" - current_partition = arch_map[arch] + current_partition = node_map[node] if "os" in current_partition: comment += f"\n - OS: {current_partition['os']}" if "cpu_subdir" in current_partition: @@ -691,7 +694,7 @@ def main(): opts = event_handler_parse() # config is read and checked for settings to raise an exception early when the event_handler starts. 
- if config.check_required_cfg_settings(REQUIRED_CONFIG): + if config.check_cfg_settings(REQUIRED_CONFIG): print("Configuration check: PASSED") else: print("Configuration check: FAILED") diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index 4fcf9af3..fedfb0ba 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -73,7 +73,6 @@ config.RUNNING_JOB_COMMENTS_SETTING_RUNNING_JOB] # required } - class EESSIBotSoftwareLayerJobManager: """ Class for representing the job manager of the build-and-deploy bot. It @@ -623,7 +622,7 @@ def main(): # config is read and checked for settings to raise an exception early when # the job_manager runs - if config.check_required_cfg_settings(REQUIRED_CONFIG): + if config.check_cfg_settings(REQUIRED_CONFIG): print("Configuration check: PASSED") else: print("Configuration check: FAILED") diff --git a/tasks/build.py b/tasks/build.py index e69aea73..1b229d96 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -24,7 +24,9 @@ from datetime import datetime, timezone import json import os +import re import shutil +import string import sys # Third party imports (anything installed into the local Python environment) @@ -180,26 +182,47 @@ def get_build_env_cfg(cfg): return config_data -def get_architecture_targets(cfg): - """ - Obtain mappings of architecture targets to Slurm parameters +def get_node_types(cfg): + """Obtain mappings of node types to Slurm parameters Args: cfg (ConfigParser): ConfigParser instance holding full configuration (typically read from 'app.cfg') Returns: - (dict): dictionary mapping architecture targets (format - OS/SOFTWARE_SUBDIR) to architecture specific Slurm job submission - parameters + (dict): Dictionary mapping node types names (arbitrary text) node properties + such as the OS, CPU software subdir, supported repositories, accelerator (optionally) + as well as the slurm parameters to allocate such a type of node """ fn = sys._getframe().f_code.co_name - architecture_targets = 
cfg[config.SECTION_ARCHITECTURETARGETS] + node_types = cfg[config.SECTION_ARCHITECTURETARGETS] + + node_type_map = json.loads(node_types.get(config.NODE_TYPE_MAP)) + log(f"{fn}(): node type map '{json.dumps(node_type_map)}'") + return node_type_map - arch_target_map = json.loads(architecture_targets.get(config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP)) - log(f"{fn}(): arch target map '{json.dumps(arch_target_map)}'") - return arch_target_map +# Replaced by get_node_types +# def get_architecture_targets(cfg): +# """ +# Obtain mappings of architecture targets to Slurm parameters +# +# Args: +# cfg (ConfigParser): ConfigParser instance holding full configuration +# (typically read from 'app.cfg') +# +# Returns: +# (dict): dictionary mapping architecture targets (format +# OS/SOFTWARE_SUBDIR) to architecture specific Slurm job submission +# parameters +# """ +# fn = sys._getframe().f_code.co_name +# +# architecture_targets = cfg[config.SECTION_ARCHITECTURETARGETS] +# +# arch_target_map = json.loads(architecture_targets.get(config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP)) +# log(f"{fn}(): arch target map '{json.dumps(arch_target_map)}'") +# return arch_target_map def get_allowed_exportvars(cfg): @@ -572,7 +595,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): app_name = cfg[config.SECTION_GITHUB].get(config.GITHUB_SETTING_APP_NAME) build_env_cfg = get_build_env_cfg(cfg) - arch_map = get_architecture_targets(cfg) + node_map = get_node_types(cfg) repocfg = get_repo_cfg(cfg) allowed_exportvars = get_allowed_exportvars(cfg) @@ -625,7 +648,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): # }, # 'node_type_name2': { # ... 
etc - for node_type_name, partition_info in arch_map.items(): + for node_type_name, partition_info in node_map.items(): log(f"{fn}(): node_type_name is {node_type_name}, partition_info is {partition_info}") # Unpack for convenience arch_dir = build_params[BUILD_PARAM_ARCH] @@ -846,7 +869,7 @@ def submit_job(job, cfg): # instances run on the same system job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME) - # add a default time limit of 24h to the job submit command if no other time + # add a default time limit of 24h to the job submit comnand if no other time # limit is specified already all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts]) all_opts_list = all_opts_str.split(" ") @@ -981,9 +1004,15 @@ def create_pr_comment(job, job_id, app_name, pr, symlink, build_params): # construct initial job comment buildenv = config.read_config()[config.SECTION_BUILDENV] job_handover_protocol = buildenv.get(config.BUILDENV_SETTING_JOB_HANDOVER_PROTOCOL) - raw_comment_template = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT] + # NO LONGER NEEDED now that we have cut up the sentence into different config items + # raw_comment_template = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT] + new_job_instance_repo = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] + build_on_arch = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] + build_for_arch = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] + jobdir = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR] + # NO LONGER NEEDED now that we have cut up the sentence into different config items # Support using escape chars in the INITIAL_COMMENT, that means \n should be interpreted as unicode - initial_comment_template = codecs.decode(raw_comment_template, 'unicode_escape') + # 
initial_comment_template = codecs.decode(raw_comment_template, 'unicode_escape') if job_handover_protocol == config.JOB_HANDOVER_PROTOCOL_DELAYED_BEGIN: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] @@ -992,8 +1021,11 @@ def create_pr_comment(job, job_id, app_name, pr, symlink, build_params): poll_interval = int(job_manager_cfg.get(config.JOB_MANAGER_SETTING_POLL_INTERVAL)) delay_factor = float(buildenv.get(config.BUILDENV_SETTING_JOB_DELAY_BEGIN_FACTOR, 2)) eligible_in_seconds = int(poll_interval * delay_factor) - job_comment = (f"{initial_comment_template}" - f"\n|date|job status|comment|\n" + job_comment = (f"{new_job_instance_repo}\n" + f"{build_on_arch}\n" + f"{build_for_arch}\n" + f"{jobdir}\n" + f"|date|job status|comment|\n" f"|----------|----------|------------------------|\n" f"|{dt.strftime('%b %d %X %Z %Y')}|" f"submitted|" @@ -1010,8 +1042,11 @@ def create_pr_comment(job, job_id, app_name, pr, symlink, build_params): else: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] - job_comment = (f"{initial_comment_template}" - f"\n|date|job status|comment|\n" + job_comment = (f"{new_job_instance_repo}\n" + f"{build_on_arch}\n" + f"{build_for_arch}\n" + f"{jobdir}\n" + f"|date|job status|comment|\n" f"|----------|----------|------------------------|\n" f"|{dt.strftime('%b %d %X %Z %Y')}|" f"submitted|" @@ -1125,11 +1160,57 @@ def check_build_permission(pr, event_info): return True +def template_to_regex(format_str, with_eol=True): + """ + Converts a formatting string into a regex that can extract all the formatted + parts of the string. If with_eol is True, it assumes the formatted string is followed by an end-of-line + character. 
This is a requirement if it has to succesfully match a formatting string that ends with a formatting + field. + + Args: + format_str (string): a formatting string, with template placeholders. + with_eol (bool, optional): a boolean, indicating if the formatting string is expected to be followed by + an end of line character + + """ + + # string.Formatter returns a 4-tuple of literal text, field name, format spec, and conversion + # E.g if format_str = "This is my {app} it is currently {status}" + # formatter = [ + # ("This is my", "app", "", None), + # ("it is currently", "status", "", None), + # ("", None, None, None), + # ] + formatter = string.Formatter() + regex_parts = [] + + for literal_text, field_name, _, _ in formatter.parse(format_str): + # We use re.escape to escape any special characters in the literal_text, as we want to match those literally + regex_parts.append(re.escape(literal_text)) + if field_name is not None: + # Create a non-greedy, named capture group. Note that the name itself as an f-string + # So we get the actual field name as the name of the capture group + # We match any character, but in a non-greedy way. Thus, as soon as it can match the next + # literal text section, it will - thus assuming that that's the end of the field + # We use .* to allow for empty fields (such as the optional accelerator fields) + regex_parts.append(f"(?P<{field_name}>.*?)") + + # Finally, make sure we append a $ to the regex. This is necessary because of our non-greedy matching + # strategy. Otherwise, a formatting string that ends with a formatting item would only match the first letter + # of the field, because it doesn't find anything to match after (and it is non-greedy). 
With the $, it has + # something to match after the field, thus making sure it matches the whole field + # This does assume that + full_pattern = ''.join(regex_parts) + if with_eol: + full_pattern += "$" + return re.compile(full_pattern) + + def request_bot_build_issue_comments(repo_name, pr_number): """ Query the github API for the issue_comments in a pr. - Archs: + Args: repo_name (string): name of the repository (format USER_OR_ORGANISATION/REPOSITORY) pr_number (int): number og the pr @@ -1139,7 +1220,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): """ fn = sys._getframe().f_code.co_name - status_table = {'arch': [], 'date': [], 'status': [], 'url': [], 'result': []} + status_table = {'on arch': [], 'for arch': [], 'for repo': [], 'date': [], 'status': [], 'url': [], 'result': []} cfg = config.read_config() # for loop because github has max 100 items per request. @@ -1154,19 +1235,55 @@ def request_bot_build_issue_comments(repo_name, pr_number): for comment in comments: # iterate through the comments to find the one where the status of the build was in submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS] - initial_comment_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT] - if initial_comment_fmt[:20] in comment['body']: + instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] + instance_repo_re = template_to_regex(instance_repo_fmt) + comment_body = comment['body'].split('\n') + print(f"Matching string {comment_body[0]} with re: {instance_repo_re}") + instance_repo_match = re.match(instance_repo_re, comment_body[0]) + # Check if this body starts with an initial comment from the bot (first item is always the instance + repo + # it is building for) + # Then, check that it has at least 4 lines so that we can safely index up to that number + if instance_repo_match and len(comment_body) >= 4: + print(f"Instance match: 
{instance_repo_match.groupdict()}") + on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] + on_arch_re = template_to_regex(on_arch_fmt) + print(f"Matching string {comment_body[1]} with re: {on_arch_re}") + on_arch_match = re.match(on_arch_re, comment_body[1]) + for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] + for_arch_re = template_to_regex(for_arch_fmt) + print(f"Matching string {comment_body[2]} with re: {for_arch_re}") + for_arch_match = re.match(for_arch_re, comment_body[2]) + print(f"On arch match: {on_arch_match.groupdict()}") + print(f"For arch match: {for_arch_match.groupdict()}") + # Does everything match (it should, if we already had an instance_repo_match, but good to be sure) + if on_arch_match and for_arch_match: + instance_repo_dict = instance_repo_match.groupdict() + on_arch_dict = on_arch_match.groupdict() + for_arch_dict = for_arch_match.groupdict() + # Play it safe + # TODO: probably log something in the 'else' case + if 'on_arch' in on_arch_dict: + status_table['on arch'].append(on_arch_dict['on_arch']) + if 'for_arch' in for_arch_dict: + status_table['for arch'].append(for_arch_dict['for_arch']) + if 'repo_id' in instance_repo_dict: + status_table['for repo'].append(instance_repo_dict['repo_id']) + + # TODO: extract the building on, building for and repository name, and put those in the table + # NOTE: previously, we could use arch. We can't anymore, since we also want the 'for' architecture + # that is not available in this scope + # We should probably split the SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT into different components + # and create a regex out of those components? 
# get archictecture from comment['body'] - first_line = comment['body'].split('\n')[0] - arch_map = get_architecture_targets(cfg) - for arch in arch_map.keys(): - # drop the first element in arch (which names the OS type) and join the remaining items with '-' - target_arch = '-'.join(arch.split('/')[1:]) - if target_arch in first_line: - status_table['arch'].append(target_arch) - else: - log(f"{fn}(): target_arch '{target_arch}' not found in first line '{first_line}'") + node_map = get_node_types(cfg) + # for arch in node_map.keys(): + # # drop the first element in arch (which names the OS type) and join the remaining items with '-' + # target_arch = '-'.join(arch.split('/')[1:]) + # if target_arch in first_line: + # status_table['arch'].append(target_arch) + # else: + # log(f"{fn}(): target_arch '{target_arch}' not found in first line '{first_line}'") # get date, status, url and result from the markdown table comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1] diff --git a/tools/config.py b/tools/config.py index d0ec6498..ddfe9e3f 100644 --- a/tools/config.py +++ b/tools/config.py @@ -30,7 +30,8 @@ # sectionname_SETTING_settingname for any setting with name settingname in # section sectionname SECTION_ARCHITECTURETARGETS = 'architecturetargets' -ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP = 'arch_target_map' +ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP = 'arch_target_map' # Obsolete, replaced by NODE_TYPE_MAP +NODE_TYPE_MAP = 'node_type_map' SECTION_BOT_CONTROL = 'bot_control' BOT_CONTROL_SETTING_COMMAND_PERMISSION = 'command_permission' @@ -120,6 +121,10 @@ SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release' SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG = 'awaits_release_delayed_begin_msg' SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG = 'awaits_release_hold_release_msg' +SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO = 'new_job_instance_repo' 
+SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH = 'build_on_arch' +SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH = 'build_for_arch' +SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR = 'jobdir' SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment' SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR = 'with_accelerator' @@ -135,6 +140,26 @@ JOB_HANDOVER_PROTOCOL_HOLD_RELEASE } +# Allows us to error on config items that were removed +FORBIDDEN_CONFIG = { + SECTION_ARCHITECTURETARGETS: [ + ( + ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP, + f"Config invalid: '{ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP}' was removed and replaced by " + f"'{NODE_TYPE_MAP}'. See app.cfg.example for details." + ) + ], + SECTION_SUBMITTED_JOB_COMMENTS: [ + ( + SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, + f"Config invalid: '{SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT}' was removed and replaced by " + f"'{SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO}', '{SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH}', " + f"'{SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH}' and '{SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR}'. " + "See app.cfg.example for details." + ) + ] +} + def read_config(path='app.cfg'): """ @@ -158,10 +183,10 @@ def read_config(path='app.cfg'): return config -def check_required_cfg_settings(req_settings, path="app.cfg"): +def check_cfg_settings(req_settings, path="app.cfg"): """ - Reads the config file, checks if it contains the required settings, - if not logs an error message and exits. + Reads the config file, checks if it contains the required settings, and if it does not contain forbidden + (i.e. removed) settings. If the check fails, logs an error message and exits. 
Args: req_settings (dict (str, list)): required settings @@ -181,4 +206,14 @@ def check_required_cfg_settings(req_settings, path="app.cfg"): for item in req_settings[section]: if item not in cfg[section]: error(f'Missing configuration item "{item}" in section "{section}" of configuration file {path}.') + + # Check for forbidden arguments + for section in FORBIDDEN_CONFIG: + if section in cfg: + for item in FORBIDDEN_CONFIG[section]: + # First element of the tuple is the forbidden config item, check if its in the section + if item[0] in cfg[section]: + # Item 1 contains a specific error message + error(item[1]) + return True From 697dc6ea1baa432e790756bb9d94e8df9fb3c97d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:29:05 +0200 Subject: [PATCH 23/47] Update the status command to account for the new on:... for:... syntax --- eessi_bot_event_handler.py | 13 ++-- tasks/build.py | 121 +++++++++++++++++++++++++------------ 2 files changed, 93 insertions(+), 41 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 9bae8c48..723a6139 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -590,10 +590,12 @@ def handle_bot_command_status(self, event_info, bot_command): comment_status = '' comment_status += "\nThis is the status of all the `bot: build` commands:" - comment_status += "\n|arch|result|date|status|url|" - comment_status += "\n|----|------|----|------|---|" + comment_status += "\n|on|for|repo|result|date|status|url|" + comment_status += "\n|----|----|----|------|----|------|---|" for x in range(0, len(status_table['date'])): - comment_status += f"\n|{status_table['arch'][x]}|" + comment_status += f"\n|{status_table['on arch'][x]}|" + comment_status += f"{status_table['for arch'][x]}|" + comment_status += f"{status_table['for repo'][x]}|" comment_status += f"{status_table['result'][x]}|" comment_status += f"{status_table['date'][x]}|" comment_status += 
f"{status_table['status'][x]}|" @@ -601,7 +603,10 @@ def handle_bot_command_status(self, event_info, bot_command): self.log(f"Overview of finished builds: comment '{comment_status}'") issue_comment = create_comment(repo_name, pr_number, comment_status, ChatLevels.MINIMAL) - return issue_comment + if issue_comment: + return f"\n - added status comment {issue_comment.html_url}" + else: + return "\n - failed to create status comment" def start(self, app, port=3000): """ diff --git a/tasks/build.py b/tasks/build.py index 1b229d96..f5a64577 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1206,6 +1206,15 @@ def template_to_regex(format_str, with_eol=True): return re.compile(full_pattern) +class PartialFormatDict(dict): + """ + A dictionary class that allows for missing keys - and will just return {key} in that case. + This can be used to partially format some, but not all placeholders in a formatting string. + """ + def __missing__(self, key): + return "{" + key + "}" + + def request_bot_build_issue_comments(repo_name, pr_number): """ Query the github API for the issue_comments in a pr. 
@@ -1235,6 +1244,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): for comment in comments: # iterate through the comments to find the one where the status of the build was in submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS] + accelerator_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR] instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] instance_repo_re = template_to_regex(instance_repo_fmt) comment_body = comment['body'].split('\n') @@ -1244,46 +1254,83 @@ def request_bot_build_issue_comments(repo_name, pr_number): # it is building for) # Then, check that it has at least 4 lines so that we can safely index up to that number if instance_repo_match and len(comment_body) >= 4: + log(f"{fn}(): found bot build response in issue, processing...") + # First, extract the repo_id print(f"Instance match: {instance_repo_match.groupdict()}") + log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") + status_table['for repo'].append(instance_repo_match.group('repo_id')) + + # TODO: this unconditionally adds the accelerator_fmt, but that's only needed _if an accelerator was used_ + # We should split these cases. Probably by first doing a match _with_ accelerator (the most specific case) + # If that fails to match, we continue to match without accelerator + + # Then, try to match the architecture we build on. 
+ # First try this including accelerator, to see if one was defined on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] - on_arch_re = template_to_regex(on_arch_fmt) - print(f"Matching string {comment_body[1]} with re: {on_arch_re}") - on_arch_match = re.match(on_arch_re, comment_body[1]) + on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt)) + on_arch_re_with_accel = template_to_regex(on_arch_fmt_with_accel) + print(f"Matching string {comment_body[1]} with re: {on_arch_re_with_accel}") + on_arch_match = re.match(on_arch_re_with_accel, comment_body[1]) + if on_arch_match: + # Pattern with accelerator matched, append to status_table + print(f"On arch match: {on_arch_match.groupdict()}") + log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, " + f"with accelerator {on_arch_match.group('accelerator')}") + status_table['on arch'].append(f"`{on_arch_match.group('on_arch')}`, " + f"`{on_arch_match.group('accelerator')}`") + else: + # Pattern with accelerator did not match, retry without accelerator + on_arch_re = template_to_regex(on_arch_fmt) + print(f"Matching string {comment_body[1]} with re: {on_arch_re}") + on_arch_match = re.match(on_arch_re, comment_body[1]) + if on_arch_match: + # Pattern without accelerator matched, append to status_table + print(f"On arch match: {on_arch_match.groupdict()}") + log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}") + status_table['on arch'].append(f"`{on_arch_match.group('on_arch')}`") + else: + # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture' + msg = "Could not match regular expression for extracting the architecture to build on.\n" + msg += "String to be matched:\n" + msg += f"{comment_body[1]}\n" + msg += "First regex attempted:\n" + msg += f"{on_arch_re_with_accel.pattern}\n" + msg += "Second regex attempted:\n" + msg += 
f"{on_arch_re.pattern}\n" + raise ValueError(msg) + # Now, do the same for the architecture we build for. I.e. first, try to match including accelerator for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] - for_arch_re = template_to_regex(for_arch_fmt) - print(f"Matching string {comment_body[2]} with re: {for_arch_re}") - for_arch_match = re.match(for_arch_re, comment_body[2]) - print(f"On arch match: {on_arch_match.groupdict()}") - print(f"For arch match: {for_arch_match.groupdict()}") - # Does everything match (it should, if we already had an instance_repo_match, but good to be sure) - if on_arch_match and for_arch_match: - instance_repo_dict = instance_repo_match.groupdict() - on_arch_dict = on_arch_match.groupdict() - for_arch_dict = for_arch_match.groupdict() - # Play it safe - # TODO: probably log something in the 'else' case - if 'on_arch' in on_arch_dict: - status_table['on arch'].append(on_arch_dict['on_arch']) - if 'for_arch' in for_arch_dict: - status_table['for arch'].append(for_arch_dict['for_arch']) - if 'repo_id' in instance_repo_dict: - status_table['for repo'].append(instance_repo_dict['repo_id']) - - # TODO: extract the building on, building for and repository name, and put those in the table - # NOTE: previously, we could use arch. We can't anymore, since we also want the 'for' architecture - # that is not available in this scope - # We should probably split the SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT into different components - # and create a regex out of those components? 
- - # get archictecture from comment['body'] - node_map = get_node_types(cfg) - # for arch in node_map.keys(): - # # drop the first element in arch (which names the OS type) and join the remaining items with '-' - # target_arch = '-'.join(arch.split('/')[1:]) - # if target_arch in first_line: - # status_table['arch'].append(target_arch) - # else: - # log(f"{fn}(): target_arch '{target_arch}' not found in first line '{first_line}'") + for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) + for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel) + print(f"Matching string {comment_body[2]} with re: {for_arch_re_with_accel}") + for_arch_match = re.match(for_arch_re_with_accel, comment_body[2]) + if for_arch_match: + # Pattern with accelerator matched, append to status_table + print(f"For arch match: {for_arch_match.groupdict()}") + log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " + f"with accelerator {for_arch_match.group('accelerator')}") + status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`, " + f"`{for_arch_match.group('accelerator')}`") + else: + # Pattern with accelerator did not match, retry without accelerator + for_arch_re = template_to_regex(for_arch_fmt) + print(f"Matching string {comment_body[1]} with re: {for_arch_re}") + for_arch_match = re.match(for_arch_re, comment_body[2]) + if for_arch_match: + # Pattern without accelerator matched, append to status_table + print(f"For arch match: {for_arch_match.groupdict()}") + log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}") + status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`") + else: + # This shouldn't happen: we had an instance_repo_match, but no match for the 'for architecture' + msg = "Could not match regular expression for extracting the architecture to build for.\n" + msg += "String to be matched:\n" + msg += f"{comment_body[2]}\n" + msg += "First 
regex attempted:\n" + msg += f"{for_arch_re_with_accel.pattern}\n" + msg += "Second regex attempted:\n" + msg += f"{for_arch_re.pattern}\n" + raise ValueError(msg) # get date, status, url and result from the markdown table comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1] From c0fe051b88e8ffd92f1b2f5b98e353c0f82c3cae Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:34:04 +0200 Subject: [PATCH 24/47] Remove debugging print statements --- tasks/build.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index f5a64577..54516b84 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1248,7 +1248,6 @@ def request_bot_build_issue_comments(repo_name, pr_number): instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] instance_repo_re = template_to_regex(instance_repo_fmt) comment_body = comment['body'].split('\n') - print(f"Matching string {comment_body[0]} with re: {instance_repo_re}") instance_repo_match = re.match(instance_repo_re, comment_body[0]) # Check if this body starts with an initial comment from the bot (first item is always the instance + repo # it is building for) @@ -1256,7 +1255,6 @@ def request_bot_build_issue_comments(repo_name, pr_number): if instance_repo_match and len(comment_body) >= 4: log(f"{fn}(): found bot build response in issue, processing...") # First, extract the repo_id - print(f"Instance match: {instance_repo_match.groupdict()}") log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") status_table['for repo'].append(instance_repo_match.group('repo_id')) @@ -1269,11 +1267,9 @@ def request_bot_build_issue_comments(repo_name, pr_number): on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt)) on_arch_re_with_accel = 
template_to_regex(on_arch_fmt_with_accel) - print(f"Matching string {comment_body[1]} with re: {on_arch_re_with_accel}") on_arch_match = re.match(on_arch_re_with_accel, comment_body[1]) if on_arch_match: # Pattern with accelerator matched, append to status_table - print(f"On arch match: {on_arch_match.groupdict()}") log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, " f"with accelerator {on_arch_match.group('accelerator')}") status_table['on arch'].append(f"`{on_arch_match.group('on_arch')}`, " @@ -1281,11 +1277,9 @@ def request_bot_build_issue_comments(repo_name, pr_number): else: # Pattern with accelerator did not match, retry without accelerator on_arch_re = template_to_regex(on_arch_fmt) - print(f"Matching string {comment_body[1]} with re: {on_arch_re}") on_arch_match = re.match(on_arch_re, comment_body[1]) if on_arch_match: # Pattern without accelerator matched, append to status_table - print(f"On arch match: {on_arch_match.groupdict()}") log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}") status_table['on arch'].append(f"`{on_arch_match.group('on_arch')}`") else: @@ -1302,11 +1296,9 @@ def request_bot_build_issue_comments(repo_name, pr_number): for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel) - print(f"Matching string {comment_body[2]} with re: {for_arch_re_with_accel}") for_arch_match = re.match(for_arch_re_with_accel, comment_body[2]) if for_arch_match: # Pattern with accelerator matched, append to status_table - print(f"For arch match: {for_arch_match.groupdict()}") log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " f"with accelerator {for_arch_match.group('accelerator')}") status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`, " @@ -1314,11 
+1306,9 @@ def request_bot_build_issue_comments(repo_name, pr_number): else: # Pattern with accelerator did not match, retry without accelerator for_arch_re = template_to_regex(for_arch_fmt) - print(f"Matching string {comment_body[1]} with re: {for_arch_re}") for_arch_match = re.match(for_arch_re, comment_body[2]) if for_arch_match: # Pattern without accelerator matched, append to status_table - print(f"For arch match: {for_arch_match.groupdict()}") log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}") status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`") else: From f179b664667a285e3dceef5728b5c5237e343936 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:45:08 +0200 Subject: [PATCH 25/47] Warn about the removal of the repo_target_map --- app.cfg.example | 5 +++++ tools/config.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/app.cfg.example b/app.cfg.example index 7da0147d..b357fbab 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -351,6 +351,11 @@ node_type_map = { }} [repo_targets] + +# No longer used, repo targets are now specified per node type in the node_type_map +# repo_target_map = { +# "linux/x86_64/amd/zen2" : ["eessi.io-2023.06-software","eessi.io-2025.06-software"] } + # points to definition of repositories (default repository defined by build container) repos_cfg_dir = PATH_TO_SHARED_DIRECTORY/repos diff --git a/tools/config.py b/tools/config.py index ddfe9e3f..7f814ea4 100644 --- a/tools/config.py +++ b/tools/config.py @@ -111,6 +111,7 @@ NEW_JOB_COMMENTS_SETTING_AWAITS_LAUNCH = 'awaits_launch' SECTION_REPO_TARGETS = 'repo_targets' +REPO_TARGETS_SETTING_REPO_TARGET_MAP = 'repo_target_map' REPO_TARGETS_SETTING_REPOS_CFG_DIR = 'repos_cfg_dir' SECTION_RUNNING_JOB_COMMENTS = 'running_job_comments' @@ -149,6 +150,13 @@ f"'{NODE_TYPE_MAP}'. See app.cfg.example for details." 
) ], + SECTION_REPO_TARGETS: [ + ( + REPO_TARGETS_SETTING_REPO_TARGET_MAP, + f"Config invalid: '{REPO_TARGETS_SETTING_REPO_TARGET_MAP}' was removed. Repository targets can now be " + f"specified within the '{NODE_TYPE_MAP}' dictionary. See app.cfg.example for details." + ) + ], SECTION_SUBMITTED_JOB_COMMENTS: [ ( SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, From aad663e189f5af3aa7c798ef863f398960380371 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:45:41 +0200 Subject: [PATCH 26/47] Fix typo --- app.cfg.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.cfg.example b/app.cfg.example index b357fbab..63d3155a 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -305,7 +305,7 @@ signing = [architecturetargets] -# arch_target_map has been replaced by node_typ_map +# arch_target_map has been replaced by node_type_map # arch_target_map = { # } From be8c7d0553004829891b2f0cd1a4c47e6915b9ab Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:48:18 +0200 Subject: [PATCH 27/47] Fix hound issues --- eessi_bot_event_handler.py | 4 +--- tasks/build.py | 9 +++------ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 723a6139..7287d533 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -417,9 +417,7 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev comment = f"Instance `{app_name}` is configured to build for:" for node in node_map: - # Do not print virtual partition names, a bot admin may not want to share those - # Instead, just number them - comment += f"\n- Partition {arch}:" + comment += f"\n- Partition {node}:" current_partition = node_map[node] if "os" in current_partition: comment += f"\n - OS: {current_partition['os']}" diff --git a/tasks/build.py b/tasks/build.py index 54516b84..71d55de9 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -20,7 +20,6 @@ # 
Standard library imports from collections import namedtuple import configparser -import codecs from datetime import datetime, timezone import json import os @@ -1254,14 +1253,11 @@ def request_bot_build_issue_comments(repo_name, pr_number): # Then, check that it has at least 4 lines so that we can safely index up to that number if instance_repo_match and len(comment_body) >= 4: log(f"{fn}(): found bot build response in issue, processing...") + # First, extract the repo_id log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}") status_table['for repo'].append(instance_repo_match.group('repo_id')) - # TODO: this unconditionally adds the accelerator_fmt, but that's only needed _if an accelerator was used_ - # We should split these cases. Probably by first doing a match _with_ accelerator (the most specific case) - # If that fails to match, we continue to match without accelerator - # Then, try to match the architecture we build on. # First try this including accelerator, to see if one was defined on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] @@ -1292,6 +1288,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): msg += "Second regex attempted:\n" msg += f"{on_arch_re.pattern}\n" raise ValueError(msg) + # Now, do the same for the architecture we build for. I.e. 
first, try to match including accelerator for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt)) @@ -1302,7 +1299,7 @@ def request_bot_build_issue_comments(repo_name, pr_number): log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, " f"with accelerator {for_arch_match.group('accelerator')}") status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`, " - f"`{for_arch_match.group('accelerator')}`") + f"`{for_arch_match.group('accelerator')}`") else: # Pattern with accelerator did not match, retry without accelerator for_arch_re = template_to_regex(for_arch_fmt) From 7f766f4b3639304f8c03fec1daf4a382c7505ddf Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:49:48 +0200 Subject: [PATCH 28/47] Format releveant output of show_config as code --- eessi_bot_event_handler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 7287d533..a14ace5c 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -417,16 +417,16 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev comment = f"Instance `{app_name}` is configured to build for:" for node in node_map: - comment += f"\n- Partition {node}:" + comment += f"\n- Partition `{node}`:" current_partition = node_map[node] if "os" in current_partition: - comment += f"\n - OS: {current_partition['os']}" + comment += f"\n - OS: `{current_partition['os']}`" if "cpu_subdir" in current_partition: - comment += f"\n - CPU architecture: {current_partition['cpu_subdir']}" + comment += f"\n - CPU architecture: `{current_partition['cpu_subdir']}`" if "repo_targets" in current_partition: - comment += f"\n - Repositories: {current_partition['repo_targets']}" + comment += f"\n - Repositories: 
`{current_partition['repo_targets']}`" if "accel" in current_partition: - comment += f"\n - Accelerators: {current_partition['accel']}" + comment += f"\n - Accelerators: `{current_partition['accel']}`" comment += "\n" self.log(f"PR opened: comment '{comment}'") From d205598c787f9162de5b75afaf5784b0051cab40 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:50:40 +0200 Subject: [PATCH 29/47] Rephrase to make things more clear --- eessi_bot_event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index a14ace5c..d7beca4d 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -415,7 +415,7 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev # repositories node_map = get_node_types(self.cfg) - comment = f"Instance `{app_name}` is configured to build for:" + comment = f"Instance `{app_name}` is configured to build on:" for node in node_map: comment += f"\n- Partition `{node}`:" current_partition = node_map[node] From ebcc7fd9c801ac6b6a8d5168cdbac9965799266e Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:56:51 +0200 Subject: [PATCH 30/47] Forgot to add this new file... 
--- tools/build_params.py | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tools/build_params.py diff --git a/tools/build_params.py b/tools/build_params.py new file mode 100644 index 00000000..81bc7bc5 --- /dev/null +++ b/tools/build_params.py @@ -0,0 +1,75 @@ +# This file is part of the EESSI build-and-deploy bot, +# see https://github.com/EESSI/eessi-bot-software-layer +# +# The bot helps with requests to add software installations to the +# EESSI software layer, see https://github.com/EESSI/software-layer +# +# author: Caspar van Leeuwen +# +# license: GPLv2 +# + +from tools.filter import FILTER_COMPONENT_ACCEL, FILTER_COMPONENT_ARCH + +# Define these constants with the same values. We want the arguments passed to +# on: and for: to use the same keywords +BUILD_PARAM_ACCEL = FILTER_COMPONENT_ACCEL +BUILD_PARAM_ARCH = FILTER_COMPONENT_ARCH +BUILD_PARAMS = [ + BUILD_PARAM_ACCEL, + BUILD_PARAM_ARCH +] + +class EESSIBotBuildParamsValueError(Exception): + """ + Exception to be raised when an inappropriate value is specified for a build parameter + """ + pass + +class EESSIBotBuildParamsNameError(Exception): + """ + Exception to be raised when an unkown build parameter name is specified + """ + pass + +class EESSIBotBuildParams(dict): + """ + Class for representing build parameters. 
Essentially, this is a dictionary class + but with some additional parsing for the constructor + """ + def __init__(self, build_parameters): + """ + EESSIBotBuildParams constructor + + Args: + build_params (string): string containing comma separated build parameters + Example: "arch:amd/zen4,accel:nvidia/cc90" + + Raises: + EESSIBotBuildParamsNameError: raised if parsing an unknown build parameter + string + EESSIBotBuildParamsValueError: raised if an invalid value is passed for a build parameter + """ + build_param_dict = {} + + # Loop over defined build parameters argument + build_params_list = build_parameters.split(',') + for item in build_params_list: + # Separate build parameter name and value + build_param = item.split('=') + if len(build_param) != 2: + msg = f"Expected argument {item} to be split into exactly two parts when splitting by '=', " + msg += f"but the number of items after splitting is {len(build_param)}" + raise EESSIBotBuildParamsValueError(msg) + param_found = False + for full_param_name in BUILD_PARAMS: + # Identify which build param we are matching + if full_param_name.startswith(build_param[0]): + param_found = True + # Store the value of the build parameter by it's full name + build_param_dict[full_param_name] = build_param[1] + if not param_found: + msg = f"Build parameter {build_param[0]} not found. 
Known build parameters: {BUILD_PARAMS}" + raise EESSIBotBuildParamsNameError(msg) + + super().__init__(build_param_dict) From 0a8bc9b9465d15059139d3742cbf91ec79208a9f Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 11:58:15 +0200 Subject: [PATCH 31/47] Fix hound issues --- tools/build_params.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/build_params.py b/tools/build_params.py index 81bc7bc5..4ba34886 100644 --- a/tools/build_params.py +++ b/tools/build_params.py @@ -20,18 +20,21 @@ BUILD_PARAM_ARCH ] + class EESSIBotBuildParamsValueError(Exception): """ Exception to be raised when an inappropriate value is specified for a build parameter """ pass + class EESSIBotBuildParamsNameError(Exception): """ Exception to be raised when an unkown build parameter name is specified """ pass + class EESSIBotBuildParams(dict): """ Class for representing build parameters. Essentially, this is a dictionary class @@ -65,9 +68,9 @@ def __init__(self, build_parameters): for full_param_name in BUILD_PARAMS: # Identify which build param we are matching if full_param_name.startswith(build_param[0]): - param_found = True - # Store the value of the build parameter by it's full name - build_param_dict[full_param_name] = build_param[1] + param_found = True + # Store the value of the build parameter by it's full name + build_param_dict[full_param_name] = build_param[1] if not param_found: msg = f"Build parameter {build_param[0]} not found. 
Known build parameters: {BUILD_PARAMS}" raise EESSIBotBuildParamsNameError(msg) From 81257dbb5f690745c299c5653ebeb0eeb4e5731a Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:05:08 +0200 Subject: [PATCH 32/47] Update build params call signature --- tests/test_task_build.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/test_task_build.py b/tests/test_task_build.py index 1c289947..eef91879 100644 --- a/tests/test_task_build.py +++ b/tests/test_task_build.py @@ -29,6 +29,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from tasks.build import Job, create_pr_comment from tools import run_cmd, run_subprocess +from tools.build_params import EESSIBotBuildParams from tools.job_metadata import create_metadata_file, read_metadata_file from tools.pr_comments import PRComment, get_submitted_job_comment @@ -287,6 +288,7 @@ def test_create_pr_comment_succeeds(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") job_id = "123" app_name = "pytest" @@ -295,7 +297,7 @@ def test_create_pr_comment_succeeds(monkeypatch, mocked_github, tmpdir): repo = mocked_github.get_repo(repo_name) pr = repo.get_pull(pr_number) symlink = "/symlink" - comment = create_pr_comment(job, job_id, app_name, pr, symlink) + comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params) assert comment.id == 1 # check if created comment includes jobid? 
print("VERIFYING PR COMMENT") @@ -317,6 +319,7 @@ def test_create_pr_comment_succeeds_none(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") job_id = "123" app_name = "pytest" @@ -325,7 +328,7 @@ def test_create_pr_comment_succeeds_none(monkeypatch, mocked_github, tmpdir): repo = mocked_github.get_repo(repo_name) pr = repo.get_pull(pr_number) symlink = "/symlink" - comment = create_pr_comment(job, job_id, app_name, pr, symlink) + comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params) assert comment is None @@ -343,6 +346,7 @@ def test_create_pr_comment_raises_once_then_succeeds(monkeypatch, mocked_github, ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") job_id = "123" app_name = "pytest" @@ -351,7 +355,7 @@ def test_create_pr_comment_raises_once_then_succeeds(monkeypatch, mocked_github, repo = mocked_github.get_repo(repo_name) pr = repo.get_pull(pr_number) symlink = "/symlink" - comment = create_pr_comment(job, job_id, app_name, pr, symlink) + comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params) assert comment.id == 1 assert pr.create_call_count == 2 @@ -369,6 +373,7 @@ def test_create_pr_comment_always_raises(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") job_id = "123" app_name = "pytest" @@ -378,7 +383,7 @@ def test_create_pr_comment_always_raises(monkeypatch, mocked_github, tmpdir): pr = repo.get_pull(pr_number) symlink = "/symlink" with 
pytest.raises(Exception) as err: - create_pr_comment(job, job_id, app_name, pr, symlink) + create_pr_comment(job, job_id, app_name, pr, symlink, build_params) assert err.type == CreateIssueCommentException assert pr.create_call_count == 3 @@ -396,6 +401,7 @@ def test_create_pr_comment_three_raises(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") + build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") job_id = "123" app_name = "pytest" @@ -405,7 +411,7 @@ def test_create_pr_comment_three_raises(monkeypatch, mocked_github, tmpdir): pr = repo.get_pull(pr_number) symlink = "/symlink" with pytest.raises(Exception) as err: - create_pr_comment(job, job_id, app_name, pr, symlink) + create_pr_comment(job, job_id, app_name, pr, symlink, build_params) assert err.type == CreateIssueCommentException assert pr.create_call_count == 3 From f974463389222e2b44bea895a480637e802e3f28 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:07:33 +0200 Subject: [PATCH 33/47] Fix example argument, and argument used to create build parameters in the test --- tools/build_params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/build_params.py b/tools/build_params.py index 4ba34886..05eb63eb 100644 --- a/tools/build_params.py +++ b/tools/build_params.py @@ -45,8 +45,8 @@ def __init__(self, build_parameters): EESSIBotBuildParams constructor Args: - build_params (string): string containing comma separated build parameters - Example: "arch:amd/zen4,accel:nvidia/cc90" + build_parameters (string): string containing comma separated build parameters + Example: "arch=amd/zen4,accel=nvidia/cc90" Raises: EESSIBotBuildParamsNameError: raised if parsing an unknown build parameter From 4104796084c244e516211e27d58c47c66d942e1e Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:09:50 
+0200 Subject: [PATCH 34/47] Forgot to actually git add this file again... anyway, updated the syntax for the string to create build parameter objects --- tests/test_task_build.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_task_build.py b/tests/test_task_build.py index eef91879..af49ac9b 100644 --- a/tests/test_task_build.py +++ b/tests/test_task_build.py @@ -288,7 +288,7 @@ def test_create_pr_comment_succeeds(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") - build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") + build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" app_name = "pytest" @@ -319,7 +319,7 @@ def test_create_pr_comment_succeeds_none(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") - build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") + build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" app_name = "pytest" @@ -346,7 +346,7 @@ def test_create_pr_comment_raises_once_then_succeeds(monkeypatch, mocked_github, ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") - build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") + build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" app_name = "pytest" @@ -373,7 +373,7 @@ def test_create_pr_comment_always_raises(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") - build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") + build_params = 
EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" app_name = "pytest" @@ -401,7 +401,7 @@ def test_create_pr_comment_three_raises(monkeypatch, mocked_github, tmpdir): ym = datetime.today().strftime('%Y.%m') pr_number = 1 job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") - build_params = EESSIBotBuildParams("arch:amd/zen4,accel:nvidia/cc90") + build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90") job_id = "123" app_name = "pytest" From 0b82386098b5952cb9f67d3d75c8dafd020f800f Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:16:44 +0200 Subject: [PATCH 35/47] Update the app.cfg used for the unit tests to account for the changes in this PR --- tests/test_app.cfg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_app.cfg b/tests/test_app.cfg index 4f833bbf..56c1d6cc 100644 --- a/tests/test_app.cfg +++ b/tests/test_app.cfg @@ -21,7 +21,10 @@ job_handover_protocol = hold_release awaits_release = job id `{job_id}` awaits release by job manager awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager -initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}` +new_job_instance_repo = New job on instance `{app_name}` for repository `{repo_id}` +build_on_arch = Building on: `{on_arch}`{on_accelerator} +build_for_arch = Building for: `{for_arch}`{for_accelerator} +jobdir = Job dir: `{symlink}` with_accelerator =  and accelerator `{accelerator}` [new_job_comments] From 372a7fe36b14486cbbe2eeeb20c5e2f703c9dfcf Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:36:00 +0200 Subject: [PATCH 36/47] Update tests for new requirement that all filters have to be present in context --- 
tests/test_tools_filter.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py index b689aa60..3684046a 100644 --- a/tests/test_tools_filter.py +++ b/tests/test_tools_filter.py @@ -231,21 +231,27 @@ def test_match_empty_context(complex_filter): assert expected == actual -def test_match_architecture_context(complex_filter): +# Test if it matches a context that does NOT contain all components +def test_match_sparse_context(complex_filter): context = {"architecture": "x86_64/intel/cascadelake"} - expected = True + expected = False actual = complex_filter.check_filters(context) assert expected == actual - -def test_match_architecture_job_context(complex_filter): - context = {"architecture": "x86_64/intel/cascadelake", "job": 1234} +def test_matching_context(complex_filter): + context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A"} expected = True actual = complex_filter.check_filters(context) assert expected == actual +def test_non_match_archictecture_context(complex_filter): + context = {"architecture": "x86_64/amd/zen4", "repository": "EESSI", "instance": "mybot", "job": 1234} + expected = False + actual = complex_filter.check_filters(context) + assert expected == actual + -def test_non_match_architecture_repository_context(complex_filter): +def test_non_match_repository_context(complex_filter): context = {"architecture": "x86_64/intel/cascadelake", "repository": "EESSI"} expected = False actual = complex_filter.check_filters(context) From d2be02a2a20141319a8380d694a7482420f97ab9 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:40:58 +0200 Subject: [PATCH 37/47] Update tests to accommodate for new behaviour of filter checking that all filter components need to be present in the context for it to be a (possible) match --- tests/test_tools_filter.py | 20 ++++++++++++++++++-- 1 file changed, 18
insertions(+), 2 deletions(-) diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py index 3684046a..ffee4b9e 100644 --- a/tests/test_tools_filter.py +++ b/tests/test_tools_filter.py @@ -231,19 +231,21 @@ def test_match_empty_context(complex_filter): assert expected == actual -# Test if it matches a context that does NOT contain all components +# A context lacking keys for components in the filter shouldn't match def test_match_sparse_context(complex_filter): context = {"architecture": "x86_64/intel/cascadelake"} expected = False actual = complex_filter.check_filters(context) assert expected == actual + def test_matching_context(complex_filter): context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A"} expected = True actual = complex_filter.check_filters(context) assert expected == actual + def test_non_match_archictecture_context(complex_filter): context = {"architecture": "x86_64/amd/zen4", "repository": "EESSI", "instance": "mybot", "job": 1234} expected = False @@ -252,11 +254,25 @@ def test_non_match_archictecture_context(complex_filter): def test_non_match_repository_context(complex_filter): - context = {"architecture": "x86_64/intel/cascadelake", "repository": "EESSI"} + context = {"architecture": "x86_64/intel/cascadelake", "repository": "EESSI", "instance": "A"} expected = False actual = complex_filter.check_filters(context) assert expected == actual +def test_non_match_instance_context(complex_filter): + context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "B"} + expected = False + actual = complex_filter.check_filters(context) + assert expected == actual + + +# If additional keys are present in the context for which no filter component is defined +# it should not prevent a match +def test_match_additional_context(complex_filter): + context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A", "job": 1234} 
+ expected = True + actual = complex_filter.check_filters(context) + assert expected == actual @pytest.fixture def arch_filter_slash_syntax(): From 6b3a118656ef90bb69ba60c181bcc6580278265b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:41:44 +0200 Subject: [PATCH 38/47] Fix hound issues --- tests/test_tools_filter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py index ffee4b9e..df464659 100644 --- a/tests/test_tools_filter.py +++ b/tests/test_tools_filter.py @@ -269,11 +269,13 @@ def test_non_match_instance_context(complex_filter): # If additional keys are present in the context for which no filter component is defined # it should not prevent a match def test_match_additional_context(complex_filter): - context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A", "job": 1234} + context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A", + "job": 1234} expected = True actual = complex_filter.check_filters(context) assert expected == actual + @pytest.fixture def arch_filter_slash_syntax(): af = EESSIBotActionFilter("") From 3b310f5fdec375ed64899edbe9c47e2b4b85401d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 16 Jul 2025 12:43:24 +0200 Subject: [PATCH 39/47] Fix flake8 issues --- eessi_bot_job_manager.py | 1 + tests/test_tools_filter.py | 1 + tools/commands.py | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index fedfb0ba..1efb9d85 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -73,6 +73,7 @@ config.RUNNING_JOB_COMMENTS_SETTING_RUNNING_JOB] # required } + class EESSIBotSoftwareLayerJobManager: """ Class for representing the job manager of the build-and-deploy bot. 
It diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py index df464659..c6516cf7 100644 --- a/tests/test_tools_filter.py +++ b/tests/test_tools_filter.py @@ -259,6 +259,7 @@ def test_non_match_repository_context(complex_filter): actual = complex_filter.check_filters(context) assert expected == actual + def test_non_match_instance_context(complex_filter): context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "B"} expected = False diff --git a/tools/commands.py b/tools/commands.py index 360279f5..a44e1fc8 100644 --- a/tools/commands.py +++ b/tools/commands.py @@ -20,6 +20,7 @@ from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError from tools.build_params import EESSIBotBuildParams + def contains_any_bot_command(body): """ Checks if argument contains any bot command. @@ -120,7 +121,7 @@ def __init__(self, cmd_str): for arg in cmd_as_list[1:]: if arg.startswith('for:'): # Extract everything after the 'for:' suffix and split by comma - filter_content=arg[4:] + filter_content = arg[4:] target_args.extend(filter_content.split(',')) # Join the filter arguments and pass to EESSIBotActionFilter From de0bd1c96cc7e2ba2e42c04c95bfbc07480ddec3 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 21 Jul 2025 11:03:12 +0200 Subject: [PATCH 40/47] Removed some comments that were only there for development, no longer needed --- tasks/build.py | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 71d55de9..89084867 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -201,28 +201,6 @@ def get_node_types(cfg): log(f"{fn}(): node type map '{json.dumps(node_type_map)}'") return node_type_map -# Replaced by get_node_types -# def get_architecture_targets(cfg): -# """ -# Obtain mappings of architecture targets to Slurm parameters -# -# Args: -# cfg (ConfigParser): ConfigParser instance holding full configuration -# (typically 
read from 'app.cfg') -# -# Returns: -# (dict): dictionary mapping architecture targets (format -# OS/SOFTWARE_SUBDIR) to architecture specific Slurm job submission -# parameters -# """ -# fn = sys._getframe().f_code.co_name -# -# architecture_targets = cfg[config.SECTION_ARCHITECTURETARGETS] -# -# arch_target_map = json.loads(architecture_targets.get(config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP)) -# log(f"{fn}(): arch target map '{json.dumps(arch_target_map)}'") -# return arch_target_map - def get_allowed_exportvars(cfg): """ @@ -868,7 +846,7 @@ def submit_job(job, cfg): # instances run on the same system job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME) - # add a default time limit of 24h to the job submit comnand if no other time + # add a default time limit of 24h to the job submit command if no other time # limit is specified already all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts]) all_opts_list = all_opts_str.split(" ") @@ -1003,15 +981,10 @@ def create_pr_comment(job, job_id, app_name, pr, symlink, build_params): # construct initial job comment buildenv = config.read_config()[config.SECTION_BUILDENV] job_handover_protocol = buildenv.get(config.BUILDENV_SETTING_JOB_HANDOVER_PROTOCOL) - # NO LONGER NEEDED now that we have cut up the sentence into different config items - # raw_comment_template = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT] new_job_instance_repo = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] build_on_arch = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] build_for_arch = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] jobdir = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR] - # NO LONGER NEEDED now that we have cut up the sentence into different config items - # Support using escape chars in the 
INITIAL_COMMENT, that means \n should be interpreted as unicode - # initial_comment_template = codecs.decode(raw_comment_template, 'unicode_escape') if job_handover_protocol == config.JOB_HANDOVER_PROTOCOL_DELAYED_BEGIN: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] From d4ecc7be4561057de41701a3b6cfef3c85009d29 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Thu, 24 Jul 2025 14:51:43 +0200 Subject: [PATCH 41/47] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clarify descriptions in app.cfg.example Co-authored-by: Bob Dröge --- app.cfg.example | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index 63d3155a..53a06ed4 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -309,18 +309,21 @@ signing = # arch_target_map = { # } -# Each entry in node_type_map describes a node type: os, what CPU architecture it has (cpu_subdir) the SLURM parameters -# that need to be passed to submit to it, which repository targets (repo_targets) can be build for on this node type -# and (optionally) which accelerators ('accel') -# All are strings, except repo_targets, which is a list of strings +# Each entry in the node_type_map dictionary describes a build node type. The key is a (descriptive) name for this build node, and its value is a dictionary containing the following build node properties as key-value pairs: + - os: its operating system (os) + - cpu_subdir: its CPU architecture + - slurm_params: the SLURM parameters that need to be passed to submit jobs to it + - repo_targets: supported repository targets for this node type + - accel (optional): which accelerators this node has +# All are strings, except repo_targets, which is a list of strings. 
# Note that the Slurm parameters should typically be chosen such that a single type of node (with one specific type of # CPU and one specific type of GPU) should be allocated. # Below is an example configuration for a system that contains 4 types of nodes: zen2 CPU nodes, zen4 CPU nodes, -# GPU nodes with an icelake CPU and A100 GPU, GPu nodes with a zen4 CPU and an H100 GPU -# The 'on:' argument to the bot build command determines which node type will be allocated for the build job -# E.g. 'bot:build on:arch=zen4,accel=nvidia/cc90 for:...' will match the gpu_h100 node type below -# If no 'on:' argument is passed to the build command, the 'for:' argument is used instead -# E.g. 'bot:build for:arch=icelake,accel=nvidia/cc80' will match the gpu_a100 node type below +# GPU nodes with an icelake CPU and A100 GPU, GPU nodes with a zen4 CPU and an H100 GPU. +# The 'on:' argument to the bot build command determines which node type will be allocated for the build job, +# e.g. 'bot:build on:arch=zen4,accel=nvidia/cc90 for:...' will match the gpu_h100 node type below. +# If no 'on:' argument is passed to the build command, the 'for:' argument is used instead, +# e.g. 'bot:build for:arch=icelake,accel=nvidia/cc80' will match the gpu_a100 node type below. node_type_map = { "cpu_zen2": { "os": "linux", From af731e19bba67ff16937454a8943bf1769c7dc86 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 28 Jul 2025 16:53:00 +0200 Subject: [PATCH 42/47] Re-comment the awaits_release, as this was done in develop as well. This option is no longer used --- app.cfg.example | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app.cfg.example b/app.cfg.example index 53a06ed4..62caa332 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -396,7 +396,8 @@ scontrol_command = /usr/bin/scontrol # are removed, the output (in PR comments) will lack important # information. 
[submitted_job_comments] -awaits_release = job id `{job_id}` awaits release by job manager +# awaits_release is no longer used since bot release v0.7.0 +# awaits_release = job id `{job_id}` awaits release by job manager awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager new_job_instance_repo = New job on instance `{app_name}` for repository `{repo_id}` From d48b35553fd38ef83dda6e85bf81dee453e2f25b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Mon, 28 Jul 2025 17:02:50 +0200 Subject: [PATCH 43/47] Replace Partition with Node type in show_config output. Also, update docstring for status command to show the correct return type (string) --- eessi_bot_event_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index d7beca4d..ffd5211d 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -417,7 +417,7 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev comment = f"Instance `{app_name}` is configured to build on:" for node in node_map: - comment += f"\n- Partition `{node}`:" + comment += f"\n- Node type `{node}`:" current_partition = node_map[node] if "os" in current_partition: comment += f"\n - OS: `{current_partition['os']}`" @@ -578,8 +578,8 @@ def handle_bot_command_status(self, event_info, bot_command): bot_command (EESSIBotCommand): command to be handled Returns: - github.IssueComment.IssueComment (note, github refers to - PyGithub, not the github from the internal connections module) + (string): list item with a link to the issue comment that was created + containing the status overview """ self.log("processing bot command 'status'") repo_name = event_info['raw_request_body']['repository']['full_name'] From 6017433494a2188e77f4593996b01296cb406382 Mon Sep 17 00:00:00 2001 From: 
Caspar van Leeuwen Date: Mon, 28 Jul 2025 20:15:32 +0200 Subject: [PATCH 44/47] Processed various smaller review comments for tasks/build.py. Elaborated on the docstring and comments of the template_to_regex function, since its functionality may be a bit hard (abstract) to understand otherwise. --- tasks/build.py | 58 +++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 89084867..6f66912a 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -612,19 +612,9 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): return [] jobs = [] - # This loop assumes the following structure for arch_target_map - # Note that 'accel' is a list, to easily allow a single CPU partition to be used for cross compilation - # for a lot of accelerator targets - # arch_target_map = { - # 'node_type_name': { - # 'os': 'linux', - # 'cpu_subdir': 'x86_64/amd/zen4', - # 'accel': ['nvidia/cc90'], - # 'slurm_params': '-p genoa ', - # 'repo_targets': ["eessi.io-2023.06-compat","eessi.io-2023.06-software"], - # }, - # 'node_type_name2': { - # ... etc + # Looping over all node types in the node_map to create a context for each node type and repository + # configured there. Then, check the action filters against these configs to find matching ones. 
+ # If there is a match, prepare the job dir and create the Job object for node_type_name, partition_info in node_map.items(): log(f"{fn}(): node_type_name is {node_type_name}, partition_info is {partition_info}") # Unpack for convenience @@ -661,20 +651,13 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): # Optionally add accelerator to the context if 'accel' in partition_info: context['accelerator'] = partition_info['accel'] - log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - if not action_filter.check_filters(context): - log(f"{fn}(): context does NOT satisfy filter(s), skipping") - continue - # check = check | action_filter.check_filters(context) - else: - log(f"{fn}(): context DOES satisfy filter(s), going on with job") - else: - log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - if not action_filter.check_filters(context): - log(f"{fn}(): context does NOT satisfy filter(s), skipping") - continue - else: - log(f"{fn}(): context DOES satisfy filter(s), going on with job") + + log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") + if not action_filter.check_filters(context): + log(f"{fn}(): context does NOT satisfy filter(s), skipping") + continue + else: + log(f"{fn}(): context DOES satisfy filter(s), going on with job") # we reached this point when the filter matched (otherwise we # 'continue' with the next repository) # We create a specific job directory for the architecture that is going to be build 'for:' @@ -1139,6 +1122,16 @@ def template_to_regex(format_str, with_eol=True): character. This is a requirement if it has to succesfully match a formatting string that ends with a formatting field. 
+ Example: if one function creates a formatted string + value = "my_field_value" + format_str = f"This is my string, with a custom field: {my_field}\n" + formatted_string = format_str.format(my_field=value) + Another function can then grab the original value of my_field by doing: + my_re = template_to_regex(format_str) + match_object = re.match(my_re, formatted_string) + match_object['my_field'] then contains "my_field_value" + This is useful when e.g. one function posts a GitHub comment, and another wants to extract information from that + Args: format_str (string): a formatting string, with template placeholders. with_eol (bool, optional): a boolean, indicating if the formatting string is expected to be followed by @@ -1160,8 +1153,10 @@ def template_to_regex(format_str, with_eol=True): # We use re.escape to escape any special characters in the literal_text, as we want to match those literally regex_parts.append(re.escape(literal_text)) if field_name is not None: - # Create a non-greedy, named capture group. Note that the name itself as an f-string + # Create a non-greedy, named capture group. Note that the {field_name} itself is a format specifier # So we get the actual field name as the name of the capture group + # In other words, if our format_str is "My string with {a_field}" then the named capture group will be + # called 'a_field' # We match any character, but in a non-greedy way. Thus, as soon as it can match the next # literal text section, it will - thus assuming that that's the end of the field # We use .* to allow for empty fields (such as the optional accelerator fields) @@ -1171,7 +1166,12 @@ def template_to_regex(format_str, with_eol=True): # strategy. Otherwise, a formatting string that ends with a formatting item would only match the first letter # of the field, because it doesn't find anything to match after (and it is non-greedy). 
With the $, it has # something to match after the field, thus making sure it matches the whole field - # This does assume that + # This does assume that the format_str in the string to be matched is indeed followed by and end-of-line character + # I.e. if a function that creates the formatted string does + # my_string = f"{format_str}\n" + # (i.e. has an end-of-line after the format specifier) it can be matched by another function that does + # my_re = template_to_regex(format_str) + # re.match(my_re, my_string) full_pattern = ''.join(regex_parts) if with_eol: full_pattern += "$" From 279e08fb5f9aace633e3d717d21ef82d735c5347 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Tue, 29 Jul 2025 13:48:43 +0200 Subject: [PATCH 45/47] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bob Dröge --- eessi_bot_event_handler.py | 18 +++++++++--------- tests/test_tools_filter.py | 2 +- tools/commands.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index ffd5211d..a8beff82 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -418,15 +418,15 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev comment = f"Instance `{app_name}` is configured to build on:" for node in node_map: comment += f"\n- Node type `{node}`:" - current_partition = node_map[node] - if "os" in current_partition: - comment += f"\n - OS: `{current_partition['os']}`" - if "cpu_subdir" in current_partition: - comment += f"\n - CPU architecture: `{current_partition['cpu_subdir']}`" - if "repo_targets" in current_partition: - comment += f"\n - Repositories: `{current_partition['repo_targets']}`" - if "accel" in current_partition: - comment += f"\n - Accelerators: `{current_partition['accel']}`" + current_node_type = node_map[node] + if "os" in 
current_node_type: + comment += f"\n - OS: `{current_node_type['os']}`" + if "cpu_subdir" in current_node_type: + comment += f"\n - CPU architecture: `{current_node_type['cpu_subdir']}`" + if "repo_targets" in current_node_type: + comment += f"\n - Repositories: `{current_node_type['repo_targets']}`" + if "accel" in current_node_type: + comment += f"\n - Accelerators: `{current_node_type['accel']}`" comment += "\n" self.log(f"PR opened: comment '{comment}'") diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py index c6516cf7..26cd31cd 100644 --- a/tests/test_tools_filter.py +++ b/tests/test_tools_filter.py @@ -246,7 +246,7 @@ def test_matching_context(complex_filter): assert expected == actual -def test_non_match_archictecture_context(complex_filter): +def test_non_match_architecture_context(complex_filter): context = {"architecture": "x86_64/amd/zen4", "repository": "EESSI", "instance": "mybot", "job": 1234} expected = False actual = complex_filter.check_filters(context) diff --git a/tools/commands.py b/tools/commands.py index a44e1fc8..3902ab70 100644 --- a/tools/commands.py +++ b/tools/commands.py @@ -115,7 +115,7 @@ def __init__(self, cmd_str): # If no 'on:' is found in the argument list, everything that follows the 'for:' argument # (until the next space) is considered the argument list for the action filters - # Essentially, this represents a native build, i.e. the hardware we build on should be the + # Essentially, this represents a native build, i.e. 
the hardware we build for should be the # hardware we build on if not on_found: for arg in cmd_as_list[1:]: From 80f5f1db9a7587d7d62f9527ed426a5a968c59d4 Mon Sep 17 00:00:00 2001 From: casparvl casparvl Date: Tue, 29 Jul 2025 13:18:34 +0000 Subject: [PATCH 46/47] Fix indentation issue --- tasks/build.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 6f66912a..d72439f1 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -651,13 +651,13 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): # Optionally add accelerator to the context if 'accel' in partition_info: context['accelerator'] = partition_info['accel'] + log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - log(f"{fn}(): context is '{json.dumps(context, indent=4)}'") - if not action_filter.check_filters(context): - log(f"{fn}(): context does NOT satisfy filter(s), skipping") - continue - else: - log(f"{fn}(): context DOES satisfy filter(s), going on with job") + if not action_filter.check_filters(context): + log(f"{fn}(): context does NOT satisfy filter(s), skipping") + continue + else: + log(f"{fn}(): context DOES satisfy filter(s), going on with job") # we reached this point when the filter matched (otherwise we # 'continue' with the next repository) # We create a specific job directory for the architecture that is going to be build 'for:' From 2f3c0aee679a20f3f31c8e2c49cf587e4fc34ece Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com> Date: Thu, 31 Jul 2025 16:13:46 +0200 Subject: [PATCH 47/47] Update tasks/build.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bob Dröge --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index d72439f1..6148227e 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -1166,7 +1166,7 @@ def 
template_to_regex(format_str, with_eol=True): # strategy. Otherwise, a formatting string that ends with a formatting item would only match the first letter # of the field, because it doesn't find anything to match after (and it is non-greedy). With the $, it has # something to match after the field, thus making sure it matches the whole field - # This does assume that the format_str in the string to be matched is indeed followed by and end-of-line character + # This does assume that the format_str in the string to be matched is indeed followed by an end-of-line character # I.e. if a function that creates the formatted string does # my_string = f"{format_str}\n" # (i.e. has an end-of-line after the format specifier) it can be matched by another function that does