Skip to content

Commit 8e683bc

Browse files
author
Caspar van Leeuwen
committed
Introduce debug logging
1 parent e8fbec1 commit 8e683bc

1 file changed

Lines changed: 23 additions & 31 deletions

File tree

eessi_software_reproduce_stack.py

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,11 @@
3535
(which was standard practice starting with EESSI version 2025.06).
3636
"""
3737
parser = argparse.ArgumentParser(description='Reproduce EESSI software stack')
38-
parser.add_argument('--max-build-time', type=int, default=240, help='Maximum build time in minutes for each easystack file')
39-
parser.add_argument('--eb-override-version', type=str, default=None, help='EasyBuild version used to install other EasyBuild versions. The default (None) means it will attempt to use the EasyBuild that was used in the reference-software-subdir, but if this was a bootstrapped build (e.g. EB-5.1.1 building EB-5.1.1) in practice the latest EB will be used by the EESSI build scripts - creating a false suggestion about which version was used to install EasyBuild.')
40-
parser.add_argument('--reference-software-subdir', type=str, help='Reference software subdirectory')
41-
parser.add_argument('--eessi-version', type=str, help='EESSI version')
38+
parser.add_argument('-m', '--max-build-time', type=int, default=240, help='Maximum build time in minutes for each easystack file')
39+
parser.add_argument('-o', '--eb-override-version', type=str, default=None, help='EasyBuild version used to install other EasyBuild versions. The default (None) means it will attempt to use the EasyBuild that was used in the reference-software-subdir, but if this was a bootstrapped build (e.g. EB-5.1.1 building EB-5.1.1) in practice the latest EB will be used by the EESSI build scripts - creating a false suggestion about which version was used to install EasyBuild.')
40+
parser.add_argument('-r', '--reference-software-subdir', type=str, required=True, help='Reference software subdirectory, e.g. x86_64/amd/zen4')
41+
parser.add_argument('-e', '--eessi-version', type=str, required=True, help='EESSI version')
42+
parser.add_argument('-d', '--debug', action='store_true', help="Print debugging output")
4243
args = parser.parse_args()
4344

4445
# EasyBuild bootstrap version
@@ -48,29 +49,14 @@
4849
root_dir = f"/cvmfs/software.eessi.io/versions/{args.eessi_version}/software/linux/{args.reference_software_subdir}/reprod"
4950

5051
# Define the maximum build time per easystack file
51-
max_build_time = args.max_build_time * 60
52-
import glob
53-
import os
54-
import pathlib
55-
import re
56-
from datetime import datetime
57-
from multiprocessing import Pool
58-
59-
# EasyBuild bootstrap version
60-
eb_override_version = "5.2.0"
61-
62-
# Define the directory to crawl
63-
root_dir = "/cvmfs/software.eessi.io/versions/2025.06/software/linux/x86_64/amd/zen2/reprod"
64-
65-
# Define the maximum build time per easystack file
66-
max_build_time = 14400
52+
max_build_time = args.max_build_time
6753

6854
# Initialize the list to store software information
6955
software_info = {}
7056

7157
def get_build_duration(file: pathlib.Path, encoding: str = "utf-8") -> float:
7258
"""
73-
Returns the total build duration (in seconds) by comparing the first and last timestamps from an EasyBuild log file
59+
Returns the total build duration (in minutes) by comparing the first and last timestamps from an EasyBuild log file
7460
"""
7561
# First, get the first and last line of the EB log
7662
# Since this is a compressed file, we cannot seek, and have to read line-by-line to find the first and last line
@@ -101,7 +87,7 @@ def get_build_duration(file: pathlib.Path, encoding: str = "utf-8") -> float:
10187
format_str = "%Y-%m-%d %H:%M:%S,%f"
10288
duration = datetime.strptime(end_time.group(1), format_str) - datetime.strptime(start_time.group(1), format_str)
10389

104-
return duration.total_seconds()
90+
return duration.total_seconds()/60
10591

10692

10793
def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
@@ -122,10 +108,11 @@ def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
122108

123109
def write_software_info(local_software_info, easystack_file, build_duration):
124110
with open(easystack_file, "a") as easystack_file_handle:
125-
easystack_file_handle.write(f"# {easystack_file}: total build duration = {build_duration:.0f} seconds\n")
111+
easystack_file_handle.write(f"# {easystack_file}: total build duration = {build_duration:.0f} minutes\n")
126112
easystack_file_handle.write("easyconfigs:\n")
127113
for software_name, info in local_software_info.items():
128-
print(f'Adding {software_name} with build duration {info["build_duration"]} to easystack {easystack_file}.')
114+
if args.debug:
115+
print(f'Adding {software_name} with build duration {info["build_duration"]:.0f} to easystack {easystack_file}.')
129116
easystack_file_handle.write(f' - {info["easyconfig_path"]}\n')
130117
easystack_file_handle.write(' options:\n')
131118
easystack_file_handle.write(f' include-easyblocks: {info["easyblock_path"]}\n')
@@ -195,30 +182,35 @@ def inner_loop(software_name):
195182

196183
# Parallelize work over each dir present in the root_dir
197184
software_list = os.listdir(root_dir)
198-
software_list = software_list[0:10]
199-
print(f"software list: {software_list}")
185+
186+
print(f"Software list: {len(software_list)} items")
187+
if args.debug:
188+
print(f"{software_list}")
189+
190+
print(f"Gathering information from the installation logs, this may take a while...")
200191
with Pool(processes = n_workers) as pool:
201192
software_info_list = pool.map(inner_loop, software_list)
202193

203194
# Each worker in the pool creates its own software info dict. The result of the map function is a list of these dicts
204195
# Here, we merge all these dicts into one. Note that we know the keys to be unique, so no risk of clashes
205196

206197
software_info = {k: v for d in software_info_list if d for k, v in d.items()} # if a key occurs in several dicts, the last dict determines the value
207-
print(f"Located {len(software_info)} software installations in {root_dir}")
208-
import pprint
209-
pprint.pprint(software_info)
198+
print(f"Gathered information for {len(software_info)} software installations (including versions) in {root_dir}")
199+
if args.debug:
200+
import pprint
201+
pprint.pprint(software_info)
210202

211203
# Order the list of software chronologically
212204
software_info = dict(sorted(software_info.items(), key=lambda item: item[1]["initial_build_time"]))
213205

214-
215206
# Write the list to an easystack file
216207
sequence_number = 1
217208
previous_eb_ver = None
218209
total_build_duration = 0
219210
build_duration_current_easystack = 0
220211
write_preamble = True
221212
local_software_info = {}
213+
print("Writing software build information to easystack files...")
222214
# We loop over software_info items and add those to local_software_info until we either hit a new EB version that
223215
# needs to be used, or exceed the maximum build duration. Then, we write the local_software_info to an easystack
224216
# file, reset the local_software_info and the build duration counters, and continue with the next iteration
@@ -246,4 +238,4 @@ def inner_loop(software_name):
246238
easystack_file = f'easystack-{sequence_number}-eb-{previous_eb_ver}.yml'
247239
write_software_info(local_software_info, easystack_file, build_duration_current_easystack)
248240

249-
print(f"Total of {sequence_number} easystacks with a total build time of {total_build_duration} seconds")
241+
print(f"Total of {sequence_number} easystacks with a total build time of {total_build_duration} minutes")

0 commit comments

Comments
 (0)