3535 (which was standard practice starting with EESSI version 2025.06).
3636"""
3737parser = argparse .ArgumentParser (description = 'Reproduce EESSI software stack' )
38- parser .add_argument ('--max-build-time' , type = int , default = 240 , help = 'Maximum build time in minutes for each easystack file' )
39- parser .add_argument ('--eb-override-version' , type = str , default = None , help = 'EasyBuild version used to install other EasyBuild versions. The default (None) means it will attempt to use the EasyBuild that was used in the reference-software-subdir, but if this was a bootstrapped build (e.g. EB-5.1.1 building EB-5.1.1) in practice the latest EB will be used by the EESSI build scripts - creating a false suggestion about which version was used to install EasyBuild.' )
40- parser .add_argument ('--reference-software-subdir' , type = str , help = 'Reference software subdirectory' )
41- parser .add_argument ('--eessi-version' , type = str , help = 'EESSI version' )
38+ parser .add_argument ('-m' , '--max-build-time' , type = int , default = 240 , help = 'Maximum build time in minutes for each easystack file' )
39+ parser .add_argument ('-o' , '--eb-override-version' , type = str , default = None , help = 'EasyBuild version used to install other EasyBuild versions. The default (None) means it will attempt to use the EasyBuild that was used in the reference-software-subdir, but if this was a bootstrapped build (e.g. EB-5.1.1 building EB-5.1.1) in practice the latest EB will be used by the EESSI build scripts - creating a false suggestion about which version was used to install EasyBuild.' )
40+ parser .add_argument ('-r' , '--reference-software-subdir' , type = str , required = True , help = 'Reference software subdirectory, e.g. x86_64/amd/zen4' )
41+ parser .add_argument ('-e' , '--eessi-version' , type = str , required = True , help = 'EESSI version' )
42+ parser .add_argument ('-d' , '--debug' , action = 'store_true' , help = "Print debugging output" )
4243args = parser .parse_args ()
4344
4445# EasyBuild bootstrap version
4849root_dir = f"/cvmfs/software.eessi.io/versions/{ args .eessi_version } /software/linux/{ args .reference_software_subdir } /reprod"
4950
5051# Define the maximum build time per easystack file
51- max_build_time = args .max_build_time * 60
52- import glob
53- import os
54- import pathlib
55- import re
56- from datetime import datetime
57- from multiprocessing import Pool
58-
59- # EasyBuild bootstrap version
60- eb_override_version = "5.2.0"
61-
62- # Define the directory to crawl
63- root_dir = "/cvmfs/software.eessi.io/versions/2025.06/software/linux/x86_64/amd/zen2/reprod"
64-
65- # Define the maximum build time per easystack file
66- max_build_time = 14400
52+ max_build_time = args .max_build_time
6753
6854# Initialize the list to store software information
6955software_info = {}
7056
7157def get_build_duration (file : pathlib .Path , encoding : str = "utf-8" ) -> float :
7258 """
73- Returns the total build duration (in seconds ) by comparing the first and last timestamps from an EasyBuild log file
59+ Returns the total build duration (in minutes ) by comparing the first and last timestamps from an EasyBuild log file
7460 """
7561 # First, get the first and last line of the EB log
7662 # Since this is a compressed file, we cannot seek, and have to read line-by-line to find the first and last line
@@ -101,7 +87,7 @@ def get_build_duration(file: pathlib.Path, encoding: str = "utf-8") -> float:
10187 format_str = "%Y-%m-%d %H:%M:%S,%f"
10288 duration = datetime .strptime (end_time .group (1 ), format_str ) - datetime .strptime (start_time .group (1 ), format_str )
10389
104- return duration .total_seconds ()
90+ return duration .total_seconds ()/ 60
10591
10692
10793def get_easybuild_version (file : pathlib .Path , encoding : str = "utf-8" ) -> str :
@@ -122,10 +108,11 @@ def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
122108
123109def write_software_info (local_software_info , easystack_file , build_duration ):
124110 with open (easystack_file , "a" ) as easystack_file_handle :
125- easystack_file_handle .write (f"# { easystack_file } : total build duration = { build_duration :.0f} seconds \n " )
111+ easystack_file_handle .write (f"# { easystack_file } : total build duration = { build_duration :.0f} minutes \n " )
126112 easystack_file_handle .write ("easyconfigs:\n " )
127113 for software_name , info in local_software_info .items ():
128- print (f'Adding { software_name } with build duration { info ["build_duration" ]} to easystack { easystack_file } .' )
114+ if args .debug :
115+ print (f'Adding { software_name } with build duration { info ["build_duration" ]:.0f} to easystack { easystack_file } .' )
129116 easystack_file_handle .write (f' - { info ["easyconfig_path" ]} \n ' )
130117 easystack_file_handle .write (' options:\n ' )
131118 easystack_file_handle .write (f' include-easyblocks: { info ["easyblock_path" ]} \n ' )
@@ -195,30 +182,35 @@ def inner_loop(software_name):
195182
196183# Parallelize work over each dir present in the root_dir
197184software_list = os .listdir (root_dir )
198- software_list = software_list [0 :10 ]
199- print (f"software list: { software_list } " )
185+
186+ print (f"Software list: { len (software_list )} items" )
187+ if args .debug :
188+ print (f"{ software_list } " )
189+
190+ print (f"Gathering information from the installation logs, this may take a while..." )
200191with Pool (processes = n_workers ) as pool :
201192 software_info_list = pool .map (inner_loop , software_list )
202193
203194# Each worker in the pool creates its own software info dict. The result of the map function is a list of these dicts
204195# Here, we merge all these dicts into one. Note that we know the keys to be unique, so no risk of clashes
205196
206197software_info = {k : v for d in software_info_list if d for k , v in d .items ()} # on duplicate keys, the last dict determines the value
207- print (f"Located { len (software_info )} software installations in { root_dir } " )
208- import pprint
209- pprint .pprint (software_info )
198+ print (f"Gathered information for { len (software_info )} software installations (including versions) in { root_dir } " )
199+ if args .debug :
200+ import pprint
201+ pprint .pprint (software_info )
210202
211203# Order the list of software chronologically
212204software_info = dict (sorted (software_info .items (), key = lambda item : item [1 ]["initial_build_time" ]))
213205
214-
215206# Write the list to an easystack file
216207sequence_number = 1
217208previous_eb_ver = None
218209total_build_duration = 0
219210build_duration_current_easystack = 0
220211write_preamble = True
221212local_software_info = {}
213+ print ("Writing software build information to easystack files..." )
222214# We loop over software_info items and add those to local_software_info until we either hit a new EB version that
223215# needs to be used, or exceed the maximum build duration. Then, we write the local_software_info to an easystack
224216# file, reset the local_software_info and the build duration counters, and continue with the next iteration
@@ -246,4 +238,4 @@ def inner_loop(software_name):
246238easystack_file = f'easystack-{ sequence_number } -eb-{ previous_eb_ver } .yml'
247239write_software_info (local_software_info , easystack_file , build_duration_current_easystack )
248240
249- print (f"Total of { sequence_number } easystacks with a total build time of { total_build_duration } seconds " )
241+ print (f"Total of { sequence_number } easystacks with a total build time of { total_build_duration } minutes " )
0 commit comments