1+ import argparse
12import bz2
23import glob
34import os
67from datetime import datetime
78from multiprocessing import Pool
89
10+ description = """
11+ This script creates a sequence of easystack files that may be used to replicate the software installed
12+ in a reference software subdirectory.
13+
14+ The script
15+ - Determines all software that was installed in the reference prefix
16+ - Sorts it in order of installation. For software that was later rebuilt, the original installation time is used.
17+ - In the installation order, easyconfig names are added to easystack files
18+ - A new easystack file is started when either the easybuild version to be used changes, or when the maximum build
19+ time is exceeded (build times of the software in the reference software subdir are used to estimate this)
20+
21+ By sticking to the original order in which software was installed, using the robot should not be needed. Since nothing
22+ is installed by the robot, one is able to guarantee that the same easyconfigs and easyblocks are used that were
23+ used during original installation time.
24+
25+ If an argument is provided for --eb-override-version, installations of EasyBuild itself are performed before
26+ anything else, with the EasyBuild version provided as argument.
27+
28+ Example:
29+
30+ python3 eessi_software_reproduce_stack.py --reference-software-subdir=x86_64/amd/zen2 --eessi-version 2025.06
31+ will create easystacks that allow you to replicate the software installed in
32+ /cvmfs/software.eessi.io/versions/<eessi-version>/software/linux/<reference-software-subdir>,
33+ provided the logs of these installations were backed up to
34+ /cvmfs/software.eessi.io/versions/<eessi-version>/software/linux/<reference-software-subdir>/reprod
35+ (which was standard practice starting with EESSI version 2025.06).
36+ """
# Build the command-line interface.
# The module-level ``description`` text is handed to the parser so that ``--help`` shows the full
# explanation of what the script does; RawDescriptionHelpFormatter keeps its line breaks and lists.
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
    '--max-build-time',
    type=int,
    default=240,
    help='Maximum build time in minutes for each easystack file',
)
parser.add_argument(
    '--eb-override-version',
    type=str,
    default=None,
    help=(
        'EasyBuild version used to install other EasyBuild versions. The default (None) means it will '
        'attempt to use the EasyBuild that was used in the reference-software-subdir, but if this was a '
        'bootstrapped build (e.g. EB-5.1.1 building EB-5.1.1) in practice the latest EB will be used by '
        'the EESSI build scripts - creating a false suggestion about which version was used to install '
        'EasyBuild.'
    ),
)
# Both options below are interpolated into the directory that is crawled; without them the path
# would contain the literal text "None", so they are mandatory.
parser.add_argument(
    '--reference-software-subdir',
    type=str,
    required=True,
    help='Reference software subdirectory',
)
parser.add_argument(
    '--eessi-version',
    type=str,
    required=True,
    help='EESSI version',
)
args = parser.parse_args()

# EasyBuild bootstrap version
eb_override_version = args.eb_override_version

# Define the directory to crawl
root_dir = (
    f"/cvmfs/software.eessi.io/versions/{args.eessi_version}"
    f"/software/linux/{args.reference_software_subdir}/reprod"
)

# Define the maximum build time per easystack file.
# The command-line value is given in minutes; it is multiplied by 60 here
# (presumably to compare against build durations in seconds - confirm against the consumer).
max_build_time = args.max_build_time * 60
52+ import glob
53+ import os
54+ import pathlib
55+ import re
56+ from datetime import datetime
57+ from multiprocessing import Pool
58+
# EasyBuild bootstrap version.
# Taken from --eb-override-version rather than pinned to a fixed release: a hard-coded value at this
# point would overwrite whatever was parsed from the command line and make the option a no-op.
eb_override_version = args.eb_override_version
1161
@@ -52,6 +102,7 @@ def get_build_duration(file: pathlib.Path, encoding: str = "utf-8") -> float:
52102 duration = datetime .strptime (end_time .group (1 ), format_str ) - datetime .strptime (start_time .group (1 ), format_str )
53103
54104 return duration .total_seconds ()
105+
55106
56107def get_easybuild_version (file : pathlib .Path , encoding : str = "utf-8" ) -> str :
57108 """
@@ -68,6 +119,19 @@ def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
68119
69120 return easybuild_version
70121
122+
# write_software_info: append one easystack section to `easystack_file`.
#
# Parameters:
#   local_software_info: mapping of software name -> info dict; the keys read here are
#                        "build_duration", "easyconfig_path" and "easyblock_path".
#   easystack_file:      path of the easystack file; opened in append mode ("a"), so existing
#                        content is kept and each call adds another header + entry list.
#   build_duration:      total build duration reported in the header comment, formatted with no
#                        decimals and labelled as seconds.
#
# Output written, in order: a "# <file>: total build duration = ..." comment line, an
# "easyconfigs:" line, then per entry the easyconfig path followed by an "options:" line and an
# "include-easyblocks:" line carrying the entry's easyblock path. A progress message is printed
# for every entry added.
#
# NOTE(review): the leading whitespace inside the written string literals determines the YAML
# nesting; it is not reliably visible in this view - verify against the real file before editing.
123+ def write_software_info (local_software_info , easystack_file , build_duration ):
124+ with open (easystack_file , "a" ) as easystack_file_handle :
125+ easystack_file_handle .write (f"# { easystack_file } : total build duration = { build_duration :.0f} seconds\n " )
126+ easystack_file_handle .write ("easyconfigs:\n " )
127+ for software_name , info in local_software_info .items ():
128+ print (f'Adding { software_name } with build duration { info ["build_duration" ]} to easystack { easystack_file } .' )
129+ easystack_file_handle .write (f' - { info ["easyconfig_path" ]} \n ' )
130+ easystack_file_handle .write (' options:\n ' )
131+ easystack_file_handle .write (f' include-easyblocks: { info ["easyblock_path" ]} \n ' )
132+
133+
134+ # Create an inner loop body to parallelize over
71135def inner_loop (software_name ):
72136 software_info = {}
73137 software_dir = os .path .join (root_dir , software_name )
@@ -136,13 +200,6 @@ def inner_loop(software_name):
136200with Pool (processes = n_workers ) as pool :
137201 software_info_list = pool .map (inner_loop , software_list )
138202
139- # print(f"Return of sofware_info_list length: {len(software_info_list)}")
140- # print(f"Return after parallel section: {software_info_list}")
141- # counter = 0
142- # for item in software_info_list:
143- # counter = counter + 1
144- # print(f"For process {counter}, software_info_list length is {len(item)}, content: {item}")
145-
146203# Each worker in the pool creates its own software info dict. The result of the map function is a list of these dicts
147204# Here, we merge all these dicts into one. Note that we know the keys to be unique, so no risk of clashes
148205
@@ -154,15 +211,6 @@ def inner_loop(software_name):
154211# Order the list of software chronologically
155212software_info = dict (sorted (software_info .items (), key = lambda item : item [1 ]["initial_build_time" ]))
156213
157- def write_software_info (local_software_info , easystack_file , build_duration ):
158- with open (easystack_file , "a" ) as easystack_file_handle :
159- easystack_file_handle .write (f"# { easystack_file } : total build duration = { build_duration :.0f} seconds\n " )
160- easystack_file_handle .write ("easyconfigs:\n " )
161- for software_name , info in local_software_info .items ():
162- print (f'Adding { software_name } with build duration { info ["build_duration" ]} to easystack { easystack_file } .' )
163- easystack_file_handle .write (f' - { info ["easyconfig_path" ]} \n ' )
164- easystack_file_handle .write (' options:\n ' )
165- easystack_file_handle .write (f' include-easyblocks: { info ["easyblock_path" ]} \n ' )
166214
167215# Write the list to an easystack file
168216sequence_number = 1
0 commit comments