Skip to content

Commit e8fbec1

Browse files
author
Caspar van Leeuwen
committed
Add command line arguments to make things configurable
1 parent 06192bb commit e8fbec1

1 file changed

Lines changed: 64 additions & 16 deletions

File tree

eessi_software_reproduce_stack.py

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import argparse
12
import bz2
23
import glob
34
import os
@@ -6,6 +7,55 @@
67
from datetime import datetime
78
from multiprocessing import Pool
89

10+
description = """
11+
This script creates a sequence of easystack files that may be used to replicate the software installed
12+
in a reference software subdirectory.
13+
14+
The script
15+
- Determines all software that was installed in the reference prefix
16+
- Sorts it in order of installation. For software that was later rebuilt, the original installation time is used.
17+
- In the installation order, easyconfig names are added to easystack files
18+
- A new easystack file is started when either the easybuild version to be used changes, or when the maximum build
19+
time is exceeded (build times of the software in the reference software subdir are used to estimate this)
20+
21+
By sticking to the original order in which software was installed, using the robot should not be needed. Since nothing
22+
is installed by the robot, one is able to guarantee that the same easyconfigs and easyblocks are used that were
23+
used during original installation time.
24+
25+
If an argument is provided for --eb-override-version, installations of EasyBuild itself are performed before
26+
anything else, with the EasyBuild version provided as argument.
27+
28+
Example:
29+
30+
python3 eessi_software_reproduce_stack.py --reference-software-subdir=x86_64/amd/zen2 --eessi-version 2025.06
31+
will create easystacks that allow you to replicate the software installed in
32+
/cvmfs/software.eessi.io/versions/<eessi-version>/software/linux/<reference-software-subdir>,
33+
provided the logs of these installations were backed up to
34+
/cvmfs/software.eessi.io/versions/<eessi-version>/software/linux/<reference-software-subdir>/reprod
35+
(which was standard practice starting with EESSI version 2025.06).
36+
"""
37+
# Build the command line interface. The module-level ``description`` string is
# passed through verbatim (RawDescriptionHelpFormatter keeps its line breaks
# and bullet list intact) so that ``--help`` actually shows it.
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
    '--max-build-time',
    type=int,
    default=240,
    help='Maximum build time in minutes for each easystack file',
)
parser.add_argument(
    '--eb-override-version',
    type=str,
    default=None,
    help=(
        'EasyBuild version used to install other EasyBuild versions. '
        'The default (None) means it will attempt to use the EasyBuild that was used in the '
        'reference-software-subdir, but if this was a bootstrapped build '
        '(e.g. EB-5.1.1 building EB-5.1.1) in practice the latest EB will be used by the '
        'EESSI build scripts - creating a false suggestion about which version was used to '
        'install EasyBuild.'
    ),
)
# Both of the following are interpolated into root_dir below. Without a value
# the path would contain the literal text "None", so they are mandatory.
parser.add_argument(
    '--reference-software-subdir',
    type=str,
    required=True,
    help='Reference software subdirectory',
)
parser.add_argument(
    '--eessi-version',
    type=str,
    required=True,
    help='EESSI version',
)
args = parser.parse_args()

# EasyBuild bootstrap version (None when --eb-override-version is not given)
eb_override_version = args.eb_override_version

# Define the directory to crawl
root_dir = f"/cvmfs/software.eessi.io/versions/{args.eessi_version}/software/linux/{args.reference_software_subdir}/reprod"

# Define the maximum build time per easystack file.
# The command line value is in minutes; it is stored here in seconds.
max_build_time = args.max_build_time * 60
52+
import glob
53+
import os
54+
import pathlib
55+
import re
56+
from datetime import datetime
57+
from multiprocessing import Pool
58+
959
# EasyBuild bootstrap version.
# Taken from the --eb-override-version command line option (None when the
# option is not given). A hard-coded value here would silently override
# whatever the user passed on the command line.
eb_override_version = args.eb_override_version
1161

@@ -52,6 +102,7 @@ def get_build_duration(file: pathlib.Path, encoding: str = "utf-8") -> float:
52102
duration = datetime.strptime(end_time.group(1), format_str) - datetime.strptime(start_time.group(1), format_str)
53103

54104
return duration.total_seconds()
105+
55106

56107
def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
57108
"""
@@ -68,6 +119,19 @@ def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
68119

69120
return easybuild_version
70121

122+
123+
def write_software_info(local_software_info, easystack_file, build_duration):
    """Append one easystack section describing a batch of software to a file.

    The file is opened in append mode. A comment line with the file name and
    the total build duration (rounded to whole seconds) is written first,
    followed by an ``easyconfigs:`` key and one entry per software item.

    :param local_software_info: mapping of software name to an info dict that
        provides the keys ``build_duration``, ``easyconfig_path`` and
        ``easyblock_path``
    :param easystack_file: path of the easystack file to append to
    :param build_duration: total build duration, in seconds, of all items
    """
    # NOTE(review): the leading whitespace inside the string literals below is
    # reproduced exactly as visible in the reviewed source; confirm that it
    # matches the YAML indentation expected for easystack files.
    with open(easystack_file, "a") as out:
        out.write(f"# {easystack_file}: total build duration = {build_duration:.0f} seconds\n")
        out.write("easyconfigs:\n")
        for name in local_software_info:
            entry = local_software_info[name]
            # Progress message on stdout for every item that is added
            print(f'Adding {name} with build duration {entry["build_duration"]} to easystack {easystack_file}.')
            out.write(f' - {entry["easyconfig_path"]}\n')
            out.write(' options:\n')
            out.write(f' include-easyblocks: {entry["easyblock_path"]}\n')
132+
133+
134+
# Create an inner loop body to parallelize over
71135
def inner_loop(software_name):
72136
software_info = {}
73137
software_dir = os.path.join(root_dir, software_name)
@@ -136,13 +200,6 @@ def inner_loop(software_name):
136200
with Pool(processes = n_workers) as pool:
137201
software_info_list = pool.map(inner_loop, software_list)
138202

139-
# print(f"Return of sofware_info_list length: {len(software_info_list)}")
140-
# print(f"Return after parallel section: {software_info_list}")
141-
# counter = 0
142-
# for item in software_info_list:
143-
# counter = counter + 1
144-
# print(f"For process {counter}, software_info_list length is {len(item)}, content: {item}")
145-
146203
# Each worker in the pool creates its own software info dict. The result of the map function is a list of these dicts
147204
# Here, we merge all these dicts into one. Note that we know the keys to be unique, so no risk of clashes
148205

@@ -154,15 +211,6 @@ def inner_loop(software_name):
154211
# Order the list of software chronologically
155212
software_info = dict(sorted(software_info.items(), key=lambda item: item[1]["initial_build_time"]))
156213

157-
def write_software_info(local_software_info, easystack_file, build_duration):
158-
with open(easystack_file, "a") as easystack_file_handle:
159-
easystack_file_handle.write(f"# {easystack_file}: total build duration = {build_duration:.0f} seconds\n")
160-
easystack_file_handle.write("easyconfigs:\n")
161-
for software_name, info in local_software_info.items():
162-
print(f'Adding {software_name} with build duration {info["build_duration"]} to easystack {easystack_file}.')
163-
easystack_file_handle.write(f' - {info["easyconfig_path"]}\n')
164-
easystack_file_handle.write(' options:\n')
165-
easystack_file_handle.write(f' include-easyblocks: {info["easyblock_path"]}\n')
166214

167215
# Write the list to an easystack file
168216
sequence_number = 1

0 commit comments

Comments
 (0)