Skip to content

Commit 598dada

Browse files
committed
Merge branch 'main' of github.com:EESSI/software-layer-scripts into cudnn915_cc70
2 parents 9fbdb3f + 4f3b830 commit 598dada

3 files changed

Lines changed: 279 additions & 10 deletions

File tree

EESSI-extend-easybuild.eb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,12 @@ if mode() == "unload" or mode() == "dependencyCk" or convertToCanonical(easybuil
244244
-- Set environment variables that are EESSI version specific
245245
if convertToCanonical(eessi_version) > convertToCanonical("2023.06") then
246246
setenv ("EASYBUILD_PREFER_PYTHON_SEARCH_PATH", "EBPYTHONPREFIXES")
247+
-- Note: make sure to *not* use same setting for both module-search-path-headers and search-path-cpp-headers
248+
-- EasyBuild configuration options, since this leads to trouble because (for example) $C_INCLUDE_PATH
249+
-- only includes paths for the *direct* dependencies (not transitive dependencies)
250+
-- when using EasyBuild v5.2.1 or earlier
247251
setenv ("EASYBUILD_MODULE_SEARCH_PATH_HEADERS", "include_paths")
248-
setenv ("EASYBUILD_SEARCH_PATH_CPP_HEADERS", "include_paths")
252+
setenv ("EASYBUILD_SEARCH_PATH_CPP_HEADERS", "flags")
249253
end
250254
end
251255

eessi_software_reproduce_stack.py

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import bz2
5+
import glob
6+
import os
7+
import pathlib
8+
import re
9+
from datetime import datetime
10+
from multiprocessing import Pool
11+
from packaging.version import Version
12+
13+
# Help text shown by `--help`. The example paths below mirror how root_dir is constructed from the
# --eessi-version and --reference-software-subdir arguments.
description = """
This script creates a sequence of easystack files that may be used to replicate the software installed
in a reference software subdirectory.

The script
- Determines all software that was installed in the reference prefix
- Sorts it in order of installation. For software that was later rebuilt, the original installation time is used.
- In the installation order, easyconfig names are added to easystack files
- A new easystack file is started when either the EasyBuild version to be used changes, or when the maximum build
time is exceeded (build times of the software in the reference software subdir are used to estimate this)

By sticking to the original order in which software was installed, using the robot option should not be needed. Since nothing
is installed by the robot, one is able to guarantee that the same easyconfigs and easyblocks are used that were
used during original installation time.

If an argument is provided for --eb-override-version, installations of EasyBuild itself are performed before
anything else, with the EasyBuild version provided as argument.

Example:

python3 eessi_software_reproduce_stack.py --reference-software-subdir=x86_64/amd/zen2 --eessi-version 2025.06
will create easystacks that allow you to replicate the software installed in
/cvmfs/software.eessi.io/versions/2025.06/software/linux/x86_64/amd/zen2,
provided the logs of these installations were backed up to
/cvmfs/software.eessi.io/versions/2025.06/software/linux/x86_64/amd/zen2/reprod
(which was standard practice starting with EESSI version 2025.06).

Known limitations: if additional dependencies were added in rebuilds, building them at the original (first)
build time will cause a failure. See e.g. https://github.com/EESSI/software-layer/issues/1430
"""
43+
# RawDescriptionHelpFormatter keeps the newlines of the description intact, so the help output stays readable
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
    '-m', '--max-build-time',
    type=int,
    default=240,
    help='Maximum build time in minutes for each easystack file',
)
parser.add_argument(
    '-o', '--eb-override-version',
    type=str,
    default=None,
    help=(
        'EasyBuild version used to install other EasyBuild versions. '
        'The default (None) means it will attempt to use the EasyBuild that was used in the '
        'reference-software-subdir, but if this was a bootstrapped build (e.g. EB-5.1.1 building EB-5.1.1) '
        'in practice the latest EB will be used by the EESSI build scripts - creating a false suggestion '
        'about which version was used to install EasyBuild.'
    ),
)
parser.add_argument(
    '-r', '--reference-software-subdir',
    type=str,
    required=True,
    help='Reference software subdirectory, e.g. x86_64/amd/zen4',
)
parser.add_argument(
    '-e', '--eessi-version',
    type=str,
    required=True,
    help='EESSI version',
)
parser.add_argument(
    '-d', '--debug',
    action='store_true',
    help="Print debugging output",
)
args = parser.parse_args()

# Settings derived from the command line:
# - EasyBuild version used to bootstrap EasyBuild itself (None: use what the logs say)
eb_override_version = args.eb_override_version
# - upper bound (in minutes) on the summed build time of a single easystack file
max_build_time = args.max_build_time
# - directory holding the backed-up installation logs that will be crawled
root_dir = (
    f"/cvmfs/software.eessi.io/versions/{args.eessi_version}"
    f"/software/linux/{args.reference_software_subdir}/reprod"
)

# Container for the collected software information
software_info = {}
63+
64+
def get_build_duration(file: pathlib.Path, encoding: str = "utf-8") -> float:
    """
    Returns the total build duration (in minutes) by comparing the first and last timestamps from an EasyBuild log file

    :param file: path to a bzip2-compressed EasyBuild log file
    :param encoding: text encoding used to decode the log (undecodable bytes are replaced)
    :return: time elapsed between the timestamps on the first and last line of the log, in minutes
    :raises ValueError: if the log is empty, or if its first or last line does not contain a timestamp
    """
    # First, get the first and last line of the EB log
    # Since this is a compressed file, we cannot seek, and have to read line-by-line to find the first and last line
    first_line = None
    last_line = None
    with bz2.open(file, mode="rt", encoding=encoding, errors="replace") as f:
        for line in f:
            line = line.rstrip("\n")
            # Get the first line
            if first_line is None:
                first_line = line
            # Continuously overwrite the last line
            last_line = line

    # An empty log has no lines at all; fail with a clear message instead of passing None to the regex
    if first_line is None:
        raise ValueError(f"Log file {file} is empty, cannot determine build duration")

    # Get the build duration by comparing the timestamp for the first and last lines in the log file
    # re_pattern matches a line like == 2025-10-30 12:59:09,573 easyblock.py:371...
    re_pattern = r"==\s+([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]+)"
    timestamp_regex = re.compile(re_pattern)

    start_time = timestamp_regex.search(first_line)
    if start_time is None:
        raise ValueError(f"Failed to find pattern {re_pattern} in line {first_line}")

    end_time = timestamp_regex.search(last_line)
    if end_time is None:
        raise ValueError(f"Failed to find pattern {re_pattern} in line {last_line}")

    # Get actual duration by doing datetime math
    format_str = "%Y-%m-%d %H:%M:%S,%f"
    duration = datetime.strptime(end_time.group(1), format_str) - datetime.strptime(start_time.group(1), format_str)

    return duration.total_seconds() / 60
98+
99+
100+
def get_easybuild_version(file: pathlib.Path, encoding: str = "utf-8") -> str:
    """
    Returns the EasyBuild version that was used to build this software, obtained from the first line of the
    EasyBuild logfile

    :param file: path to a bzip2-compressed EasyBuild log file
    :param encoding: text encoding used to decode the log (undecodable bytes are replaced)
    :return: the version as a 'major.minor.patch' string
    :raises ValueError: if the first line of the log does not contain 'This is EasyBuild <version>'
    """

    with bz2.open(file, mode="rt", encoding=encoding, errors="replace") as f:
        first_line = f.readline()

    # Get the EasyBuild version
    re_pattern = r"This is EasyBuild ([0-9]+\.[0-9]+\.[0-9]+)"
    version_match = re.search(re_pattern, first_line)
    # Raise a descriptive error rather than an AttributeError on None when the banner is missing
    if version_match is None:
        raise ValueError(f"Failed to find pattern {re_pattern} in first line of {file}: {first_line}")

    return version_match.group(1)
114+
115+
116+
def write_software_info(local_software_info, easystack_file, build_duration):
    """
    Write a single easystack file listing the given software installations.

    :param local_software_info: dict mapping '<name>-<version>' to an info dict with (at least) the keys
                                'build_duration', 'easybuild_version', 'easyconfig_path' and 'easyblock_path'
    :param easystack_file: path of the easystack file to write
    :param build_duration: summed build duration (in minutes) of all entries, recorded in a header comment
    """
    # Open in write mode: each easystack file is written exactly once per run, and appending to a file left
    # behind by a previous run would result in a file with a duplicated 'easyconfigs:' section
    with open(easystack_file, "w") as easystack_file_handle:
        easystack_file_handle.write(f"# {easystack_file}: total build duration = {build_duration:.0f} minutes\n")
        easystack_file_handle.write("easyconfigs:\n")
        for software_name, info in local_software_info.items():
            if args.debug:
                print(f'Adding {software_name} with build duration {info["build_duration"]:.0f} to easystack {easystack_file}.')
            # Each entry is a list item holding a mapping from the easyconfig to its per-easyconfig options;
            # the options must be nested below the easyconfig for the YAML structure to be valid
            easystack_file_handle.write(f'  - {info["easyconfig_path"]}:\n')
            easystack_file_handle.write('      options:\n')
            # Disable robot, to guarantee each build is done with the easyconfig & easyblock specified in the easystack file
            # Note that for robot builds, this may not happen, because if X is built as dep for Y, and Y did not have
            # the relevant include-easyblocks specified for X, it would just use the ones from the central EB installation
            # Only support from EB 5.2.0 onwards
            if Version(info["easybuild_version"]) >= Version("5.2.0"):
                easystack_file_handle.write('        robot: False\n')
            easystack_file_handle.write(f'        include-easyblocks: {info["easyblock_path"]}\n')
132+
133+
134+
# Create an inner loop body to parallelize over
135+
def inner_loop(software_name):
    """
    Collect build information for every version of one software package found below root_dir.

    The layout that is crawled is <root_dir>/<software_name>/<software_version>/<datestamp>/easybuild/...,
    where <datestamp> is formatted as %Y%m%d_%H%M%SUTC and there is one datestamp directory per (re)build.

    :param software_name: name of a directory entry in root_dir
    :return: dict mapping '<software_name>-<software_version>' to a dict with the keys 'initial_build_time',
             'build_duration', 'easybuild_version', 'easyblock_path' and 'easyconfig_path';
             empty if software_name is not a directory
    :raises ValueError: if a version directory contains no builds, or if a build directory does not contain
                        exactly one EasyBuild log for this software
    """
    datestamp_format = "%Y%m%d_%H%M%SUTC"
    software_info = {}
    software_dir = os.path.join(root_dir, software_name)
    if not os.path.isdir(software_dir):
        return software_info

    for software_version in os.listdir(software_dir):
        software_version_dir = os.path.join(software_dir, software_version)
        if not os.path.isdir(software_version_dir):
            continue

        # Determine if this is about EasyBuild itself, and if its EasyBuild version should be overridden
        override_easybuild_version = bool(software_name == "EasyBuild" and eb_override_version)

        # os.listdir() returns entries in arbitrary order. The datestamp format is fixed-width with the most
        # significant field first, so sorting the names as strings puts the builds in chronological order
        datestamps = sorted(os.listdir(software_version_dir))
        if not datestamps:
            raise ValueError(f"Expected at least one build directory in {software_version_dir}, but found none")
        datestamp_dir_first_build = os.path.join(software_version_dir, datestamps[0])
        datestamp_dir_last_build = os.path.join(software_version_dir, datestamps[-1])

        # Extract the date/time of the initial software build
        initial_build_time = datetime.strptime(datestamps[0], datestamp_format)

        # Extract the total build time from the build log of the first build
        build_log_path_glob = os.path.join(datestamp_dir_first_build, "easybuild", f"easybuild-{software_name}-*.log.bz2")
        # We use a wildcard, but check only one file matches
        matching_files = glob.glob(build_log_path_glob)
        if len(matching_files) != 1:
            raise ValueError(f"Expected only one file to match {build_log_path_glob}. Instead got: {matching_files}")
        build_duration = get_build_duration(matching_files[0])

        if override_easybuild_version:
            # If we're overriding the version of EasyBuild to build EasyBuild, set the original build time
            # such that it appears first in the easystack files, and simply use the requested EasyBuild version
            initial_build_time = datetime.strptime("19700101_000000UTC", datestamp_format)
            easybuild_version = eb_override_version
        else:
            # Extract the EasyBuild version from the build log of the last build
            build_log_path_glob = os.path.join(datestamp_dir_last_build, "easybuild", f"easybuild-{software_name}-*.log.bz2")
            matching_files = glob.glob(build_log_path_glob)
            if len(matching_files) != 1:
                raise ValueError(f"Expected only one file to match {build_log_path_glob}. Instead got: {matching_files}")
            easybuild_version = get_easybuild_version(matching_files[0])

        # Extract the paths to the easyblock and easyconfig files used for the last installation
        easyblock_path = os.path.join(datestamp_dir_last_build, "easybuild", "reprod", "easyblocks", "*.py")
        easyconfig_path = os.path.join(datestamp_dir_last_build, "easybuild", f"{software_name}-{software_version}.eb")

        # Store the software information
        software_info[software_name + "-" + software_version] = {
            "initial_build_time": initial_build_time,
            "build_duration": build_duration,
            "easybuild_version": easybuild_version,
            "easyblock_path": easyblock_path,
            "easyconfig_path": easyconfig_path
        }

    return software_info
191+
192+
def main():
    """
    Gather build information for all software below root_dir (in parallel), order it chronologically by initial
    build time, and write it to a sequence of numbered easystack files in the current working directory.
    """
    # Use as many workers as we have cores in our cgroup
    n_workers = len(os.sched_getaffinity(0))

    # Parallelize work over each dir present in the root_dir
    software_list = os.listdir(root_dir)

    print(f"Software list: {len(software_list)} items")
    if args.debug:
        print(f"{software_list}")

    print("Gathering information from the installation logs, this may take a while...")
    with Pool(processes=n_workers) as pool:
        software_info_list = pool.map(inner_loop, software_list)

    # Each worker in the pool creates its own software info dict. The result of the map function is a list of these dicts
    # Here, we merge all these dicts into one. Note that we know the keys to be unique, so no risk of clashes
    software_info = {k: v for d in software_info_list if d for k, v in d.items()}
    print(f"Gathered information for {len(software_info)} software installations (including versions) in {root_dir}")
    if args.debug:
        import pprint
        pprint.pprint(software_info)

    # Order the list of software chronologically
    software_info = dict(sorted(software_info.items(), key=lambda item: item[1]["initial_build_time"]))

    # Write the list to easystack files
    sequence_number = 1
    easystack_count = 0
    previous_eb_ver = None
    total_build_duration = 0
    build_duration_current_easystack = 0
    local_software_info = {}
    print("Writing software build information to easystack files...")
    # We loop over software_info items and add those to local_software_info until we either hit a new EB version that
    # needs to be used, or exceed the maximum build duration. Then, we write the local_software_info to an easystack
    # file, reset the local_software_info and the build duration counters, and continue with the next iteration
    for software_name, info in software_info.items():
        if (
            len(local_software_info) > 0 and  # Skip first iteration, there's nothing to flush to disk yet
            (
                info["easybuild_version"] != previous_eb_ver or  # Different EB version from last iteration
                (build_duration_current_easystack + info["build_duration"]) > max_build_time
            )
        ):
            easystack_file = f'easystack-{sequence_number:03d}-eb-{previous_eb_ver}.yml'
            write_software_info(local_software_info, easystack_file, build_duration_current_easystack)
            easystack_count += 1
            build_duration_current_easystack = 0
            local_software_info = {}
            sequence_number += 1

        # Add the current software to the local_software_info
        local_software_info[software_name] = info
        build_duration_current_easystack = build_duration_current_easystack + info["build_duration"]
        total_build_duration = total_build_duration + info["build_duration"]
        previous_eb_ver = info["easybuild_version"]

    # Flush the local_software_info to disk one last time. If no software was found at all there is nothing
    # to flush, and writing would only produce an empty 'easystack-001-eb-None.yml'
    if local_software_info:
        easystack_file = f'easystack-{sequence_number:03d}-eb-{previous_eb_ver}.yml'
        write_software_info(local_software_info, easystack_file, build_duration_current_easystack)
        easystack_count += 1

    print(f"Total of {easystack_count} easystacks with a total build time of {total_build_duration:.0f} minutes")


# Only run the driver when executed as a script. With the 'spawn' and 'forkserver' multiprocessing start
# methods the worker processes re-import this module, and without this guard they would try to create a
# process pool of their own
if __name__ == "__main__":
    main()

init/eessi_archdetect.sh

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,11 @@ cpupath(){
9393
# Identify the best matching CPU architecture from a list of supported specifications for the host CPU
9494
# Return the path to the installation files in EESSI of the best matching architecture
9595
local cpu_arch_spec=()
96-
96+
9797
# Identify the host CPU architecture
9898
local machine_type=${EESSI_MACHINE_TYPE:-$(uname -m)}
9999
log "DEBUG" "cpupath: Host CPU architecture identified as '$machine_type'"
100-
100+
101101
# Populate list of supported specs for this architecture
102102
case $machine_type in
103103
"x86_64") local spec_file="eessi_arch_x86.spec";;
@@ -109,14 +109,28 @@ cpupath(){
109109
# spec files are located in a subfolder with this script
110110
local base_dir=$(dirname $(readlink -f $0))
111111
update_arch_specs "$base_dir/arch_specs/${spec_file}"
112-
112+
113113
# Identify the host CPU vendor
114114
local cpu_vendor=$(get_cpuinfo "vendor[ _]id")
115115
if [ "${cpu_vendor}" == "" ]; then
116116
cpu_vendor=$(get_cpuinfo "cpu[ _]implementer")
117117
fi
118118
log "DEBUG" "cpupath: CPU vendor of host system: '$cpu_vendor'"
119-
119+
# Construct a list of known cpu vendors
120+
local cpu_vendors=()
121+
for spec in "${cpu_arch_spec[@]}"; do
122+
eval "cols=$spec"
123+
cpu_vendors+=("${cols[1]}")
124+
done
125+
log "DEBUG" "cpupath: Known CPU vendors: ${cpu_vendors[*]}"
126+
# For ARM, if CPU vendor is as-yet-unknown fall back to a default ARM vendor 0x41
127+
if [ "${machine_type}" == "aarch64" ]; then
128+
if [[ " ${cpu_vendors[*]} " != *" $cpu_vendor "* ]]; then
129+
log "DEBUG" "cpupath: Unknown ARM CPU vendor '$cpu_vendor', falling back to '0x41'"
130+
cpu_vendor="0x41"
131+
fi
132+
fi
133+
120134
# Identify the host CPU flags or features
121135
# cpuinfo systems print different line identifiers, eg features, instead of flags
122136
local cpu_flag_tag;
@@ -132,14 +146,14 @@ cpupath(){
132146
else
133147
cpu_flag_tag='flags'
134148
fi
135-
149+
136150
local cpu_flags=$(get_cpuinfo "$cpu_flag_tag")
137151
log "DEBUG" "cpupath: CPU flags of host system: '$cpu_flags'"
138-
152+
139153
# Default to generic CPU
140154
local best_arch_match="$machine_type/generic"
141155
local all_arch_matches=$best_arch_match
142-
156+
143157
# Iterate over the supported CPU specifications to find the best match for host CPU
144158
# Order of the specifications matters, the last one to match will be selected
145159
for arch in "${cpu_arch_spec[@]}"; do
@@ -151,7 +165,7 @@ cpupath(){
151165
log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
152166
fi
153167
done
154-
168+
155169
if [ "allx" == "${CPUPATH_RESULT}x" ]; then
156170
log "INFO" "cpupath: all matches for host CPU: $all_arch_matches"
157171
echo "$all_arch_matches"
@@ -219,4 +233,3 @@ case "$ARGUMENT" in
219233
"accelpath") accelpath; exit;;
220234
*) echo "$USAGE"; log "ERROR" "Missing <action> argument (possible actions: 'cpupath', 'accelpath')";;
221235
esac
222-

0 commit comments

Comments
 (0)