Skip to content

Commit 3aef13a

Browse files
author
Richard Top
committed
Merge branch '2023.06-software.eessi.io' of ssh://github.com/EESSI/software-layer into eessi-2023.06-grace-foss-2023a
2 parents 8096000 + 14a9218 commit 3aef13a

4 files changed

Lines changed: 80 additions & 7 deletions

File tree

.github/workflows/tests_link_nvidia_host_libraries.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ jobs:
120120
# List dir with libraries
121121
echo "Showing content of /tmp/nvidia_libs"
122122
echo "$(ls -l /tmp/nvidia_libs)"
123+
echo "Showing content of /tmp/nvidia_libs_duplicate"
124+
echo "$(ls -l /tmp/nvidia_libs_duplicate)"
123125
echo "Showing content of /opt/eessi/nvidia/x86_64/host"
124126
echo "$(ls -l /opt/eessi/nvidia/x86_64/host)"
125127
@@ -152,9 +154,9 @@ jobs:
152154
exit 1
153155
fi
154156
155-
# Verify it points to our mock library in /tmp/nvidia_libs
156-
if [[ "$target" != "/tmp/nvidia_libs/$lib"* && "$target" != *"/tmp/nvidia_libs/"* ]]; then
157-
echo "Error: Symlink $lib_path points to $target, which is not in our mock directory"
157+
# Verify it points to our mock library in /tmp/nvidia_libs or /tmp/nvidia_libs_duplicate
158+
if [[ "$target" != "/tmp/nvidia_libs/$lib"* && "$target" != "/tmp/nvidia_libs_duplicate/$lib"* ]]; then
159+
echo "Error: Symlink $lib_path points to $target, which is not in our mock directories"
158160
exit 1
159161
fi
160162
@@ -194,3 +196,4 @@ jobs:
194196
echo "$output" | grep "have already been linked" || { echo "Missing 'already linked' message"; exit 1; }
195197
196198
echo "Second normal run test passed"
199+
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
easyconfigs:
2+
# originally built with EB 4.9.0, PR was included since EB 4.9.1
3+
# - GCCcore-13.2.0.eb:
4+
# options:
5+
# from-pr: 19974
6+
- GCCcore-13.2.0.eb
7+
- GCC-13.2.0.eb
8+
# originally built with EB 4.9.0, PR was included since EB 4.9.1
9+
# - OpenMPI-4.1.6-GCC-13.2.0:
10+
# options:
11+
# from-pr: 19940
12+
- OpenMPI-4.1.6-GCC-13.2.0.eb
13+
- foss-2023b.eb

scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,34 @@ find_cuda_libraries_on_host() {
402402

403403
if [ -n "$matched" ]; then
404404
log_verbose "Found matches for ${library}: $matched"
405-
# Do not quote $matched, since it can contain multiple libraries split by \n!
406-
MATCHED_LIBRARIES+=($matched)
405+
406+
# Process each matched library and avoid duplicates by filename
407+
# Use `while IFS= read -r ...; done <<< "$matched"` to handle whitespace and special characters in paths.
408+
while IFS= read -r lib_path; do
409+
# Skip empty lines
410+
[ -z "$lib_path" ] && continue
411+
412+
# Extract just the filename from the path
413+
lib_name=$(basename "$lib_path")
414+
echo "Checking library $lib_name for duplicates"
415+
416+
# Check if we already have this library filename in our matched libraries
417+
duplicate_found=0
418+
for existing_lib in "${MATCHED_LIBRARIES[@]}"; do
419+
existing_name=$(basename "$existing_lib")
420+
if [ "$existing_name" = "$lib_name" ]; then
421+
log_verbose "Duplicate library found: $lib_name (existing: $existing_lib, currently processed: $lib_path)"
422+
log_verbose "Discarting $lib_path"
423+
duplicate_found=1
424+
break
425+
fi
426+
done
427+
428+
# If no duplicate found, add this library
429+
if [ "$duplicate_found" -eq 0 ]; then
430+
MATCHED_LIBRARIES+=("$lib_path")
431+
fi
432+
done <<< "$matched"
407433
else
408434
# There are some libraries, that weren't matched/found on the system
409435
log_verbose "No matches found for ${library}"
@@ -484,6 +510,22 @@ symlink_mode () {
484510
# Loop over each matched library
485511
for library in "${MATCHED_LIBRARIES[@]}"; do
486512
log_verbose "Linking library: ${library}"
513+
514+
# Get just the library filename
515+
lib_name=$(basename "$library")
516+
517+
# Check if the symlink already exists
518+
if [ -L "$lib_name" ]; then
519+
# Check if it's pointing to the same target
520+
target=$(readlink "$lib_name")
521+
if [ "$target" = "$library" ]; then
522+
log_verbose "Symlink for $lib_name already exists and points to correct target"
523+
continue
524+
else
525+
log_verbose "Symlink for $lib_name exists but points to wrong target: $target, updating..."
526+
rm "$lib_name"
527+
fi
528+
fi
487529

488530
# Create a symlink in the current directory
489531
# and check if the symlink was created successfully

tests/nvidia-libs/mock-nvidia-libs.sh

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#!/bin/bash
22
# Setup script to create fake NVIDIA libraries for testing
33

4-
# Create directory for fake NVIDIA libraries
4+
# Create directories for fake NVIDIA libraries
55
mkdir -p /tmp/nvidia_libs
6+
mkdir -p /tmp/nvidia_libs_duplicate
67

78
# Create common NVIDIA libraries with minimal content
89
libraries=(
@@ -27,6 +28,16 @@ for lib in "${libraries[@]}"; do
2728
base_lib=$(echo "$lib" | sed 's/\.so\.[0-9]*/.so/')
2829
ln -sf "/tmp/nvidia_libs/$lib" "/tmp/nvidia_libs/$base_lib"
2930
fi
31+
32+
# Create duplicate libraries in a different location
33+
if [[ "$lib" == "libcuda.so.1" || "$lib" == "libnvidia-ml.so.1" ]]; then
34+
echo "This is a duplicate $lib for testing purposes" > "/tmp/nvidia_libs_duplicate/$lib"
35+
chmod +x "/tmp/nvidia_libs_duplicate/$lib"
36+
if [[ "$lib" == *".so."* ]]; then
37+
base_lib=$(echo "$lib" | sed 's/\.so\.[0-9]*/.so/')
38+
ln -sf "/tmp/nvidia_libs_duplicate/$lib" "/tmp/nvidia_libs_duplicate/$base_lib"
39+
fi
40+
fi
3041
done
3142

3243
# Create a fake ldconfig cache that points to our fake libraries
@@ -38,11 +49,15 @@ cat > /tmp/ldconfig/ldconfig << 'EOF'
3849
# Fake ldconfig command that returns our fake libraries
3950
4051
if [ "$1" = "-p" ]; then
41-
# Simulate ldconfig -p output
52+
# Simulate ldconfig -p output with duplicate entries
4253
echo "libcuda.so.1 (libc6,x86-64) => /tmp/nvidia_libs/libcuda.so.1"
54+
echo "libcuda.so.1 (libc6,x86-64) => /tmp/nvidia_libs_duplicate/libcuda.so.1"
4355
echo "libcuda.so (libc6,x86-64) => /tmp/nvidia_libs/libcuda.so"
56+
echo "libcuda.so (libc6,x86-64) => /tmp/nvidia_libs_duplicate/libcuda.so"
4457
echo "libnvidia-ml.so.1 (libc6,x86-64) => /tmp/nvidia_libs/libnvidia-ml.so.1"
58+
echo "libnvidia-ml.so.1 (libc6,x86-64) => /tmp/nvidia_libs_duplicate/libnvidia-ml.so.1"
4559
echo "libnvidia-ml.so (libc6,x86-64) => /tmp/nvidia_libs/libnvidia-ml.so"
60+
echo "libnvidia-ml.so (libc6,x86-64) => /tmp/nvidia_libs_duplicate/libnvidia-ml.so"
4661
echo "libnvidia-ptxjitcompiler.so.1 (libc6,x86-64) => /tmp/nvidia_libs/libnvidia-ptxjitcompiler.so.1"
4762
echo "libnvidia-ptxjitcompiler.so (libc6,x86-64) => /tmp/nvidia_libs/libnvidia-ptxjitcompiler.so"
4863
echo "libOpenCL.so.1 (libc6,x86-64) => /tmp/nvidia_libs/libOpenCL.so.1"

0 commit comments

Comments
 (0)