|
1 | 1 | #!/usr/bin/env bash |
2 | 2 |
|
3 | | -# This script can be used to install CUDA under the `.../host_injections` directory. |
4 | | -# This provides the parts of the CUDA installation that cannot be redistributed as |
5 | | -# part of EESSI due to license limitations. While GPU-based software from EESSI will |
6 | | -# _run_ without these, installation of additional CUDA software requires the CUDA |
7 | | -# installation(s) under `host_injections` to be present. |
8 | | -# |
9 | | -# The `host_injections` directory is a variant symlink that by default points to |
10 | | -# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see |
11 | | -# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the |
12 | | -# installation to be successful, this directory needs to be writeable by the user |
13 | | -# executing this script. |
14 | | - |
15 | | -# Initialise our bash functions |
16 | 3 | TOPDIR=$(dirname $(realpath $BASH_SOURCE)) |
17 | | -source "$TOPDIR"/../../utils.sh |
18 | | - |
19 | | -# Function to display help message |
20 | | -show_help() { |
21 | | - echo "Usage: $0 [OPTIONS]" |
22 | | - echo "Options:" |
23 | | - echo " --help Display this help message" |
24 | | - echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install" |
25 | | - echo " CUDA, see the EULA at" |
26 | | - echo " https://docs.nvidia.com/cuda/eula/index.html" |
27 | | - echo " -c, --cuda-version CUDA_VERSION Specify a version o CUDA to install (must" |
28 | | - echo " have a corresponding easyconfig in the" |
29 | | - echo " EasyBuild release)" |
30 | | - echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" |
31 | | - echo " storage during the CUDA install" |
32 | | - echo " (must have >10GB available)" |
33 | | -} |
34 | | - |
35 | | -# Initialize variables |
36 | | -install_cuda_version="" |
37 | | -eula_accepted=0 |
38 | | - |
39 | | -# Parse command-line options |
40 | | -while [[ $# -gt 0 ]]; do |
41 | | - case "$1" in |
42 | | - --help) |
43 | | - show_help |
44 | | - exit 0 |
45 | | - ;; |
46 | | - -c|--cuda-version) |
47 | | - if [ -n "$2" ]; then |
48 | | - install_cuda_version="$2" |
49 | | - shift 2 |
50 | | - else |
51 | | - echo "Error: Argument required for $1" |
52 | | - show_help |
53 | | - exit 1 |
54 | | - fi |
55 | | - ;; |
56 | | - --accept-cuda-eula) |
57 | | - eula_accepted=1 |
58 | | - shift 1 |
59 | | - ;; |
60 | | - -t|--temp-dir) |
61 | | - if [ -n "$2" ]; then |
62 | | - CUDA_TEMP_DIR="$2" |
63 | | - shift 2 |
64 | | - else |
65 | | - echo "Error: Argument required for $1" |
66 | | - show_help |
67 | | - exit 1 |
68 | | - fi |
69 | | - ;; |
70 | | - *) |
71 | | - show_help |
72 | | - fatal_error "Error: Unknown option: $1" |
73 | | - ;; |
74 | | - esac |
75 | | -done |
76 | | - |
77 | | -# Make sure EESSI is initialised |
78 | | -check_eessi_initialised |
79 | | - |
80 | | -# Make sure the CUDA version supplied is a semantic version |
81 | | -is_semantic_version() { |
82 | | - local version=$1 |
83 | | - local regex='^[0-9]+\.[0-9]+\.[0-9]+$' |
84 | | - |
85 | | - if [[ $version =~ $regex ]]; then |
86 | | - return 0 # Return success (0) if it's a semantic version |
87 | | - else |
88 | | - return 1 # Return failure (1) if it's not a semantic version |
89 | | - fi |
90 | | -} |
91 | | -if ! is_semantic_version "$install_cuda_version"; then |
92 | | - show_help |
93 | | - error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n" |
94 | | - error="${error}command line option. This script is intended for use with EESSI so the 'correct'\n" |
95 | | - error="${error}version to provide is probably one of those available under\n" |
96 | | - error="${error}$EESSI_SOFTWARE_PATH/software/CUDA\n" |
97 | | - fatal_error "${error}" |
98 | | -fi |
99 | | - |
100 | | -# Make sure they have accepted the CUDA EULA |
101 | | -if [ "$eula_accepted" -ne 1 ]; then |
102 | | - show_help |
103 | | - error="\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n" |
104 | | - fatal_error "${error}" |
105 | | -fi |
106 | | - |
107 | | -# As the installation location, just use $EESSI_SOFTWARE_PATH with `versions` replaced by `host_injections` |
108 | | -# (CUDA is a binary installation so no need to worry too much about the EasyBuild setup) |
109 | | -cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections} |
110 | | - |
111 | | -# Only install CUDA if specified version is not found. |
112 | | -# (existence of easybuild subdir implies a successful install) |
113 | | -if [ -d "${cuda_install_parent}"/software/CUDA/"${install_cuda_version}"/easybuild ]; then |
114 | | - echo_green "CUDA software found! No need to install CUDA again." |
115 | | -else |
116 | | - # We need to be able to write to the installation space, so let's make sure we can |
117 | | - if ! create_directory_structure "${cuda_install_parent}"/software/CUDA ; then |
118 | | - fatal_error "No write permissions to directory ${cuda_install_parent}/software/CUDA" |
119 | | - fi |
120 | | - |
121 | | - # we need a directory we can use for temporary storage |
122 | | - if [[ -z "${CUDA_TEMP_DIR}" ]]; then |
123 | | - tmpdir=$(mktemp -d) |
124 | | - else |
125 | | - tmpdir="${CUDA_TEMP_DIR}"/temp |
126 | | - if ! mkdir -p "$tmpdir" ; then |
127 | | - fatal_error "Could not create directory ${tmpdir}" |
128 | | - fi |
129 | | - fi |
130 | | - |
131 | | - required_space_in_tmpdir=50000 |
132 | | - # Let's see if we have source and build locations defined; if not, we use the temporary space |
133 | | - if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then |
134 | | - export EASYBUILD_BUILDPATH=${tmpdir}/build |
135 | | - required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) |
136 | | - fi |
137 | | - if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then |
138 | | - export EASYBUILD_SOURCEPATH=${tmpdir}/sources |
139 | | - required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000)) |
140 | | - fi |
141 | | - |
142 | | - # The install is pretty fat: you need lots of space for download/unpack/install (~3*5GB), |
143 | | - # so we need to do a space check before we proceed |
144 | | - avail_space=$(df --output=avail "${cuda_install_parent}"/ | tail -n 1 | awk '{print $1}') |
145 | | - if (( avail_space < 5000000 )); then |
146 | | - fatal_error "Need at least 5GB disk space to install CUDA under ${cuda_install_parent}, exiting now..." |
147 | | - fi |
148 | | - avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}') |
149 | | - if (( avail_space < required_space_in_tmpdir )); then |
150 | | - error="Need at least ${required_space_in_tmpdir}GB disk space under ${tmpdir}.\n" |
151 | | - error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check.\n" |
152 | | - error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH\n" |
153 | | - error="${error}to reduce this requirement. Exiting now..." |
154 | | - fatal_error "${error}" |
155 | | - fi |
156 | | - |
157 | | - if ! command -v "eb" &>/dev/null; then |
158 | | - echo_yellow "Attempting to load an EasyBuild module to do actual install" |
159 | | - module load EasyBuild |
160 | | - # There are some scenarios where this may fail |
161 | | - if [ $? -ne 0 ]; then |
162 | | - error="'eb' command not found in your environment and\n" |
163 | | - error="${error} module load EasyBuild\n" |
164 | | - error="${error}failed for some reason.\n" |
165 | | - error="${error}Please re-run this script with the 'eb' command available." |
166 | | - fatal_error "${error}" |
167 | | - fi |
168 | | - fi |
169 | | - |
170 | | - cuda_easyconfig="CUDA-${install_cuda_version}.eb" |
171 | | - |
172 | | - # Check the easyconfig file is available in the release |
173 | | - # (eb search always returns 0, so we need a grep to ensure a usable exit code) |
174 | | - eb --search ^${cuda_easyconfig}|grep CUDA > /dev/null 2>&1 |
175 | | - # Check the exit code |
176 | | - if [ $? -ne 0 ]; then |
177 | | - eb_version=$(eb --version) |
178 | | - available_cuda_easyconfigs=$(eb --search "^CUDA-.*.eb"|grep CUDA) |
179 | | - |
180 | | - error="The easyconfig ${cuda_easyconfig} was not found in EasyBuild version:\n" |
181 | | - error="${error} ${eb_version}\n" |
182 | | - error="${error}You either need to give a different version of CUDA to install _or_ \n" |
183 | | - error="${error}use a different version of EasyBuild for the installation.\n" |
184 | | - error="${error}\nThe versions of CUDA available with the current eb command are:\n" |
185 | | - error="${error}${available_cuda_easyconfigs}" |
186 | | - fatal_error "${error}" |
187 | | - fi |
188 | | - |
189 | | - # We need the --rebuild option, as the CUDA module may or may not be on the |
190 | | - # `MODULEPATH` yet. Even if it is, we still want to redo this installation |
191 | | - # since it will provide the symlinked targets for the parts of the CUDA |
192 | | - # installation in the `.../versions/...` prefix |
193 | | - # We install the module in our `tmpdir` since we do not need the modulefile, |
194 | | - # we only care about providing the targets for the symlinks. |
195 | | - extra_args="--rebuild --installpath-modules=${tmpdir}" |
196 | | - |
197 | | - # We don't want hooks used in this install, we need a vanilla CUDA installation |
198 | | - touch "$tmpdir"/none.py |
199 | | - # shellcheck disable=SC2086 # Intended splitting of extra_args |
200 | | - eb --prefix="$tmpdir" ${extra_args} --accept-eula-for=CUDA --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" |
201 | | - ret=$? |
202 | | - if [ $ret -ne 0 ]; then |
203 | | - eb_last_log=$(unset EB_VERBOSE; eb --last-log) |
204 | | - cp -a ${eb_last_log} . |
205 | | - fatal_error "CUDA installation failed, please check EasyBuild logs $(basename ${eb_last_log})..." |
206 | | - else |
207 | | - echo_green "CUDA installation at ${cuda_install_parent}/software/CUDA/${install_cuda_version} succeeded!" |
208 | | - fi |
209 | | - # clean up tmpdir |
210 | | - rm -rf "${tmpdir}" |
211 | | -fi |
| 4 | +echo "This script was replaced by the $TOPDIR/install_cuda_and_libraries.sh script. See https://www.eessi.io/docs/site_specific_config/gpu/ for more information" |
0 commit comments