@@ -175,11 +175,106 @@ cpupath(){
175175 fi
176176}
177177
#######################################
# Detect an NVIDIA GPU via the nvidia-smi command.
# Globals:   none
# Arguments: none
# Outputs:   on success writes "accel/nvidia/cc<NN>" to stdout, where <NN> is
#            the compute capability of the first GPU reported by nvidia-smi
#            with the dot removed (e.g. 8.0 -> cc80). Diagnostics are passed
#            to log().
# Returns:   0 on success
#            2 if nvidia-smi is not available
#            3 if nvidia-smi fails or its output cannot be parsed; in that
#              case the temporary file holding its output is kept on purpose
#              so it can be inspected.
#######################################
nvidia_accelpath() {
  local nvidia_smi
  if ! nvidia_smi=$(command -v nvidia-smi); then
    log "DEBUG" "nvidia_accelpath: nvidia-smi command not found"
    return 2
  fi
  log "DEBUG" "nvidia_accelpath: nvidia-smi command found @ ${nvidia_smi}"

  local nvidia_smi_out
  if ! nvidia_smi_out=$(mktemp -p /tmp nvidia_smi_out.XXXXX); then
    log "DEBUG" "nvidia_accelpath: could not create temporary file for nvidia-smi output"
    return 3
  fi

  # Redirect stdout to the file FIRST and only then duplicate stderr onto it.
  # The reverse order ("2>&1 > file") leaves stderr attached to this function's
  # stdout, which callers capture as the result.
  if ! nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap \
      --format=csv,noheader > "$nvidia_smi_out" 2>&1; then
    log "DEBUG" "nvidia_accelpath: nvidia-smi command failed, see output in $nvidia_smi_out"
    return 3
  fi

  local nvidia_smi_info cuda_cc
  nvidia_smi_info=$(head -n 1 "$nvidia_smi_out")
  # 4th CSV field is compute_cap; drop the dot and any surrounding whitespace.
  cuda_cc=$(awk -F',' '{ gsub(/[[:space:].]/, "", $4); print $4; exit }' <<< "$nvidia_smi_info")

  # Never emit a half-built path such as "accel/nvidia/cc" with a success status.
  if [[ ! "$cuda_cc" =~ ^[0-9]+$ ]]; then
    log "DEBUG" "nvidia_accelpath: could not derive CUDA compute capability from nvidia-smi output, see output in $nvidia_smi_out"
    return 3
  fi
  log "DEBUG" "nvidia_accelpath: CUDA compute capability '${cuda_cc}' derived from nvidia-smi output '${nvidia_smi_info}'"

  echo "accel/nvidia/cc${cuda_cc}"
  rm -f -- "$nvidia_smi_out"
  return 0
}
206+
#######################################
# Detect an AMD GPU, first through the KFD sysfs topology and, if that yields
# nothing, through the amd-smi command.
# Globals:   none
# Arguments: $1 - (optional) directory holding the KFD topology nodes;
#                 defaults to /sys/devices/virtual/kfd/kfd/topology/nodes
# Outputs:   on success writes "accel/amd/<gfx target>" to stdout
#            (e.g. accel/amd/gfx90a). Diagnostics are passed to log().
# Returns:   0 on success
#            2 if no GPU was found via KFD and amd-smi is not available
#            3 if amd-smi fails or its output cannot be parsed; in that case
#              the temporary file holding its output is kept on purpose so it
#              can be inspected.
#######################################
amd_accelpath() {
  # Method 1: KFD sysfs interface (needs neither amd-smi nor Python)
  local kfd_nodes="${1:-/sys/devices/virtual/kfd/kfd/topology/nodes}"
  local amdgcn_cc=""

  if [[ -d "$kfd_nodes" ]]; then
    log "DEBUG" "amd_accelpath: KFD sysfs path found @ ${kfd_nodes}"
    local prop_file node gfx_ver major minor step

    # sort -V gives version ordering (nodes/0, nodes/1, ..., nodes/10) so the
    # lowest-numbered GPU node wins; a plain glob would visit 10 before 2.
    while IFS= read -r prop_file; do
      [[ -f "$prop_file" ]] || continue

      # Value of the gfx_target_version property; read errors are ignored
      # because a node we cannot read is simply skipped.
      gfx_ver=$(awk '$1 == "gfx_target_version" { print $2; exit }' "$prop_file" 2> /dev/null)

      # Only plain decimal integers are accepted; 0 is skipped (the original
      # code notes that 0 means a CPU node).
      [[ "$gfx_ver" =~ ^[0-9]+$ ]] || continue
      gfx_ver=$((10#$gfx_ver))
      ((gfx_ver > 0)) || continue

      # gfx_target_version is decoded as major*10000 + minor*100 + stepping
      major=$(((gfx_ver / 10000) % 100))
      minor=$(((gfx_ver / 100) % 100))
      step=$((gfx_ver % 100))
      printf -v amdgcn_cc 'gfx%d%d%x' "$major" "$minor" "$step"

      node=${prop_file%/properties}
      node=${node##*/}
      log "DEBUG" "amd_accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from KFD node ${node}"
      break
    done < <(printf '%s\n' "$kfd_nodes"/*/properties | sort -V)

    if [[ -n "$amdgcn_cc" ]]; then
      echo "accel/amd/${amdgcn_cc}"
      return 0
    fi
    log "DEBUG" "amd_accelpath: KFD topology found, but no AMD GPUs detected. Falling back to amd-smi."
  else
    log "DEBUG" "amd_accelpath: KFD sysfs path not found. Falling back to amd-smi."
  fi

  # Method 2: fall back to the amd-smi command
  local amd_smi
  if ! amd_smi=$(command -v amd-smi); then
    log "DEBUG" "amd_accelpath: amd-smi command not found"
    return 2
  fi
  log "DEBUG" "amd_accelpath: amd-smi command found @ ${amd_smi}"

  local amd_smi_out
  if ! amd_smi_out=$(mktemp -p /tmp amd_smi_out.XXXXX); then
    log "DEBUG" "amd_accelpath: could not create temporary file for amd-smi output"
    return 3
  fi

  # Store the complete amd-smi output (stdout and stderr) and test amd-smi's
  # own exit status; piping straight into grep would hide both.
  if ! amd-smi static --asic > "$amd_smi_out" 2>&1; then
    log "DEBUG" "amd_accelpath: amd-smi command failed, see output in $amd_smi_out"
    return 3
  fi

  local amd_smi_info
  if ! amd_smi_info=$(grep -m 1 TARGET_GRAPHICS_VERSION "$amd_smi_out"); then
    log "DEBUG" "amd_accelpath: no TARGET_GRAPHICS_VERSION in amd-smi output, see output in $amd_smi_out"
    return 3
  fi

  # Keep what follows the last ": " and drop any stray whitespace.
  amdgcn_cc=${amd_smi_info##*: }
  amdgcn_cc=${amdgcn_cc//[[:space:]]/}
  if [[ ! "$amdgcn_cc" =~ ^gfx[0-9a-f]+$ ]]; then
    log "DEBUG" "amd_accelpath: unexpected TARGET_GRAPHICS_VERSION '${amd_smi_info}', see output in $amd_smi_out"
    return 3
  fi
  log "DEBUG" "amd_accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from amd-smi output '${amd_smi_info}'"

  echo "accel/amd/${amdgcn_cc}"
  rm -f -- "$amd_smi_out"
  return 0
}
272+
178273accelpath () {
179274 # If EESSI_ACCELERATOR_TARGET_OVERRIDE is set, use it
180275 log " DEBUG" " accelpath: Override variable set as '$EESSI_ACCELERATOR_TARGET_OVERRIDE ' "
181276 if [ ! -z $EESSI_ACCELERATOR_TARGET_OVERRIDE ]; then
182- # Regex that allows both NVIDIA and AMD overrides
277+ # Updated regex to allow both NVIDIA and AMD overrides
183278 if [[ " $EESSI_ACCELERATOR_TARGET_OVERRIDE " =~ ^accel/(nvidia/cc[0-9]+| amd/gfx[0-9a-f]+)$ ]]; then
184279 echo " $EESSI_ACCELERATOR_TARGET_OVERRIDE "
185280 return 0
@@ -189,28 +284,27 @@ accelpath() {
189284 fi
190285 fi
191286
192- # check for NVIDIA GPUs via nvidia-smi command
193- nvidia_smi=$( command -v nvidia-smi)
287+ # 1. Check for NVIDIA GPUs
288+ local nv_res
289+ nv_res=$( nvidia_accelpath)
194290 if [[ $? -eq 0 ]]; then
195- log " DEBUG" " accelpath: nvidia-smi command found @ ${nvidia_smi} "
196- nvidia_smi_out=$( mktemp -p /tmp nvidia_smi_out.XXXXX)
197- nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader 2>&1 > $nvidia_smi_out
198- if [[ $? -eq 0 ]]; then
199- nvidia_smi_info=$( head -n 1 $nvidia_smi_out )
200- cuda_cc=$( echo $nvidia_smi_info | sed ' s/, /,/g' | cut -f4 -d, | sed ' s/\.//g' )
201- log " DEBUG" " accelpath: CUDA compute capability '${cuda_cc} ' derived from nvidia-smi output '${nvidia_smi_info} '"
202- res=" accel/nvidia/cc${cuda_cc} "
203- log " DEBUG" " accelpath: result: ${res} "
204- echo $res
205- rm -f $nvidia_smi_out
206- else
207- log " DEBUG" " accelpath: nvidia-smi command failed, see output in $nvidia_smi_out "
208- exit 3
209- fi
210- else
211- log " DEBUG" " accelpath: nvidia-smi command not found"
212- exit 2
291+ log " DEBUG" " accelpath: result: ${nv_res} "
292+ echo " $nv_res "
293+ return 0
213294 fi
295+
296+ # 2. Check for AMD GPUs
297+ local amd_res
298+ amd_res=$( amd_accelpath)
299+ if [[ $? -eq 0 ]]; then
300+ log " DEBUG" " accelpath: result: ${amd_res} "
301+ echo " $amd_res "
302+ return 0
303+ fi
304+
305+ # 3. Fail gracefully if neither is found
306+ log " DEBUG" " accelpath: No supported accelerators found on this system."
307+ exit 2
214308}
215309
216310# Parse command line arguments
0 commit comments