@@ -212,24 +212,70 @@ accelpath() {
212212 # fi
213213
214214 # check for AMD GPUs via amd-smi command
215- amd_smi=$( command -v amd-smi)
216- if [[ $? -eq 0 ]]; then
217- log " DEBUG" " accelpath: amd-smi command found @ ${amd_smi} "
218- amd_smi_out=$( mktemp -p /tmp amd_smi_out.XXXXX)
219- amd-smi static --asic | grep TARGET_GRAPHICS_VERSION 2>&1 > $amd_smi_out
220- if [[ $? -eq 0 ]]; then
221- amd_smi_info=$( head -n 1 $amd_smi_out )
222- amdgcn_cc=$( echo $amd_smi_info | sed ' s/.*: //' )
223- log " DEBUG" " accelpath: AMDGCN compute capability '${amdgcn_cc} ' derived from amd-smi output '${amd_smi_info} '"
215+ # amd_smi=$(command -v amd-smi)
216+ # if [[ $? -eq 0 ]]; then
217+ # log "DEBUG" "accelpath: amd-smi command found @ ${amd_smi}"
218+ # amd_smi_out=$(mktemp -p /tmp amd_smi_out.XXXXX)
219+ # amd-smi static --asic | grep TARGET_GRAPHICS_VERSION 2>&1 > $amd_smi_out
220+ # if [[ $? -eq 0 ]]; then
221+ # amd_smi_info=$(head -n 1 $amd_smi_out)
222+ # amdgcn_cc=$(echo $amd_smi_info | sed 's/.*: //')
223+ # log "DEBUG" "accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from amd-smi output '${amd_smi_info}'"
224+ # res="accel/amd/${amdgcn_cc}"
225+ # echo $res
226+ # rm -f $amd_smi_out
227+ # else
228+ # log "DEBUG" "accelpath: amd-smi command failed, see output in $amd_smi_out"
229+ # exit 3
230+ # fi
231+ # else
232+ # log "DEBUG" "accelpath: amd-smi command not found"
233+ # exit 2
234+ # fi
235+
236+ # logic ported from https://github.com/llvm/llvm-project/blob/6e738e187055bbd33b6c3d203b6b55904dfcb624/clang/tools/offload-arch/AMDGPUArchByKFD.cpp
237+ # check for AMD GPUs via KFD sysfs interface (No amd-smi or Python required)
238+ kfd_nodes=" /sys/devices/virtual/kfd/kfd/topology/nodes"
239+
240+ if [[ -d " $kfd_nodes " ]]; then
241+ log " DEBUG" " accelpath: KFD sysfs path found @ ${kfd_nodes} "
242+ amdgcn_cc=" "
243+
244+ # ls -1v ensures numeric/version sorting (nodes/0, nodes/1, ..., nodes/10)
245+ # just like LLVM's llvm::sort by node ID.
246+ # Assuming homogeneous nodes for EESSI, grab the first valid GPU and break
247+ for node in $( ls -1v " $kfd_nodes " 2> /dev/null) ; do
248+ prop_file=" $kfd_nodes /$node /properties"
249+
250+ if [[ -f " $prop_file " ]]; then
251+ # Extract the integer value. 2>/dev/null suppresses read errors.
252+ gfx_ver=$( grep " ^gfx_target_version" " $prop_file " 2> /dev/null | awk ' {print $2}' )
253+
254+ # If gfx_ver is non-empty and greater than 0 (0 means it's a CPU node)
255+ if [[ -n " $gfx_ver " && " $gfx_ver " -gt 0 ]]; then
256+ # Perform the exact math from AMDGPUArchByKFD.cpp
257+ major=$(( (gfx_ver / 10000 ) % 100 ))
258+ minor=$(( (gfx_ver / 100 ) % 100 ))
259+ step=$(( gfx_ver % 100 ))
260+
261+ # Format as gfx<major><minor><hex_step> (e.g., 9 0 a -> gfx90a)
262+ amdgcn_cc=$( printf " gfx%d%d%x" $major $minor $step )
263+
264+ log " DEBUG" " accelpath: AMDGCN compute capability '${amdgcn_cc} ' derived from KFD node ${node} "
265+ break
266+ fi
267+ fi
268+ done
269+
270+ if [[ -n " $amdgcn_cc " ]]; then
224271 res=" accel/amd/${amdgcn_cc} "
225- echo $res
226- rm -f $amd_smi_out
272+ echo " $res "
227273 else
228- log " DEBUG" " accelpath: ami-smi command failed, see output in $amd_smi_out "
274+ log " DEBUG" " accelpath: KFD topology found, but no AMD GPUs detected (only CPUs) "
229275 exit 3
230276 fi
231277 else
232- log " DEBUG" " accelpath: amd-smi command not found"
278+ log " DEBUG" " accelpath: KFD sysfs path not found. AMD GPU driver not loaded? "
233279 exit 2
234280 fi
235281}
0 commit comments