@@ -212,24 +212,74 @@ accelpath() {
212212 # fi
213213
214214 # check for AMD GPUs via amd-smi command
215- amd_smi=$( command -v amd-smi)
216- if [[ $? -eq 0 ]]; then
217- log " DEBUG" " accelpath: amd-smi command found @ ${amd_smi} "
218- amd_smi_out=$( mktemp -p /tmp amd_smi_out.XXXXX)
219- amd-smi static --asic | grep TARGET_GRAPHICS_VERSION 2>&1 > $amd_smi_out
220- if [[ $? -eq 0 ]]; then
221- amd_smi_info=$( head -n 1 $amd_smi_out )
222- amdgcn_cc=$( echo $amd_smi_info | sed ' s/.*: //' )
223- log " DEBUG" " accelpath: AMDGCN compute capability '${amdgcn_cc} ' derived from amd-smi output '${amd_smi_info} '"
215+ # amd_smi=$(command -v amd-smi)
216+ # if [[ $? -eq 0 ]]; then
217+ # log "DEBUG" "accelpath: amd-smi command found @ ${amd_smi}"
218+ # amd_smi_out=$(mktemp -p /tmp amd_smi_out.XXXXX)
219+ # amd-smi static --asic | grep TARGET_GRAPHICS_VERSION 2>&1 > $amd_smi_out
220+ # if [[ $? -eq 0 ]]; then
221+ # amd_smi_info=$(head -n 1 $amd_smi_out)
222+ # amdgcn_cc=$(echo $amd_smi_info | sed 's/.*: //')
223+ # log "DEBUG" "accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from amd-smi output '${amd_smi_info}'"
224+ # res="accel/amd/${amdgcn_cc}"
225+ # echo $res
226+ # rm -f $amd_smi_out
227+ # else
228+ # log "DEBUG" "accelpath: amd-smi command failed, see output in $amd_smi_out"
229+ # exit 3
230+ # fi
231+ # else
232+ # log "DEBUG" "accelpath: amd-smi command not found"
233+ # exit 2
234+ # fi
235+
236+ # logic ported from https://github.com/llvm/llvm-project/blob/6e738e187055bbd33b6c3d203b6b55904dfcb624/clang/tools/offload-arch/AMDGPUArchByKFD.cpp
237+ # check for AMD GPUs via KFD sysfs interface (No amd-smi or Python required)
238+ kfd_nodes=" /sys/devices/virtual/kfd/kfd/topology/nodes"
239+
240+ echo $kfd_nodes
241+
242+ if [[ -d " $kfd_nodes " ]]; then
243+ log " DEBUG" " accelpath: KFD sysfs path found @ ${kfd_nodes} "
244+ amdgcn_cc=" "
245+
246+ # ls -1v ensures numeric/version sorting (nodes/0, nodes/1, ..., nodes/10)
247+ # just like LLVM's llvm::sort by node ID.
248+ for node in $( ls -1v " $kfd_nodes " 2> /dev/null) ; do
249+ prop_file=" $kfd_nodes /$node /properties"
250+
251+ if [[ -f " $prop_file " ]]; then
252+ # Extract the integer value. 2>/dev/null suppresses read errors.
253+ gfx_ver=$( grep " ^gfx_target_version" " $prop_file " 2> /dev/null | awk ' {print $2}' )
254+
255+ # If gfx_ver is non-empty and greater than 0 (0 means it's a CPU node)
256+ if [[ -n " $gfx_ver " && " $gfx_ver " -gt 0 ]]; then
257+ # Perform the exact math from AMDGPUArchByKFD.cpp
258+ major=$(( (gfx_ver / 10000 ) % 100 ))
259+ minor=$(( (gfx_ver / 100 ) % 100 ))
260+ step=$(( gfx_ver % 100 ))
261+
262+ # Format as gfx<major><minor><hex_step> (e.g., 9 0 a -> gfx90a)
263+ amdgcn_cc=$( printf " gfx%d%d%x" $major $minor $step )
264+
265+ log " DEBUG" " accelpath: AMDGCN compute capability '${amdgcn_cc} ' derived from KFD node ${node} "
266+
267+ # Assuming homogeneous nodes for EESSI, grab the first valid GPU and break
268+ break
269+ fi
270+ fi
271+ done
272+
273+ if [[ -n " $amdgcn_cc " ]]; then
224274 res=" accel/amd/${amdgcn_cc} "
225- echo $res
226- rm -f $amd_smi_out
275+ echo " $res "
276+ # Proceed with successful exit
227277 else
228- log " DEBUG" " accelpath: ami-smi command failed, see output in $amd_smi_out "
278+ log " DEBUG" " accelpath: KFD topology found, but no AMD GPUs detected (only CPUs) "
229279 exit 3
230280 fi
231281 else
232- log " DEBUG" " accelpath: amd-smi command not found"
282+ log " DEBUG" " accelpath: KFD sysfs path not found. AMD GPU driver not loaded? "
233283 exit 2
234284 fi
235285}
0 commit comments