Skip to content

Commit e361c8b

Browse files
committed
Extract amdgcn_cc from kfd
1 parent ee1eff6 commit e361c8b

1 file changed

Lines changed: 59 additions & 13 deletions

File tree

init/eessi_archdetect.sh

Lines changed: 59 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -212,24 +212,70 @@ accelpath() {
212212
# fi
213213

214214
# check for AMD GPUs via amd-smi command
215-
amd_smi=$(command -v amd-smi)
216-
if [[ $? -eq 0 ]]; then
217-
log "DEBUG" "accelpath: amd-smi command found @ ${amd_smi}"
218-
amd_smi_out=$(mktemp -p /tmp amd_smi_out.XXXXX)
219-
amd-smi static --asic | grep TARGET_GRAPHICS_VERSION 2>&1 > $amd_smi_out
220-
if [[ $? -eq 0 ]]; then
221-
amd_smi_info=$(head -n 1 $amd_smi_out)
222-
amdgcn_cc=$(echo $amd_smi_info | sed 's/.*: //')
223-
log "DEBUG" "accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from amd-smi output '${amd_smi_info}'"
215+
# amd_smi=$(command -v amd-smi)
216+
# if [[ $? -eq 0 ]]; then
217+
# log "DEBUG" "accelpath: amd-smi command found @ ${amd_smi}"
218+
# amd_smi_out=$(mktemp -p /tmp amd_smi_out.XXXXX)
219+
# amd-smi static --asic | grep TARGET_GRAPHICS_VERSION 2>&1 > $amd_smi_out
220+
# if [[ $? -eq 0 ]]; then
221+
# amd_smi_info=$(head -n 1 $amd_smi_out)
222+
# amdgcn_cc=$(echo $amd_smi_info | sed 's/.*: //')
223+
# log "DEBUG" "accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from amd-smi output '${amd_smi_info}'"
224+
# res="accel/amd/${amdgcn_cc}"
225+
# echo $res
226+
# rm -f $amd_smi_out
227+
# else
228+
# log "DEBUG" "accelpath: ami-smi command failed, see output in $amd_smi_out"
229+
# exit 3
230+
# fi
231+
# else
232+
# log "DEBUG" "accelpath: amd-smi command not found"
233+
# exit 2
234+
# fi
235+
236+
# logic ported from https://github.com/llvm/llvm-project/blob/6e738e187055bbd33b6c3d203b6b55904dfcb624/clang/tools/offload-arch/AMDGPUArchByKFD.cpp
237+
# check for AMD GPUs via KFD sysfs interface (No amd-smi or Python required)
238+
kfd_nodes="/sys/devices/virtual/kfd/kfd/topology/nodes"
239+
240+
if [[ -d "$kfd_nodes" ]]; then
241+
log "DEBUG" "accelpath: KFD sysfs path found @ ${kfd_nodes}"
242+
amdgcn_cc=""
243+
244+
# ls -1v ensures numeric/version sorting (nodes/0, nodes/1, ..., nodes/10)
245+
# just like LLVM's llvm::sort by node ID.
246+
# Assuming homogeneous nodes for EESSI, grab the first valid GPU and break
247+
for node in $(ls -1v "$kfd_nodes" 2>/dev/null); do
248+
prop_file="$kfd_nodes/$node/properties"
249+
250+
if [[ -f "$prop_file" ]]; then
251+
# Extract the integer value. 2>/dev/null suppresses read errors.
252+
gfx_ver=$(grep "^gfx_target_version" "$prop_file" 2>/dev/null | awk '{print $2}')
253+
254+
# If gfx_ver is non-empty and greater than 0 (0 means it's a CPU node)
255+
if [[ -n "$gfx_ver" && "$gfx_ver" -gt 0 ]]; then
256+
# Perform the exact math from AMDGPUArchByKFD.cpp
257+
major=$(( (gfx_ver / 10000) % 100 ))
258+
minor=$(( (gfx_ver / 100) % 100 ))
259+
step=$(( gfx_ver % 100 ))
260+
261+
# Format as gfx<major><minor><step>, with major and minor in decimal and step in lowercase hex (e.g., major=9, minor=0, step=10 -> gfx90a)
262+
amdgcn_cc=$(printf "gfx%d%d%x" $major $minor $step)
263+
264+
log "DEBUG" "accelpath: AMDGCN compute capability '${amdgcn_cc}' derived from KFD node ${node}"
265+
break
266+
fi
267+
fi
268+
done
269+
270+
if [[ -n "$amdgcn_cc" ]]; then
224271
res="accel/amd/${amdgcn_cc}"
225-
echo $res
226-
rm -f $amd_smi_out
272+
echo "$res"
227273
else
228-
log "DEBUG" "accelpath: ami-smi command failed, see output in $amd_smi_out"
274+
log "DEBUG" "accelpath: KFD topology found, but no AMD GPUs detected (only CPUs)"
229275
exit 3
230276
fi
231277
else
232-
log "DEBUG" "accelpath: amd-smi command not found"
278+
log "DEBUG" "accelpath: KFD sysfs path not found. AMD GPU driver not loaded?"
233279
exit 2
234280
fi
235281
}

0 commit comments

Comments
 (0)