Skip to content

Commit 0da9695

Browse files
authored
Merge pull request #117 from shiv-tyagi/add-gpu-list-cmd
Add 'amd-ctk gpu list' command to display GPU info
2 parents a01e79e + 966f284 commit 0da9695

4 files changed

Lines changed: 138 additions & 34 deletions

File tree

README.md

Lines changed: 11 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -182,50 +182,27 @@ The AMD Container Toolkit supports GPU selection using unique identifiers (UUIDs
182182
183183
## Getting GPU UUIDs
184184
185-
GPU UUIDs can be obtained using different tools:
185+
GPU UUIDs can be obtained using the `amd-ctk gpu list` command:
186186
187-
### Using ROCm SMI
188187
```bash
189-
rocm-smi --showuniqueid
188+
amd-ctk gpu list
190189
```
191190
192191
This will display output similar to:
193192
```
194-
GPU[0] : Unique ID: 0xef2c1799a1f3e2ed
195-
GPU[1] : Unique ID: 0x1234567890abcdef
193+
Found 2 AMD GPU devices
194+
---------------------------------------------------------------------------
195+
GPU Id    UUID                     DRM Devices
196+
---------------------------------------------------------------------------
197+
0         0xEF2C1799A1F3E2ED       /dev/dri/renderD128
198+
1         0x1234567890ABCDEF       /dev/dri/renderD129
196199
```
197200
198-
### Using AMD-SMI
199-
The `amd-smi` tool can also be used to get the ASIC_SERIAL, which serves as the GPU UUID:
201+
Use the `UUID` value (e.g., `0xEF2C1799A1F3E2ED`) as the GPU UUID in container configurations.
200202
201-
```bash
202-
amd-smi static -aB
203-
```
204-
205-
This will display output similar to:
206-
```
207-
GPU: 0
208-
ASIC:
209-
MARKET_NAME: AMD Instinct MI210
210-
VENDOR_ID: 0x1002
211-
VENDOR_NAME: Advanced Micro Devices Inc. [AMD/ATI]
212-
SUBVENDOR_ID: 0x1002
213-
DEVICE_ID: 0x740f
214-
SUBSYSTEM_ID: 0x0c34
215-
REV_ID: 0x02
216-
ASIC_SERIAL: 0xD1CC3F11CFDD5112
217-
OAM_ID: N/A
218-
NUM_COMPUTE_UNITS: 104
219-
TARGET_GRAPHICS_VERSION: gfx90a
220-
BOARD:
221-
MODEL_NUMBER: 102-D67302-00
222-
PRODUCT_SERIAL: 692231000131
223-
FRU_ID: 113-HPED67302000B.009
224-
PRODUCT_NAME: Instinct MI210
225-
MANUFACTURER_NAME: AMD
226-
```
203+
If GPU Tracker is enabled, `amd-ctk gpu-tracker status` also displays UUIDs alongside container allocation and accessibility information.
227204
228-
Use the `ASIC_SERIAL` value (e.g., `0xD1CC3F11CFDD5112`) as the GPU UUID in container configurations.
205+
**Note:** The UUID used by the AMD Container Toolkit is sourced from the KFD topology (`/sys/class/kfd/kfd/topology/nodes/*/properties`). This may differ from the `ASIC_SERIAL` reported by `amd-smi` or the Unique ID reported by `rocm-smi`. Always use the UUID shown by `amd-ctk gpu list` for container configurations.
229206
230207
## Using UUIDs with Environment Variables
231208

cmd/amd-ctk/gpu/gpu.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/**
2+
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package gpu
18+
19+
import (
20+
"github.com/ROCm/container-toolkit/cmd/amd-ctk/gpu/list"
21+
"github.com/urfave/cli/v2"
22+
)
23+
24+
func AddNewCommand() *cli.Command {
25+
gpuCmd := cli.Command{
26+
Name: "gpu",
27+
Usage: "GPU related commands",
28+
UsageText: "amd-ctk gpu [command] [options]",
29+
}
30+
31+
gpuCmd.Subcommands = []*cli.Command{
32+
list.AddNewCommand(),
33+
}
34+
35+
return &gpuCmd
36+
}

cmd/amd-ctk/gpu/list/list.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/**
2+
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package list
18+
19+
import (
20+
"fmt"
21+
"strings"
22+
23+
"github.com/ROCm/container-toolkit/internal/amdgpu"
24+
"github.com/urfave/cli/v2"
25+
)
26+
27+
func AddNewCommand() *cli.Command {
28+
gpuListCmd := cli.Command{
29+
Name: "list",
30+
Usage: "List AMD GPUs with their UUIDs",
31+
UsageText: "amd-ctk gpu list",
32+
Action: func(c *cli.Context) error {
33+
return performAction(c)
34+
},
35+
}
36+
37+
return &gpuListCmd
38+
}
39+
40+
func performAction(c *cli.Context) error {
41+
devs, err := amdgpu.GetAMDGPUs()
42+
if err != nil {
43+
return fmt.Errorf("failed to list AMD devices: %v", err)
44+
}
45+
46+
uuidToGPUIdMap, err := amdgpu.GetUniqueIdToDeviceIndexMap()
47+
if err != nil {
48+
uuidToGPUIdMap = make(map[string][]int)
49+
}
50+
51+
gpuIdToUUIDMap := make(map[int]string)
52+
for uuid, gpuIds := range uuidToGPUIdMap {
53+
if strings.HasPrefix(uuid, "0x") || strings.HasPrefix(uuid, "0X") {
54+
uuid = uuid[2:]
55+
}
56+
uuid = "0x" + strings.ToUpper(uuid)
57+
for _, gpuId := range gpuIds {
58+
gpuIdToUUIDMap[gpuId] = uuid
59+
}
60+
}
61+
62+
suffix := "devices"
63+
if len(devs) == 1 {
64+
suffix = "device"
65+
}
66+
fmt.Printf("Found %v AMD GPU %s\n", len(devs), suffix)
67+
68+
fmt.Println(strings.Repeat("-", 75))
69+
fmt.Printf("%-10s%-25s%-40s\n", "GPU Id", "UUID", "DRM Devices")
70+
fmt.Println(strings.Repeat("-", 75))
71+
for idx, dev := range devs {
72+
uuid := gpuIdToUUIDMap[idx]
73+
if uuid == "" {
74+
uuid = "N/A"
75+
}
76+
77+
var renderDevs []string
78+
for _, dd := range dev.DrmDevices {
79+
if !strings.HasPrefix(dd, "/dev/dri/card") {
80+
renderDevs = append(renderDevs, dd)
81+
}
82+
}
83+
84+
drmStr := strings.Join(renderDevs, ", ")
85+
fmt.Printf("%-10v%-25s%-40s\n", idx, uuid, drmStr)
86+
}
87+
88+
return nil
89+
}

cmd/amd-ctk/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"os"
2323

2424
"github.com/ROCm/container-toolkit/cmd/amd-ctk/cdi"
25+
"github.com/ROCm/container-toolkit/cmd/amd-ctk/gpu"
2526
gpuTracker "github.com/ROCm/container-toolkit/cmd/amd-ctk/gpu-tracker"
2627
"github.com/ROCm/container-toolkit/cmd/amd-ctk/runtime"
2728
"github.com/urfave/cli/v2"
@@ -86,6 +87,7 @@ func main() {
8687
showVersion(),
8788
runtime.AddNewCommand(),
8889
cdi.AddNewCommand(),
90+
gpu.AddNewCommand(),
8991
gpuTracker.AddNewCommand(),
9092
}
9193

0 commit comments

Comments
 (0)