@@ -18,13 +18,16 @@ import (
1818
// Docker images that bundle the vendor SMI tools.
const (
	// amdSmiImage is pinned to a specific amd-smi release.
	// Presumably consumed by getAmdGpuInfo — confirm against the rest of the file.
	amdSmiImage = "un1def/amd-smi:6.2.2-0"

	// ttSmiImage is the image getTenstorrentGpuInfo runs to query Tenstorrent devices.
	// NOTE(review): unlike amdSmiImage this tracks the floating "latest" tag.
	ttSmiImage = "dstackai/tt-smi:latest"
)
22+
// GpuVendor names the accelerator vendor detected on the host.
type GpuVendor string

// Known GpuVendor values. GpuVendorNone means no supported accelerator was found.
const (
	GpuVendorNone        GpuVendor = "none"
	GpuVendorNvidia      GpuVendor = "nvidia"
	GpuVendorAmd         GpuVendor = "amd"
	GpuVendorIntel       GpuVendor = "intel"
	GpuVendorTenstorrent GpuVendor = "tenstorrent"
)
2932
3033type GpuInfo struct {
@@ -57,6 +60,9 @@ func GetGpuVendor() GpuVendor {
5760 if _ , err := os .Stat ("/dev/accel" ); ! errors .Is (err , os .ErrNotExist ) {
5861 return GpuVendorIntel
5962 }
63+ if _ , err := os .Stat ("/dev/tenstorrent" ); ! errors .Is (err , os .ErrNotExist ) {
64+ return GpuVendorTenstorrent
65+ }
6066 return GpuVendorNone
6167}
6268
@@ -68,6 +74,8 @@ func GetGpuInfo(ctx context.Context) []GpuInfo {
6874 return getAmdGpuInfo (ctx )
6975 case GpuVendorIntel :
7076 return getIntelGpuInfo (ctx )
77+ case GpuVendorTenstorrent :
78+ return getTenstorrentGpuInfo (ctx )
7179 case GpuVendorNone :
7280 return []GpuInfo {}
7381 }
@@ -195,6 +203,85 @@ func getAmdGpuInfo(ctx context.Context) []GpuInfo {
195203 return gpus
196204}
197205
206+ type ttSmiSnapshot struct {
207+ DeviceInfo []ttDeviceInfo `json:"device_info"`
208+ }
209+
210+ type ttDeviceInfo struct {
211+ BoardInfo ttBoardInfo `json:"board_info"`
212+ }
213+
// ttBoardInfo is the "board_info" object of a tt-smi device entry.
type ttBoardInfo struct {
	// BoardType is the board model string; it may carry a trailing " R" or " L".
	BoardType string `json:"board_type"`
	// BusID is the device's bus identifier as reported by tt-smi.
	BusID string `json:"bus_id"`
}
218+
219+ func getTenstorrentGpuInfo (ctx context.Context ) []GpuInfo {
220+ gpus := []GpuInfo {}
221+
222+ cmd := execute.ExecTask {
223+ Command : "docker" ,
224+ Args : []string {
225+ "run" ,
226+ "--rm" ,
227+ "--device" , "/dev/tenstorrent" ,
228+ ttSmiImage ,
229+ "-s" ,
230+ },
231+ StreamStdio : false ,
232+ }
233+ res , err := cmd .Execute (ctx )
234+ if err != nil {
235+ log .Error (ctx , "failed to execute tt-smi" , "err" , err )
236+ return gpus
237+ }
238+ if res .ExitCode != 0 {
239+ log .Error (
240+ ctx , "failed to execute tt-smi" ,
241+ "exitcode" , res .ExitCode , "stdout" , res .Stdout , "stderr" , res .Stderr ,
242+ )
243+ return gpus
244+ }
245+
246+ var ttSmiSnapshot ttSmiSnapshot
247+ if err := json .Unmarshal ([]byte (res .Stdout ), & ttSmiSnapshot ); err != nil {
248+ log .Error (ctx , "cannot read tt-smi json" , "err" , err )
249+ log .Debug (ctx , "tt-smi output" , "stdout" , res .Stdout )
250+ return gpus
251+ }
252+
253+ for i , device := range ttSmiSnapshot .DeviceInfo {
254+ // Extract board type without R/L suffix
255+ boardType := strings .TrimSpace (device .BoardInfo .BoardType )
256+ name := boardType
257+
258+ // Remove " R" or " L" suffix if present
259+ if strings .HasSuffix (boardType , " R" ) {
260+ name = boardType [:len (boardType )- 2 ]
261+ } else if strings .HasSuffix (boardType , " L" ) {
262+ name = boardType [:len (boardType )- 2 ]
263+ }
264+
265+ // Determine VRAM based on board type
266+ vram := 0
267+ if strings .HasPrefix (name , "n150" ) {
268+ vram = 12 * 1024 // 12GB in MiB
269+ } else if strings .HasPrefix (name , "n300" ) {
270+ vram = 24 * 1024 // 24GB in MiB
271+ }
272+
273+ gpus = append (gpus , GpuInfo {
274+ Vendor : GpuVendorTenstorrent ,
275+ Name : name ,
276+ Vram : vram ,
277+ ID : device .BoardInfo .BusID ,
278+ Index : strconv .Itoa (i ),
279+ })
280+ }
281+
282+ return gpus
283+ }
284+
198285func getAmdRenderNodePath (bdf string ) (string , error ) {
199286 // amd-smi uses extended BDF Notation with domain: Domain:Bus:Device.Function, e.g., 0000:5f:00.0
200287 // udev creates /dev/dri/by-path/pci-<BDF>-render -> ../renderD<N> symlinks
0 commit comments