|
1 | 1 | package capacity |
2 | 2 |
|
3 | 3 | import ( |
| 4 | + "encoding/xml" |
| 5 | + "fmt" |
4 | 6 | "os" |
5 | 7 | "os/exec" |
| 8 | + "strconv" |
6 | 9 | "strings" |
7 | 10 | "syscall" |
8 | 11 |
|
9 | 12 | "github.com/pkg/errors" |
10 | 13 | "github.com/rs/zerolog/log" |
11 | 14 | "github.com/shirou/gopsutil/host" |
| 15 | + "github.com/threefoldtech/zosbase/pkg" |
12 | 16 | "github.com/threefoldtech/zosbase/pkg/capacity/dmi" |
13 | 17 | "github.com/threefoldtech/zosbase/pkg/capacity/smartctl" |
14 | 18 | "github.com/threefoldtech/zosbase/pkg/gridtypes" |
@@ -182,3 +186,101 @@ func (r *ResourceOracle) GPUs() ([]PCI, error) { |
182 | 186 | } |
183 | 187 | return ListPCI(GPU) |
184 | 188 | } |
| 189 | + |
// normalizeBusID converts a PCI bus ID whose domain is written with extra
// leading zeros (for example "00000000:01:00.0") into the form that uses a
// domain of at least four digits ("0000:01:00.0").
//
// The input is split on ':' and must yield exactly three fields (domain, bus,
// device.function); any other shape is returned unchanged. Leading zeros are
// stripped from the domain, which is then left-padded with '0' up to four
// characters. A domain that still has more than four characters after
// stripping is kept in full. The bus and device.function fields are copied
// through untouched.
func normalizeBusID(busID string) string {
	const domainWidth = 4

	parts := strings.Split(busID, ":")
	if len(parts) != 3 {
		return busID
	}

	domain := strings.TrimLeft(parts[0], "0")
	// Pad by hand rather than with fmt's '0' flag on %s, so the result does
	// not depend on how fmt treats that flag for string operands. This also
	// covers the all-zero domain, which TrimLeft reduces to "".
	if missing := domainWidth - len(domain); missing > 0 {
		domain = strings.Repeat("0", missing) + domain
	}

	return domain + ":" + parts[1] + ":" + parts[2]
}
| 203 | + |
// DisplayNode is the decoding target for a single <node> element of the XML
// produced by lshw.
type DisplayNode struct {
	// Class is read from the node's "class" attribute.
	Class string `xml:"class,attr"`
	// BusInfo is the text of the <businfo> child element.
	BusInfo string `xml:"businfo"`
	// Product is the text of the <product> child element.
	Product string `xml:"product"`
	// Vendor is the text of the <vendor> child element.
	Vendor string `xml:"vendor"`
	// Resources mirrors the <resources> child element.
	Resources struct {
		// Memory receives one entry per <resource> child, keeping only its
		// "value" attribute.
		Memory []struct {
			Value string `xml:"value,attr"`
		} `xml:"resource"`
	} `xml:"resources"`
}
| 216 | + |
| 217 | +// DisplayList represents the root XML structure from lshw |
| 218 | +type DisplayList struct { |
| 219 | + Nodes []DisplayNode `xml:"node"` |
| 220 | +} |
| 221 | + |
| 222 | +// GetGpuDevice gets the GPU information using lshw command |
| 223 | +func GetGpuDevice(p *PCI) (pkg.GPUInfo, error) { |
| 224 | + cmd := exec.Command("lshw", "-C", "display", "-xml") |
| 225 | + output, err := cmd.Output() |
| 226 | + if err != nil { |
| 227 | + return pkg.GPUInfo{}, fmt.Errorf("failed to run lshw command: %w", err) |
| 228 | + } |
| 229 | + |
| 230 | + var displayList DisplayList |
| 231 | + err = xml.Unmarshal(output, &displayList) |
| 232 | + if err != nil { |
| 233 | + return pkg.GPUInfo{}, fmt.Errorf("failed to parse lshw XML output: %w", err) |
| 234 | + } |
| 235 | + |
| 236 | + for _, node := range displayList.Nodes { |
| 237 | + if node.Class != "display" { |
| 238 | + continue |
| 239 | + } |
| 240 | + |
| 241 | + busInfo := node.BusInfo |
| 242 | + if !strings.HasPrefix(busInfo, "pci@") { |
| 243 | + continue |
| 244 | + } |
| 245 | + |
| 246 | + busID := strings.TrimPrefix(busInfo, "pci@") |
| 247 | + normalizedBusID := normalizeBusID(busID) |
| 248 | + |
| 249 | + if normalizedBusID != p.Slot { |
| 250 | + continue |
| 251 | + } |
| 252 | + |
| 253 | + var vram uint64 = 0 |
| 254 | + for _, resource := range node.Resources.Memory { |
| 255 | + if strings.Contains(resource.Value, "-") { |
| 256 | + parts := strings.Split(resource.Value, "-") |
| 257 | + if len(parts) == 2 { |
| 258 | + start := strings.TrimSpace(parts[0]) |
| 259 | + end := strings.TrimSpace(parts[1]) |
| 260 | + if startVal, err1 := strconv.ParseUint(start, 16, 64); err1 == nil { |
| 261 | + if endVal, err2 := strconv.ParseUint(end, 16, 64); err2 == nil { |
| 262 | + size := (endVal - startVal + 1) / (1024 * 1024) |
| 263 | + if size > vram { |
| 264 | + vram = size |
| 265 | + } |
| 266 | + } |
| 267 | + } |
| 268 | + } |
| 269 | + } |
| 270 | + } |
| 271 | + |
| 272 | + vendor, device, ok := p.GetDevice() |
| 273 | + if !ok { |
| 274 | + return pkg.GPUInfo{}, fmt.Errorf("failed to get vendor and device info") |
| 275 | + } |
| 276 | + |
| 277 | + return pkg.GPUInfo{ |
| 278 | + ID: p.ShortID(), |
| 279 | + Vendor: vendor.Name, |
| 280 | + Device: device.Name, |
| 281 | + Vram: vram, |
| 282 | + }, nil |
| 283 | + } |
| 284 | + |
| 285 | + return pkg.GPUInfo{}, fmt.Errorf("gpu not found in lshw output") |
| 286 | +} |
0 commit comments