Skip to content

Commit 5fe6f7a

Browse files
committed
Add support for MutableCSINodeAllocatableCount
The CSI driver lists all PCIe devices that are not of type VIRTIO_BLOCK_DEVICE and subtracts them from the theoretical maximum, so Kubernetes can report a correct, dynamic maximum number of volumes that can be attached to each node. Signed-off-by: Niclas Schad <niclas.schad@stackit.cloud>
1 parent bddf6e4 commit 5fe6f7a

7 files changed

Lines changed: 118 additions & 9 deletions

File tree

pkg/csi/blockstorage/controllerserver.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,10 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs
370370

371371
_, err = cloud.AttachVolume(ctx, instanceID, volumeID)
372372
if err != nil {
373+
// Triggers an immediate `NodeGetInfo` RPC call when MutableCSINodeAllocatableCount is enabled
374+
if stackiterrors.IsTooManyDevicesError(err) {
375+
return nil, status.Errorf(codes.ResourceExhausted, "[ControllerPublishVolume] Node can't accept any more volumes %v. All PCIe lanes are exhausted!", err)
376+
}
373377
klog.Errorf("Failed to AttachVolume: %v", err)
374378
return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] Attach Volume failed with error %v", err)
375379
}

pkg/csi/blockstorage/nodeserver.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,16 @@ func (ns *nodeServer) NodeGetInfo(ctx context.Context, _ *csi.NodeGetInfoRequest
308308
}
309309

310310
maxVolumesPerNode := DetermineMaxVolumesByFlavor(flavor)
311-
// Subtract 1 for root disk and another for configDrive/spare
312-
maxVolumesPerNode -= 2
311+
312+
// Subtract PCIe root ports occupied by non virtio block devices
313+
emptyPCIeRootPorts, err := mount.CountNonVirtioBlockDevices()
314+
if err != nil {
315+
klog.Errorf("[NodeGetInfo] unable to retrieve PCIe root ports %v", err)
316+
emptyPCIeRootPorts = 0
317+
}
318+
319+
maxVolumesPerNode -= emptyPCIeRootPorts
320+
klog.V(4).Infof("Determined %d PCIe ports occupied by non virtio block devices", emptyPCIeRootPorts)
313321
klog.V(4).Infof("Determined node to support %d volumes", maxVolumesPerNode)
314322

315323
nodeInfo := &csi.NodeGetInfoResponse{

pkg/csi/blockstorage/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func DetermineMaxVolumesByFlavor(flavor string) int64 {
9797
return 159
9898
default:
9999
// All other flavors can mount 28 volumes
100-
return 25
100+
return 28
101101
}
102102
}
103103

pkg/csi/blockstorage/utils_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ var _ = Describe("Util Test", func() {
1212
maxVolumes := DetermineMaxVolumesByFlavor(flavor)
1313
Expect(maxVolumes).To(Equal(int64(expectedMaxVolumes)))
1414
},
15-
Entry("Intel 3rd Gen", "c3i.2", 25),
16-
Entry("Intel 2rd Gen", "c2i.2", 25),
17-
Entry("Intel 1st Gen", "c1.2", 25),
18-
Entry("AMD 1st Gen without overprovisioning", "s1a.8d", 25),
15+
Entry("Intel 3rd Gen", "c3i.2", 28),
16+
Entry("Intel 2rd Gen", "c2i.2", 28),
17+
Entry("Intel 1st Gen", "c1.2", 28),
18+
Entry("AMD 1st Gen without overprovisioning", "s1a.8d", 28),
1919
Entry("AMD 2nd Gen without overprovisioning", "s2a.8d", 159),
2020
Entry("Nvidia GPU", "n2.14d.g1", 10),
2121
Entry("Nvidia GPU", "n2.56d.g4", 10),
22-
Entry("ARM Gen1Link without CPU-overprovisioning ARM Gen1", "g1r.4d", 25),
22+
Entry("ARM Gen1Link without CPU-overprovisioning ARM Gen1", "g1r.4d", 28),
2323
)
2424
})
2525
})

pkg/csi/util/mount/mount_darwin.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,8 @@ func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats {
1717
UsedInodes: int64(statfs.Files) - int64(statfs.Ffree),
1818
}
1919
}
20+
21+
// CountNonVirtioBlockDevices is the darwin stub of the PCIe port counter.
// It performs no inspection and always returns 0 with a nil error.
func CountNonVirtioBlockDevices() (int64, error) {
22+
// not implemented
23+
return 0, nil
24+
}

pkg/csi/util/mount/mount_linux.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@ package mount
44

55
import "golang.org/x/sys/unix"
66

7+
var (
8+
// pciAddressRegex matches a complete PCI address written as four hex digits,
// two hex digits, two hex digits and one hex digit, separated as
// <domain>:<bus>:<device>.<function> (e.g. 0000:01:00.0).
pciAddressRegex = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`)
9+
)
10+
11+
const (
12+
// RedhatVendor is the vendor ID string compared against the content of a
// PCI device's sysfs "vendor" file.
RedhatVendor = "0x1af4"
13+
// VirtioBlockDevice is the device ID string compared against the content of a
// PCI device's sysfs "device" file to recognise a VIRTIO block device.
VirtioBlockDevice = "0x1042"
14+
)
15+
716
func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats {
817
return &DeviceStats{
918
Block: false,
@@ -17,3 +26,74 @@ func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats {
1726
UsedInodes: int64(statfs.Files) - int64(statfs.Ffree),
1827
}
1928
}
29+
30+
// CountNonVirtioBlockDevices returns the number of PCI bridges (class prefix
// 0x0604, i.e. PCIe root ports) listed under /sys/bus/pci/devices whose first
31+
// downstream device is classified by IsNonBlockDevice as something other than
// a VIRTIO 1.0 block device.
32+
// It returns zero and an error when the PCI device directory cannot be read.
// Devices whose "class" file cannot be read are logged and skipped.
33+
func CountNonVirtioBlockDevices() (int64, error) {
34+
const pciPath = "/sys/bus/pci/devices"
35+
36+
// Get all PCI devices
37+
devices, err := os.ReadDir(pciPath)
38+
if err != nil {
39+
return 0, fmt.Errorf("failed to read PCI bus: %w", err)
40+
}
41+
42+
pcieSlotsOccupiedByNonBlockDevice := 0
43+
44+
for _, dev := range devices {
45+
devPath := filepath.Join(pciPath, dev.Name())
46+
47+
// 1. Identify whether the device is a root port / bridge.
48+
// We check the 'class' file; the PCI bridge class code starts with 0x0604.
49+
classBuf, err := os.ReadFile(filepath.Join(devPath, "class"))
50+
if err != nil {
51+
klog.Errorf("failed to read PCI device class %s : %v", devPath, err)
52+
continue
53+
}
54+
class := strings.TrimSpace(string(classBuf))
55+
56+
// Class 0x060400 is a PCI-to-PCI bridge (standard for root ports).
57+
if strings.HasPrefix(class, "0x0604") {
58+
// 2. Check whether the port has downstream devices.
59+
// If the bridge has children, they appear as subdirectories
60+
// matching the PCI address format (e.g., 0000:01:00.0).
61+
files, err2 := os.ReadDir(devPath)
62+
if err2 != nil {
63+
// The error is only logged; the loop below still runs over whatever
// entries ReadDir returned alongside the error.
klog.Errorf("failed to read dir %s : %v", devPath, err2)
64+
}
65+
for _, file := range files {
66+
// Ignore PCI bus directories such as pci001, pci002 and pci010.
67+
// Devices must follow the <domain:bus:device.function> format.
68+
if pciAddressRegex.MatchString(file.Name()) {
69+
isNonBlockDevice := IsNonBlockDevice(devPath, file)
70+
if isNonBlockDevice {
71+
pcieSlotsOccupiedByNonBlockDevice++
72+
}
73+
// Only the first entry that looks like a PCI address is inspected per bridge.
break
74+
}
75+
}
76+
} else {
77+
klog.V(4).Infof("skipping class %s: path: %s", class, devPath)
78+
}
79+
}
80+
81+
return int64(pcieSlotsOccupiedByNonBlockDevice), nil
82+
}
83+
84+
// IsNonBlockDevice reports whether the PCI device found at devPath/<file name>
// has the vendor ID RedhatVendor and a device ID other than VirtioBlockDevice.
// The IDs are read from the device's "vendor" and "device" files; read failures
// are logged and leave the corresponding value empty.
//
// NOTE(review): devices from any other vendor, and devices whose "vendor" file
// cannot be read, are reported as false (i.e. not counted as non-block devices).
// Confirm this is intended, given the function name and the caller's doc comment.
func IsNonBlockDevice(devPath string, file os.DirEntry) bool {
85+
var isNonBlockDevice bool
86+
pciDevicePath := filepath.Join(devPath, file.Name())
87+
vendorBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "vendor"))
88+
if err != nil {
89+
klog.Errorf("failed to read PCI device vendor %s : %v", pciDevicePath, err)
90+
}
91+
deviceBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "device"))
92+
if err != nil {
93+
klog.Errorf("failed to read PCI device file %s : %v", pciDevicePath, err)
94+
}
95+
// True only for Red Hat vendor devices that are not the VIRTIO block device ID.
if strings.TrimSpace(string(vendorBuf)) == RedhatVendor && strings.TrimSpace(string(deviceBuf)) != VirtioBlockDevice {
96+
isNonBlockDevice = true
97+
}
98+
return isNonBlockDevice
99+
}

pkg/stackit/stackiterrors/errors.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ import (
44
"errors"
55
"fmt"
66
"net/http"
7+
"strings"
78

89
oapiError "github.com/stackitcloud/stackit-sdk-go/core/oapierror"
9-
wait "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait"
10+
"github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait"
1011
)
1112

1213
var ErrNotFound = errors.New("failed to find object")
@@ -20,6 +21,17 @@ func IsNotFound(err error) bool {
2021
return oAPIError.StatusCode == http.StatusNotFound
2122
}
2223

24+
// IsTooManyDevicesError reports whether err wraps a GenericOpenAPIError with
// HTTP status 403 (Forbidden) whose error message contains
// "maximum allowed number of disk devices". It returns false for any other error.
func IsTooManyDevicesError(err error) bool {
25+
var oAPIError *oapiError.GenericOpenAPIError
26+
if ok := errors.As(err, &oAPIError); !ok {
27+
return false
28+
}
29+
30+
// TODO: Improve this if possible
// The match relies on a substring of the API's error message.
31+
return oAPIError.StatusCode == http.StatusForbidden &&
32+
strings.Contains(oAPIError.ErrorMessage, "maximum allowed number of disk devices")
33+
}
34+
2335
func IgnoreNotFound(err error) error {
2436
if IsNotFound(err) {
2537
return nil

0 commit comments

Comments
 (0)