Skip to content

Commit c3469b7

Browse files
committed
Add KubeVirt patch to fix VFIO PCI passthrough
Signed-off-by: Woojoong Kim <woojoongkim@microsoft.com>
1 parent c8bb829 commit c3469b7

2 files changed

Lines changed: 373 additions & 1 deletion

File tree

SPECS/kubevirt/kubevirt.spec

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@
2020
Summary: Container native virtualization
2121
Name: kubevirt
2222
Version: 1.7.1
23-
Release: 2%{?dist}
23+
Release: 3%{?dist}
2424
License: ASL 2.0
2525
Vendor: Microsoft Corporation
2626
Distribution: Azure Linux
2727
Group: System/Management
2828
URL: https://github.com/kubevirt/kubevirt
2929
Source0: https://github.com/kubevirt/kubevirt/archive/refs/tags/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
3030
Patch0: CVE-2025-11065.patch
31+
Patch1: vfio-pci-fix.patch
3132

3233
%global debug_package %{nil}
3334
BuildRequires: swtpm-tools
@@ -265,6 +266,9 @@ install -p -m 0644 cmd/virt-launcher/qemu.conf %{buildroot}%{_datadir}/kube-virt
265266
%{_bindir}/virt-tests
266267

267268
%changelog
269+
* Thu Apr 23 2026 Woojoong Kim <woojoongkim@microsoft.com> - 1.7.1-3
270+
- Add VFIO PCI passthrough patch
271+
268272
* Wed Mar 25 2026 Aditya Singh <v-aditysing@microsoft.com> - 1.7.1-2
269273
- Bump to rebuild with updated glibc
270274

SPECS/kubevirt/vfio-pci-fix.patch

Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
From 3c576ab7e84f9c8ea8cc89691df6bc001b5d5dee Mon Sep 17 00:00:00 2001
2+
From: Michael Henriksen <mhenriks@redhat.com>
3+
Date: Fri, 17 Apr 2026 23:29:54 -0400
4+
Subject: [PATCH] Fix VM with PCI hostdev failing to restart after hotplug
5+
block volume
6+
7+
When a hotplug block volume is mounted into the virt-launcher pod,
8+
allowBlockMajorMinor() calls cgroupManager.Set() to add the block
9+
device to the cgroup allowlist. On cgroups v2, this replaces the
10+
entire eBPF device filter program. The v2Manager rebuilds the program
11+
from its in-memory rule cache, which is initialized from
12+
generateDeviceRulesForVMI() and does not include devices provisioned
13+
by device plugins. This wipes access to device-plugin-provided nodes
14+
such as /dev/vfio/* (PCI/MDEV/GPU/SR-IOV passthrough) and
15+
/dev/bus/usb/* (USB passthrough), causing libvirt to fail with
16+
"pci backend driver type 'default' is not supported" when starting
17+
the domain.
18+
19+
Fix by recursively scanning /dev/vfio/ and /dev/bus/usb/ inside the
20+
container and including all discovered device rules in the initial
21+
cache so they are preserved when the eBPF program is rebuilt.
22+
23+
Fixes: https://github.com/kubevirt/kubevirt/issues/17124
24+
25+
Signed-off-by: Michael Henriksen <mhenriks@redhat.com>
26+
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
27+
Signed-off-by: Michael Henriksen <mhenriks@redhat.com>
28+
---
29+
pkg/virt-handler/cgroup/BUILD.bazel | 3 +
30+
pkg/virt-handler/cgroup/cgroup_test.go | 50 ++++++++++++++
31+
pkg/virt-handler/cgroup/util.go | 95 +++++++++++++++++++++++---
32+
tests/storage/hotplug.go | 77 +++++++++++++++++++++
33+
4 files changed, 214 insertions(+), 11 deletions(-)
34+
35+
diff --git a/pkg/virt-handler/cgroup/BUILD.bazel b/pkg/virt-handler/cgroup/BUILD.bazel
36+
index ace69f1d78..4f4ec95714 100644
37+
--- a/pkg/virt-handler/cgroup/BUILD.bazel
38+
+++ b/pkg/virt-handler/cgroup/BUILD.bazel
39+
@@ -40,6 +40,9 @@ go_test(
40+
embed = [":go_default_library"],
41+
race = "on",
42+
deps = [
43+
+ "//pkg/safepath:go_default_library",
44+
+ "//pkg/virt-handler/isolation:go_default_library",
45+
+ "//staging/src/kubevirt.io/api/core/v1:go_default_library",
46+
"//staging/src/kubevirt.io/client-go/testutils:go_default_library",
47+
"//vendor/github.com/onsi/ginkgo/v2:go_default_library",
48+
"//vendor/github.com/onsi/gomega:go_default_library",
49+
diff --git a/pkg/virt-handler/cgroup/cgroup_test.go b/pkg/virt-handler/cgroup/cgroup_test.go
50+
index 50b5198e2a..53450e2a06 100644
51+
--- a/pkg/virt-handler/cgroup/cgroup_test.go
52+
+++ b/pkg/virt-handler/cgroup/cgroup_test.go
53+
@@ -20,12 +20,20 @@
54+
package cgroup
55+
56+
import (
57+
+ "os"
58+
+ "path/filepath"
59+
+
60+
. "github.com/onsi/ginkgo/v2"
61+
. "github.com/onsi/gomega"
62+
runc_cgroups "github.com/opencontainers/runc/libcontainer/cgroups"
63+
runc_configs "github.com/opencontainers/runc/libcontainer/configs"
64+
"github.com/opencontainers/runc/libcontainer/devices"
65+
"go.uber.org/mock/gomock"
66+
+
67+
+ v1 "kubevirt.io/api/core/v1"
68+
+
69+
+ "kubevirt.io/kubevirt/pkg/safepath"
70+
+ "kubevirt.io/kubevirt/pkg/virt-handler/isolation"
71+
)
72+
73+
var _ = Describe("cgroup manager", func() {
74+
@@ -195,3 +203,45 @@ var _ = Describe("cgroup manager", func() {
75+
),
76+
)
77+
})
78+
+
79+
+var _ = Describe("generateDeviceRulesForVMI", func() {
80+
+ var (
81+
+ ctrl *gomock.Controller
82+
+ tempDir string
83+
+ )
84+
+
85+
+ BeforeEach(func() {
86+
+ ctrl = gomock.NewController(GinkgoT())
87+
+ tempDir = GinkgoT().TempDir()
88+
+ Expect(os.MkdirAll(filepath.Join(tempDir, "dev"), 0755)).To(Succeed())
89+
+ })
90+
+
91+
+ newMockIsolationWithMountRoot := func() isolation.IsolationResult {
92+
+ mountRoot, err := safepath.NewPathNoFollow(tempDir)
93+
+ Expect(err).ToNot(HaveOccurred())
94+
+
95+
+ mockIso := isolation.NewMockIsolationResult(ctrl)
96+
+ mockIso.EXPECT().MountRoot().Return(mountRoot, nil)
97+
+ return mockIso
98+
+ }
99+
+
100+
+ It("should not fail when /dev/vfio does not exist", func() {
101+
+ rules, err := generateDeviceRulesForVMI(&v1.VirtualMachineInstance{}, newMockIsolationWithMountRoot(), "")
102+
+ Expect(err).ToNot(HaveOccurred())
103+
+ Expect(rules).To(BeEmpty())
104+
+ })
105+
+
106+
+ It("should not fail when /dev/vfio exists but is empty", func() {
107+
+ Expect(os.MkdirAll(filepath.Join(tempDir, "dev", "vfio"), 0755)).To(Succeed())
108+
+ rules, err := generateDeviceRulesForVMI(&v1.VirtualMachineInstance{}, newMockIsolationWithMountRoot(), "")
109+
+ Expect(err).ToNot(HaveOccurred())
110+
+ Expect(rules).To(BeEmpty())
111+
+ })
112+
+
113+
+ It("should not fail when /dev/bus/usb exists but is empty", func() {
114+
+ Expect(os.MkdirAll(filepath.Join(tempDir, "dev", "bus", "usb"), 0755)).To(Succeed())
115+
+ rules, err := generateDeviceRulesForVMI(&v1.VirtualMachineInstance{}, newMockIsolationWithMountRoot(), "")
116+
+ Expect(err).ToNot(HaveOccurred())
117+
+ Expect(rules).To(BeEmpty())
118+
+ })
119+
+})
120+
diff --git a/pkg/virt-handler/cgroup/util.go b/pkg/virt-handler/cgroup/util.go
121+
index 892113c83d..be115bad14 100644
122+
--- a/pkg/virt-handler/cgroup/util.go
123+
+++ b/pkg/virt-handler/cgroup/util.go
124+
@@ -59,6 +59,9 @@ const (
125+
V2 CgroupVersion = "v2"
126+
127+
loggingVerbosity = 2
128+
+
129+
+ rwmPermissions = "rwm"
130+
+ rwPermissions = "rw"
131+
)
132+
133+
var (
134+
@@ -126,6 +129,18 @@ func getSourceBlockToFsMigratedVolumes(vmi *v1.VirtualMachineInstance, host stri
135+
return vols
136+
}
137+
138+
+func getDevicePermissionsFromCgroups() devices.Permissions {
139+
+ if cgroups.IsCgroup2UnifiedMode() {
140+
+ return rwmPermissions
141+
+ } else {
142+
+ return rwPermissions
143+
+ }
144+
+}
145+
+
146+
+func getDeviceRwmPermissions() devices.Permissions {
147+
+ return rwmPermissions
148+
+}
149+
+
150+
// This builds up the known persistent block devices allow list for a VMI (as in, hotplugged volumes are handled separately)
151+
// This will be maintained and extended as new devices likely have to end up on this list as well
152+
// For example - https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/
153+
@@ -159,7 +174,7 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
154+
}
155+
return nil, fmt.Errorf("failed to resolve path for volume %s: %v", volume.Name, err)
156+
}
157+
- if deviceRule, err := newAllowedDeviceRule(path); err != nil {
158+
+ if deviceRule, err := newAllowedDeviceRule(path, getDeviceRwmPermissions()); err != nil {
159+
return nil, fmt.Errorf("failed to create device rule for %s: %v", path, err)
160+
} else if deviceRule != nil {
161+
log.Log.V(loggingVerbosity).Infof("device rule for volume %s: %v", volume.Name, deviceRule)
162+
@@ -171,7 +186,7 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
163+
if err != nil {
164+
return nil, err
165+
}
166+
- if deviceRule, err := newAllowedDeviceRule(path); err != nil {
167+
+ if deviceRule, err := newAllowedDeviceRule(path, getDeviceRwmPermissions()); err != nil {
168+
return nil, fmt.Errorf("failed to create device rule for %s: %v", path, err)
169+
} else if deviceRule != nil {
170+
log.Log.V(loggingVerbosity).Infof("device rule for volume rng: %v", deviceRule)
171+
@@ -183,7 +198,7 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
172+
if err != nil {
173+
return nil, err
174+
}
175+
- if deviceRule, err := newAllowedDeviceRule(path); err != nil {
176+
+ if deviceRule, err := newAllowedDeviceRule(path, getDeviceRwmPermissions()); err != nil {
177+
return nil, fmt.Errorf("failed to create device rule for %s: %v", path, err)
178+
} else if deviceRule != nil {
179+
log.Log.V(loggingVerbosity).Infof("device rule for volume vsock: %v", deviceRule)
180+
@@ -191,10 +206,73 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
181+
}
182+
}
183+
184+
+ // Device-plugin-provisioned devices (VFIO, USB) must be in the cgroup
185+
+ // rule cache so they survive eBPF program rebuilds during hotplug.
186+
+ for _, devDir := range []string{
187+
+ filepath.Join("dev", "vfio"),
188+
+ filepath.Join("dev", "bus", "usb"),
189+
+ } {
190+
+ rules, err := discoverDeviceRulesInDir(mountRoot, devDir)
191+
+ if err != nil {
192+
+ return nil, fmt.Errorf("failed to discover device rules in %s: %v", devDir, err)
193+
+ }
194+
+ vmiDeviceRules = append(vmiDeviceRules, rules...)
195+
+ }
196+
+
197+
return vmiDeviceRules, nil
198+
}
199+
200+
-func newAllowedDeviceRule(devicePath *safepath.Path) (*devices.Rule, error) {
201+
+// discoverDeviceRulesInDir recursively scans a directory under the
202+
+// container's filesystem and creates allow rules for all device nodes
203+
+// found. These devices are provisioned by device plugins or the container
204+
+// runtime and must be preserved in the v2 cgroup manager's rule cache so
205+
+// they are not lost when the eBPF device filter is rebuilt by subsequent
206+
+// Set() calls (e.g. during hotplug volume mounting).
207+
+func discoverDeviceRulesInDir(mountRoot *safepath.Path, relPath string) ([]*devices.Rule, error) {
208+
+ dirPath, err := safepath.JoinNoFollow(mountRoot, relPath)
209+
+ if err != nil {
210+
+ if errors.Is(err, os.ErrNotExist) {
211+
+ return nil, nil
212+
+ }
213+
+ return nil, err
214+
+ }
215+
+
216+
+ var entries []os.DirEntry
217+
+ err = dirPath.ExecuteNoFollow(func(path string) (err error) {
218+
+ entries, err = os.ReadDir(path)
219+
+ return err
220+
+ })
221+
+ if err != nil {
222+
+ return nil, err
223+
+ }
224+
+
225+
+ var rules []*devices.Rule
226+
+ for _, entry := range entries {
227+
+ if entry.IsDir() {
228+
+ subRules, err := discoverDeviceRulesInDir(mountRoot, filepath.Join(relPath, entry.Name()))
229+
+ if err != nil {
230+
+ return nil, err
231+
+ }
232+
+ rules = append(rules, subRules...)
233+
+ continue
234+
+ }
235+
+ devPath, err := safepath.JoinNoFollow(dirPath, entry.Name())
236+
+ if err != nil {
237+
+ return nil, err
238+
+ }
239+
+ rule, err := newAllowedDeviceRule(devPath, getDeviceRwmPermissions())
240+
+ if err != nil {
241+
+ return nil, fmt.Errorf("failed to create device rule for %s/%s: %v", relPath, entry.Name(), err)
242+
+ }
243+
+ if rule != nil {
244+
+ log.Log.V(loggingVerbosity).Infof("device rule for %s/%s: %v", relPath, entry.Name(), rule)
245+
+ rules = append(rules, rule)
246+
+ }
247+
+ }
248+
+ return rules, nil
249+
+}
250+
+
251+
+func newAllowedDeviceRule(devicePath *safepath.Path, devicePermissions devices.Permissions) (*devices.Rule, error) {
252+
fileInfo, err := safepath.StatAtNoFollow(devicePath)
253+
if err != nil {
254+
return nil, err
255+
@@ -211,7 +289,7 @@ func newAllowedDeviceRule(devicePath *safepath.Path) (*devices.Rule, error) {
256+
Type: deviceType,
257+
Major: int64(unix.Major(stat.Rdev)),
258+
Minor: int64(unix.Minor(stat.Rdev)),
259+
- Permissions: "rwm",
260+
+ Permissions: devicePermissions,
261+
Allow: true,
262+
}, nil
263+
}
264+
@@ -224,12 +302,7 @@ func GenerateDefaultDeviceRules() []*devices.Rule {
265+
266+
const toAllow = true
267+
268+
- var permissions devices.Permissions
269+
- if cgroups.IsCgroup2UnifiedMode() {
270+
- permissions = "rwm"
271+
- } else {
272+
- permissions = "rw"
273+
- }
274+
+ permissions := getDevicePermissionsFromCgroups()
275+
276+
defaultRules := []*devices.Rule{
277+
{ // /dev/ptmx (PTY master multiplex)
278+
diff --git a/tests/storage/hotplug.go b/tests/storage/hotplug.go
279+
index 2a40430c29..cc3567386f 100644
280+
--- a/tests/storage/hotplug.go
281+
+++ b/tests/storage/hotplug.go
282+
@@ -2275,6 +2275,83 @@ var _ = Describe(SIG("Hotplug", func() {
283+
verifyVolumeNolongerAccessible(vmi, targets[0])
284+
})
285+
})
286+
+
287+
+ // Regression test for https://github.com/kubevirt/kubevirt/issues/17124
288+
+ Context("with PCI hostdev", Serial, func() {
289+
+ const deviceName = "example.org/soundcard"
290+
+
291+
+ BeforeEach(func() {
292+
+ kvconfig.EnableFeatureGate(featuregate.HostDevicesGate)
293+
+
294+
+ kv := libkubevirt.GetCurrentKv(virtClient)
295+
+ config := kv.Spec.Configuration
296+
+ config.PermittedHostDevices = &v1.PermittedHostDevices{
297+
+ PciHostDevices: []v1.PciHostDevice{
298+
+ {
299+
+ PCIVendorSelector: "8086:2668",
300+
+ ResourceName: deviceName,
301+
+ },
302+
+ },
303+
+ }
304+
+ kvconfig.UpdateKubeVirtConfigValueAndWait(config)
305+
+ })
306+
+
307+
+ AfterEach(func() {
308+
+ kv := libkubevirt.GetCurrentKv(virtClient)
309+
+ config := kv.Spec.Configuration
310+
+ config.PermittedHostDevices = &v1.PermittedHostDevices{}
311+
+ kvconfig.UpdateKubeVirtConfigValueAndWait(config)
312+
+ kvconfig.DisableFeatureGate(featuregate.HostDevicesGate)
313+
+ })
314+
+
315+
+ It("should restart a VM after hotplugging a block volume", func() {
316+
+ sc, exists := libstorage.GetRWOBlockStorageClass()
317+
+ if !exists {
318+
+ Skip("no RWO block storage class available")
319+
+ }
320+
+
321+
+ vmiSpec := libvmifact.NewAlpineWithTestTooling()
322+
+ vmiSpec.Spec.Domain.Devices.HostDevices = []v1.HostDevice{
323+
+ {Name: "sound0", DeviceName: deviceName},
324+
+ }
325+
+ vm, err := virtClient.VirtualMachine(testsuite.GetTestNamespace(nil)).Create(
326+
+ context.Background(),
327+
+ libvmi.NewVirtualMachine(vmiSpec, libvmi.WithRunStrategy(v1.RunStrategyAlways)),
328+
+ metav1.CreateOptions{},
329+
+ )
330+
+ Expect(err).ToNot(HaveOccurred())
331+
+ Eventually(matcher.ThisVM(vm)).WithTimeout(300 * time.Second).WithPolling(time.Second).Should(matcher.BeReady())
332+
+
333+
+ vmi, err := virtClient.VirtualMachineInstance(vm.Namespace).Get(context.Background(), vm.Name, metav1.GetOptions{})
334+
+ Expect(err).ToNot(HaveOccurred())
335+
+ libwait.WaitForSuccessfulVMIStart(vmi, libwait.WithTimeout(240))
336+
+
337+
+ dvBuilder := libdv.NewDataVolume(
338+
+ libdv.WithBlankImageSource(),
339+
+ libdv.WithStorage(
340+
+ libdv.StorageWithStorageClass(sc),
341+
+ libdv.StorageWithVolumeSize(cd.BlankVolumeSize),
342+
+ libdv.StorageWithVolumeMode(k8sv1.PersistentVolumeBlock),
343+
+ ),
344+
+ )
345+
+ dv, err := virtClient.CdiClient().CdiV1beta1().DataVolumes(testsuite.GetTestNamespace(nil)).Create(
346+
+ context.Background(), dvBuilder, metav1.CreateOptions{})
347+
+ Expect(err).ToNot(HaveOccurred())
348+
+ libstorage.EventuallyDV(dv, 240, Or(matcher.HaveSucceeded(), matcher.WaitForFirstConsumer()))
349+
+
350+
+ By("Hotplugging a block volume to the running VM")
351+
+ addVolumeVMWithSource(vm.Name, vm.Namespace, getAddVolumeOptions("hotplug-vol", v1.DiskBusSCSI, &v1.HotplugVolumeSource{
352+
+ DataVolume: &v1.DataVolumeSource{Name: dv.Name},
353+
+ }, false, false, ""))
354+
+ verifyVolumeStatus(vmi, v1.VolumeReady, "", "hotplug-vol")
355+
+
356+
+ By("Restarting the VM")
357+
+ vm = libvmops.StopVirtualMachine(vm)
358+
+ err = virtClient.VirtualMachine(vm.Namespace).Start(context.Background(), vm.Name, &v1.StartOptions{})
359+
+ Expect(err).ToNot(HaveOccurred())
360+
+ Eventually(matcher.ThisVM(vm), 300*time.Second, time.Second).Should(matcher.BeReady())
361+
+ })
362+
+ })
363+
}))
364+
365+
func verifyVolumeAndDiskVMAdded(virtClient kubecli.KubevirtClient, vm *v1.VirtualMachine, volumeNames ...string) {
366+
--
367+
2.34.1
368+

0 commit comments

Comments
 (0)