Skip to content

Commit 01bc551

Browse files
runtime: Add annotation-based block device mounting
Add support for mounting block devices (volumeDevices) as filesystems inside the guest VM via an annotation. This allows CSI block-mode PVCs to be mounted automatically by kata-agent, eliminating the need for privileged containers.

Annotation format:
  io.katacontainers.volume.block-mounts: '{"<devicePath>": {"mount": "<path>", "fstype": "<fs>"}}'

Example:
  io.katacontainers.volume.block-mounts: '{"/dev/xvda": {"mount": "/data", "fstype": "ext4"}}'

Supported options:
- mount: destination path in the container (required, must be an absolute path)
- fstype: filesystem type - ext4, ext3, ext2, xfs, btrfs or vfat (default: ext4)
- options: mount options array (default: ["rw"])
- fsGroup: optional gid for filesystem group ownership

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b3e0ea1 commit 01bc551

4 files changed

Lines changed: 587 additions & 2 deletions

File tree

src/runtime/virtcontainers/kata_agent.go

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,75 @@ const (
166166
grpcSetPolicyRequest = "grpc.SetPolicyRequest"
167167
)
168168

169+
type (
	// BlockMountConfig describes how kata-agent should mount one block device.
	// It is the value type of the block mount annotation's JSON object.
	BlockMountConfig struct {
		// Mount is the destination path inside the container.
		Mount string `json:"mount"`
		// Fstype is the filesystem type on the device (ext4, xfs, etc.).
		// It may be left empty.
		Fstype string `json:"fstype,omitempty"`
		// Options are the mount options to apply (ro, rw, noatime, etc.).
		Options []string `json:"options,omitempty"`
		// FsGroup, when non-nil, is the group ID used for ownership of the
		// mounted filesystem. A nil pointer means no group was requested.
		FsGroup *int64 `json:"fsGroup,omitempty"`
	}

	// BlockMountAnnotation maps device paths to their mount configurations.
	//
	//   - Key: device path (e.g., "/dev/vdb")
	//   - Value: mount configuration for that device
	BlockMountAnnotation map[string]BlockMountConfig
)
185+
186+
// allowedBlockMountFsTypes is the allow-list of filesystem types accepted in
// the block mount annotation. A type is accepted only if it maps to true, so
// looking up an unknown type yields false.
var allowedBlockMountFsTypes = map[string]bool{
	"ext4":  true,
	"ext3":  true,
	"ext2":  true,
	"xfs":   true,
	"btrfs": true,
	"vfat":  true,

	// The empty string is allowed so the type can be omitted from the
	// annotation; it defaults to ext4 at mount time.
	"": true,
}
192+
193+
// parseBlockMountAnnotation parses the block mount annotation from container/sandbox annotations
194+
func parseBlockMountAnnotation(annotations map[string]string) (BlockMountAnnotation, error) {
195+
raw, ok := annotations[vcAnnotations.BlockDeviceMounts]
196+
if !ok || raw == "" {
197+
return nil, nil
198+
}
199+
200+
var config BlockMountAnnotation
201+
if err := json.Unmarshal([]byte(raw), &config); err != nil {
202+
return nil, fmt.Errorf("failed to parse %s annotation: %w", vcAnnotations.BlockDeviceMounts, err)
203+
}
204+
205+
// Validate the configuration
206+
for devicePath, mountConfig := range config {
207+
if err := validateBlockMountConfig(devicePath, mountConfig); err != nil {
208+
return nil, err
209+
}
210+
}
211+
212+
return config, nil
213+
}
214+
215+
// validateBlockMountConfig validates a single block mount configuration
216+
func validateBlockMountConfig(devicePath string, config BlockMountConfig) error {
217+
// Validate device path format
218+
if !strings.HasPrefix(devicePath, "/dev/") {
219+
return fmt.Errorf("invalid device path %q: must start with /dev/", devicePath)
220+
}
221+
222+
// Validate mount destination
223+
if config.Mount == "" {
224+
return fmt.Errorf("mount destination required for device %s", devicePath)
225+
}
226+
if !filepath.IsAbs(config.Mount) {
227+
return fmt.Errorf("mount destination %q must be absolute path", config.Mount)
228+
}
229+
230+
// Validate filesystem type
231+
if !allowedBlockMountFsTypes[config.Fstype] {
232+
return fmt.Errorf("unsupported filesystem type %q for device %s", config.Fstype, devicePath)
233+
}
234+
235+
return nil
236+
}
237+
169238
// newKataAgent returns an agent from an agent type.
170239
func newKataAgent() agent {
171240
return &kataAgent{}
@@ -1246,6 +1315,20 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
12461315
}
12471316

12481317
func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*grpc.Device {
1318+
// Parse block mount annotation to know which devices to skip
1319+
// (they will be mounted by the agent instead of being passed as raw devices)
1320+
var annotations map[string]string
1321+
if c.config != nil {
1322+
annotations = c.config.Annotations
1323+
}
1324+
if annotations == nil && c.sandbox != nil && c.sandbox.config != nil {
1325+
annotations = c.sandbox.config.Annotations
1326+
}
1327+
blockMounts, err := parseBlockMountAnnotation(annotations)
1328+
if err != nil {
1329+
k.Logger().WithError(err).Warn("Failed to parse block mount annotation, devices will not be filtered")
1330+
}
1331+
12491332
for _, dev := range c.devices {
12501333
device := c.sandbox.devManager.GetDeviceByID(dev.ID)
12511334
if device == nil {
@@ -1257,6 +1340,14 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr
12571340
continue
12581341
}
12591342

1343+
// Skip devices that will be mounted via annotation
1344+
if blockMounts != nil {
1345+
if _, shouldMount := blockMounts[dev.ContainerPath]; shouldMount {
1346+
k.Logger().WithField("device", dev.ContainerPath).Debug("Skipping device in appendDevices (will be mounted via annotation)")
1347+
continue
1348+
}
1349+
}
1350+
12601351
var kataDevice *grpc.Device
12611352

12621353
switch device.DeviceType() {
@@ -1452,6 +1543,16 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
14521543

14531544
ctrStorages = append(ctrStorages, volumeStorages...)
14541545

1546+
// Handle annotation-based block device mounts (for volumeDevices that should be mounted)
1547+
annotationStorages, err := k.createAnnotationBlockStorages(c, ociSpec)
1548+
if err != nil {
1549+
return nil, fmt.Errorf("failed to process block mount annotations: %w", err)
1550+
}
1551+
if len(annotationStorages) > 0 {
1552+
ctrStorages = append(ctrStorages, annotationStorages...)
1553+
k.Logger().WithField("count", len(annotationStorages)).Info("Added annotation-based block mount storages")
1554+
}
1555+
14551556
// Layer storage objects are prepended to the list so that they come _before_ the
14561557
// rootfs because the rootfs depends on them (it's an overlay of the layers).
14571558
ctrStorages = append(layerStorages, ctrStorages...)
@@ -1935,6 +2036,160 @@ func (k *kataAgent) handleBlkOCIMounts(c *Container, spec *specs.Spec) ([]*grpc.
19352036
return layerStorages, volumeStorages, nil
19362037
}
19372038

2039+
// createAnnotationBlockStorages creates Storage objects for devices specified in the
2040+
// block mount annotation. These devices are already hotplugged via the normal
2041+
// volumeDevices path; this function adds mount instructions for the agent.
2042+
func (k *kataAgent) createAnnotationBlockStorages(c *Container, spec *specs.Spec) ([]*grpc.Storage, error) {
2043+
// Get annotations from container config (falls back to sandbox config)
2044+
var annotations map[string]string
2045+
if c.config != nil {
2046+
annotations = c.config.Annotations
2047+
}
2048+
if annotations == nil && c.sandbox != nil && c.sandbox.config != nil {
2049+
annotations = c.sandbox.config.Annotations
2050+
}
2051+
2052+
// Skip sandbox containers
2053+
ctrType := ContainerType(annotations[vcAnnotations.ContainerTypeKey])
2054+
if ctrType.IsCriSandbox() {
2055+
return nil, nil
2056+
}
2057+
2058+
blockMounts, err := parseBlockMountAnnotation(annotations)
2059+
if err != nil {
2060+
return nil, err
2061+
}
2062+
if blockMounts == nil {
2063+
return nil, nil
2064+
}
2065+
2066+
k.Logger().WithField("count", len(blockMounts)).Debug("Processing block mount annotations")
2067+
2068+
var storages []*grpc.Storage
2069+
devicesToRemove := make(map[string]bool)
2070+
2071+
for devicePath, mountConfig := range blockMounts {
2072+
2073+
// Find the corresponding device in c.devices to get the BlockDrive info
2074+
var blockDrive *config.BlockDrive
2075+
for _, dev := range c.devices {
2076+
if dev.ContainerPath == devicePath {
2077+
device := c.sandbox.devManager.GetDeviceByID(dev.ID)
2078+
if device != nil && device.DeviceType() == config.DeviceBlock {
2079+
blockDrive = device.GetDeviceInfo().(*config.BlockDrive)
2080+
break
2081+
}
2082+
}
2083+
}
2084+
2085+
// Determine the source and driver type for the Storage object
2086+
var source string
2087+
var driverType string
2088+
2089+
if blockDrive != nil {
2090+
// Use the block drive information from the hotplugged device
2091+
switch c.sandbox.config.HypervisorConfig.BlockDeviceDriver {
2092+
case config.VirtioMmio:
2093+
driverType = kataMmioBlkDevType
2094+
source = blockDrive.VirtPath
2095+
case config.VirtioBlock:
2096+
driverType = kataBlkDevType
2097+
source = blockDrive.PCIPath.String()
2098+
case config.VirtioBlockCCW:
2099+
driverType = kataBlkCCWDevType
2100+
source = blockDrive.DevNo
2101+
case config.VirtioSCSI:
2102+
driverType = kataSCSIDevType
2103+
source = blockDrive.SCSIAddr
2104+
case config.Nvdimm:
2105+
driverType = kataNvdimmDevType
2106+
source = fmt.Sprintf("/dev/pmem%s", blockDrive.NvdimmID)
2107+
default:
2108+
driverType = kataBlkDevType
2109+
source = blockDrive.VirtPath
2110+
}
2111+
} else {
2112+
return nil, fmt.Errorf("block device %q not found in container devices - ensure volumeDevices.devicePath matches annotation key", devicePath)
2113+
}
2114+
2115+
// Default filesystem type
2116+
fstype := mountConfig.Fstype
2117+
if fstype == "" {
2118+
fstype = "ext4"
2119+
}
2120+
2121+
// Default mount options
2122+
options := mountConfig.Options
2123+
if len(options) == 0 {
2124+
options = []string{"rw"}
2125+
}
2126+
2127+
// Create unique mount point in guest for this device
2128+
// Same pattern as handleBlkOCIMounts
2129+
filename := b64.URLEncoding.EncodeToString([]byte(source))
2130+
guestMountPoint := filepath.Join(kataGuestSandboxStorageDir(), filename)
2131+
2132+
storage := &grpc.Storage{
2133+
Driver: driverType,
2134+
Source: source,
2135+
Fstype: fstype,
2136+
Options: options,
2137+
MountPoint: guestMountPoint,
2138+
}
2139+
2140+
// Set fsGroup if specified
2141+
if mountConfig.FsGroup != nil {
2142+
storage.FsGroup = &grpc.FSGroup{
2143+
GroupId: uint32(*mountConfig.FsGroup),
2144+
GroupChangePolicy: pbTypes.FSGroupChangePolicy_Always,
2145+
}
2146+
}
2147+
2148+
storages = append(storages, storage)
2149+
2150+
// Mark device for removal from OCI spec
2151+
devicesToRemove[devicePath] = true
2152+
2153+
// Add mount entry to OCI spec
2154+
// The source is the guest mount point where agent will mount the device
2155+
spec.Mounts = append(spec.Mounts, specs.Mount{
2156+
Destination: mountConfig.Mount,
2157+
Source: guestMountPoint,
2158+
Type: "bind",
2159+
Options: []string{"bind"},
2160+
})
2161+
2162+
k.Logger().WithFields(logrus.Fields{
2163+
"device": devicePath,
2164+
"mount": mountConfig.Mount,
2165+
}).Debug("Created storage for annotation-based block mount")
2166+
}
2167+
2168+
// Remove devices from OCI spec that are now being mounted
2169+
if len(devicesToRemove) > 0 {
2170+
k.removeDevicesFromOCISpec(spec, devicesToRemove)
2171+
}
2172+
2173+
return storages, nil
2174+
}
2175+
2176+
// removeDevicesFromOCISpec removes specified devices from the OCI spec's Linux.Devices
2177+
func (k *kataAgent) removeDevicesFromOCISpec(spec *specs.Spec, devicesToRemove map[string]bool) {
2178+
if spec.Linux == nil || len(spec.Linux.Devices) == 0 {
2179+
return
2180+
}
2181+
2182+
filtered := make([]specs.LinuxDevice, 0, len(spec.Linux.Devices))
2183+
for _, dev := range spec.Linux.Devices {
2184+
if !devicesToRemove[dev.Path] {
2185+
filtered = append(filtered, dev)
2186+
} else {
2187+
k.Logger().WithField("device", dev.Path).Debug("Removing device from OCI spec (will be mounted instead)")
2188+
}
2189+
}
2190+
spec.Linux.Devices = filtered
2191+
}
2192+
19382193
// handlePidNamespace checks if Pid namespace for a container needs to be shared with its sandbox
19392194
// pid namespace. This function also modifies the grpc spec to remove the pid namespace
19402195
// from the list of namespaces passed to the agent.

0 commit comments

Comments
 (0)