Skip to content

Commit fb04ebe

Browse files
committed
feat(block): advertise VIRTIO_BLK_F_WRITE_ZEROES for non-read-only devices
Set VIRTIO_BLK_F_WRITE_ZEROES in avail_features for any non-read-only block device, alongside VIRTIO_BLK_F_DISCARD.

Populate the max_write_zeroes_sectors, max_write_zeroes_seg, and write_zeroes_may_unmap config fields in both VirtioBlock::new() and the persist::restore() path, so that the guest sees the same values on a fresh boot and after a snapshot restore.

write_zeroes_may_unmap=1 lets the guest set the UNMAP flag on individual segments, which we then translate to fallocate's PUNCH_HOLE mode (UNMAP=0 uses ZERO_RANGE).

Update the test_virtio_features and test_virtio_read_config expectations to account for the new feature bit and config fields.

Signed-off-by: Nikita Kalyazin <nikita.kalyazin@e2b.dev>
1 parent 2c033e4 commit fb04ebe

2 files changed

Lines changed: 37 additions & 6 deletions

File tree

src/vmm/src/devices/virtio/block/virtio/device.rs

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ use crate::devices::virtio::block::CacheType;
2727
use crate::devices::virtio::block::virtio::metrics::{BlockDeviceMetrics, BlockMetricsPerDevice};
2828
use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice};
2929
use crate::devices::virtio::generated::virtio_blk::{
30-
VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_ID_BYTES,
30+
VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_WRITE_ZEROES,
31+
VIRTIO_BLK_ID_BYTES,
3132
};
3233
use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
3334
use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BLOCK;
@@ -365,6 +366,7 @@ impl VirtioBlock {
365366
avail_features |= 1u64 << VIRTIO_BLK_F_RO;
366367
} else {
367368
avail_features |= 1u64 << VIRTIO_BLK_F_DISCARD;
369+
avail_features |= 1u64 << VIRTIO_BLK_F_WRITE_ZEROES;
368370
}
369371

370372
let queue_evts = [EventFd::new(libc::EFD_NONBLOCK).map_err(VirtioBlockError::EventFd)?];
@@ -396,6 +398,25 @@ impl VirtioBlock {
396398
// accepts any byte offset/length and the kernel rounds internally
397399
// to FS-block granularity.
398400
discard_sector_alignment: if !config.is_read_only { 1 } else { 0 },
401+
max_write_zeroes_sectors: if !config.is_read_only {
402+
discard_sectors
403+
} else {
404+
0
405+
},
406+
// max_write_zeroes_seg = 1: each VIRTIO_BLK_T_WRITE_ZEROES
407+
// request carries exactly one (sector, num_sectors, flags) tuple.
408+
// Raising this would let the guest batch disjoint zero ranges
409+
// (e.g. mkfs zeroing several inode tables) into a single
410+
// multi-segment request, saving virtqueue round-trips. We keep
411+
// it at 1 in this iteration because the async io_uring engine
412+
// currently produces exactly one SQE per virtio request;
413+
// multi-segment would require submitting N SQEs and only
414+
// completing the virtio request after all N CQEs return (or
415+
// serialising them). max_write_zeroes_sectors is set to the
416+
// full disk (capped at u32::MAX sectors), so a contiguous range
417+
// within that bound is never split, regardless of this limit.
418+
max_write_zeroes_seg: if !config.is_read_only { 1 } else { 0 },
419+
write_zeroes_may_unmap: if !config.is_read_only { 1 } else { 0 },
399420
..Default::default()
400421
};
401422

@@ -669,10 +690,12 @@ impl VirtioBlock {
669690
pub fn update_disk_image(&mut self, disk_image_path: String) -> Result<(), VirtioBlockError> {
670691
self.disk.update(disk_image_path, self.read_only)?;
671692
self.config_space.capacity = self.disk.nsectors.to_le(); // virtio_block_config_space();
672-
// Discard config fields derive from the new disk's sector count, so
673-
// refresh them alongside `capacity`.
693+
// Discard/write-zeroes config fields derive from the new disk's
694+
// sector count, so refresh them alongside `capacity`.
674695
let discard_sectors = u32::try_from(self.disk.nsectors).unwrap_or(u32::MAX);
675696
self.config_space.max_discard_sectors = if !self.read_only { discard_sectors } else { 0 };
697+
self.config_space.max_write_zeroes_sectors =
698+
if !self.read_only { discard_sectors } else { 0 };
676699

677700
// Kick the driver to pick up the changes. (But only if the device is already activated).
678701
if self.is_activated() {
@@ -927,10 +950,12 @@ mod tests {
927950

928951
assert_eq!(block.device_type(), VIRTIO_ID_BLOCK);
929952

930-
// default_block is non-read-only, so VIRTIO_BLK_F_DISCARD is advertised.
953+
// default_block is non-read-only, so VIRTIO_BLK_F_DISCARD and
954+
// VIRTIO_BLK_F_WRITE_ZEROES are advertised.
931955
let features: u64 = (1u64 << VIRTIO_F_VERSION_1)
932956
| (1u64 << VIRTIO_RING_F_EVENT_IDX)
933-
| (1u64 << VIRTIO_BLK_F_DISCARD);
957+
| (1u64 << VIRTIO_BLK_F_DISCARD)
958+
| (1u64 << VIRTIO_BLK_F_WRITE_ZEROES);
934959

935960
assert_eq!(
936961
block.avail_features_by_page(0),
@@ -957,12 +982,15 @@ mod tests {
957982
let mut actual_config_space = ConfigSpace::default();
958983
block.read_config(0, actual_config_space.as_mut_slice());
959984
// The block's backing file size is 0x1000, so there are 8 (4096/512) sectors.
960-
// default_block is non-read-only, so discard fields are populated.
985+
// default_block is non-read-only, so discard and write-zeroes fields are populated.
961986
let expected_config_space = ConfigSpace {
962987
capacity: 8,
963988
max_discard_sectors: 8,
964989
max_discard_seg: 1,
965990
discard_sector_alignment: 1,
991+
max_write_zeroes_sectors: 8,
992+
max_write_zeroes_seg: 1,
993+
write_zeroes_may_unmap: 1,
966994
..Default::default()
967995
};
968996
assert_eq!(actual_config_space, expected_config_space);

src/vmm/src/devices/virtio/block/virtio/persist.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ impl Persist<'_> for VirtioBlock {
122122
// accepts any byte offset/length and the kernel rounds internally
123123
// to FS-block granularity.
124124
discard_sector_alignment: if !is_read_only { 1 } else { 0 },
125+
max_write_zeroes_sectors: if !is_read_only { discard_sectors } else { 0 },
126+
max_write_zeroes_seg: if !is_read_only { 1 } else { 0 },
127+
write_zeroes_may_unmap: if !is_read_only { 1 } else { 0 },
125128
..Default::default()
126129
};
127130

0 commit comments

Comments
 (0)