Skip to content

Commit fecebff

Browse files
committed
fix(dd): optimize O_DIRECT buffer alignment to reduce syscall overhead
Implement page-aligned buffer allocation and optimize O_DIRECT flag handling to match GNU dd behavior. Key changes: - Add allocate_aligned_buffer() for page-aligned memory allocation - Update buffer allocation to use aligned buffers - Modify handle_o_direct_write() to only remove O_DIRECT for partial blocks - Add Output::write_with_o_direct_handling() for proper O_DIRECT handling - Add comprehensive unit and integration tests Fixes #6078
1 parent 95b266f commit fecebff

2 files changed

Lines changed: 331 additions & 22 deletions

File tree

src/uu/dd/src/dd.rs

Lines changed: 185 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,38 @@ use uucore::{format_usage, show_error};
6565

6666
const BUF_INIT_BYTE: u8 = 0xDD;
6767

68+
/// Helper function to allocate a page-aligned buffer on Linux/Android.
69+
///
70+
/// O_DIRECT requires buffers to be aligned to page boundaries (typically 4096 bytes).
71+
/// This function allocates a `Vec<u8>` with proper alignment to support O_DIRECT
72+
/// without triggering EINVAL errors.
73+
#[cfg(any(target_os = "linux", target_os = "android"))]
74+
fn allocate_aligned_buffer(size: usize) -> Vec<u8> {
75+
let alignment = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
76+
// cspell:disable-next-line
77+
let ptr = unsafe { libc::memalign(alignment, size) as *mut u8 };
78+
79+
assert!(
80+
!ptr.is_null(),
81+
"Failed to allocate aligned buffer of size {size}"
82+
);
83+
84+
// Initialize with BUF_INIT_BYTE
85+
unsafe {
86+
std::ptr::write_bytes(ptr, BUF_INIT_BYTE, size);
87+
}
88+
89+
// Convert raw pointer to Vec<u8>
90+
// SAFETY: We just allocated this memory with memalign, so it's valid
91+
unsafe { Vec::from_raw_parts(ptr, 0, size) }
92+
}
93+
94+
/// Fallback for non-Linux platforms - use regular Vec allocation
95+
#[cfg(not(any(target_os = "linux", target_os = "android")))]
96+
fn allocate_aligned_buffer(size: usize) -> Vec<u8> {
97+
vec![BUF_INIT_BYTE; size]
98+
}
99+
68100
/// Final settings after parsing
69101
#[derive(Default)]
70102
struct Settings {
@@ -688,8 +720,17 @@ fn is_sparse(buf: &[u8]) -> bool {
688720

689721
/// Handle O_DIRECT write errors by temporarily removing the flag and retrying.
690722
/// This follows GNU dd behavior for partial block writes with O_DIRECT.
723+
///
724+
/// With proper buffer alignment (page-aligned), O_DIRECT should only fail for
725+
/// partial blocks (size < output_blocksize). This function only removes O_DIRECT
726+
/// when necessary, matching GNU dd behavior and minimizing system call overhead.
691727
#[cfg(any(target_os = "linux", target_os = "android"))]
692-
fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) -> io::Result<usize> {
728+
fn handle_o_direct_write(
729+
f: &mut File,
730+
buf: &[u8],
731+
output_blocksize: usize,
732+
original_error: io::Error,
733+
) -> io::Result<usize> {
693734
use nix::fcntl::{FcntlArg, OFlag, fcntl};
694735

695736
// Get current flags using nix
@@ -698,8 +739,10 @@ fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) ->
698739
Err(_) => return Err(original_error),
699740
};
700741

701-
// If O_DIRECT is set, try removing it temporarily
702-
if oflags.contains(OFlag::O_DIRECT) {
742+
// If O_DIRECT is set, only remove it for partial blocks (size < output_blocksize)
743+
// This matches GNU dd behavior and minimizes system call overhead.
744+
// With proper buffer alignment, full blocks should not fail with EINVAL.
745+
if oflags.contains(OFlag::O_DIRECT) && buf.len() < output_blocksize {
703746
let flags_without_direct = oflags - OFlag::O_DIRECT;
704747

705748
// Remove O_DIRECT flag using nix
@@ -710,7 +753,7 @@ fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) ->
710753
// Retry the write without O_DIRECT
711754
let write_result = f.write(buf);
712755

713-
// Restore O_DIRECT flag using nix (GNU doesn't restore it, but we'll be safer)
756+
// Restore O_DIRECT flag using nix
714757
// Log any restoration errors without failing the operation
715758
if let Err(os_err) = fcntl(&mut *f, FcntlArg::F_SETFL(oflags)) {
716759
// Just log the error, don't fail the whole operation
@@ -719,16 +762,18 @@ fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) ->
719762

720763
write_result
721764
} else {
722-
// O_DIRECT wasn't set, return original error
765+
// O_DIRECT wasn't set or this is a full block, return original error
723766
Err(original_error)
724767
}
725768
}
726769

727770
/// Stub for non-Linux platforms - just return the original error.
728771
#[cfg(not(any(target_os = "linux", target_os = "android")))]
772+
#[allow(dead_code)]
729773
fn handle_o_direct_write(
730774
_f: &mut File,
731775
_buf: &[u8],
776+
_output_blocksize: usize,
732777
original_error: io::Error,
733778
) -> io::Result<usize> {
734779
Err(original_error)
@@ -745,21 +790,7 @@ impl Write for Dest {
745790
f.seek(SeekFrom::Current(seek_amt))?;
746791
Ok(buf.len())
747792
}
748-
Self::File(f, _) => {
749-
// Try the write first
750-
match f.write(buf) {
751-
Ok(len) => Ok(len),
752-
Err(e)
753-
if e.kind() == io::ErrorKind::InvalidInput
754-
&& e.raw_os_error() == Some(libc::EINVAL) =>
755-
{
756-
// This might be an O_DIRECT alignment issue.
757-
// Try removing O_DIRECT temporarily and retry.
758-
handle_o_direct_write(f, buf, e)
759-
}
760-
Err(e) => Err(e),
761-
}
762-
}
793+
Self::File(f, _) => f.write(buf),
763794
Self::Stdout(stdout) => stdout.write(buf),
764795
#[cfg(unix)]
765796
Self::Fifo(f) => f.write(buf),
@@ -922,6 +953,36 @@ impl<'a> Output<'a> {
922953
}
923954
}
924955

956+
/// Write to the destination with O_DIRECT awareness.
957+
///
958+
/// This method handles O_DIRECT write errors by temporarily removing the flag
959+
/// for partial blocks, matching GNU dd behavior.
960+
#[cfg(any(target_os = "linux", target_os = "android"))]
961+
fn write_with_o_direct_handling(&mut self, buf: &[u8]) -> io::Result<usize> {
962+
match self.dst.write(buf) {
963+
Ok(len) => Ok(len),
964+
Err(e)
965+
if e.kind() == io::ErrorKind::InvalidInput
966+
&& e.raw_os_error() == Some(libc::EINVAL) =>
967+
{
968+
// This might be an O_DIRECT alignment issue.
969+
// Try removing O_DIRECT temporarily and retry (only for partial blocks).
970+
if let Dest::File(f, _) = &mut self.dst {
971+
handle_o_direct_write(f, buf, self.settings.obs, e)
972+
} else {
973+
Err(e)
974+
}
975+
}
976+
Err(e) => Err(e),
977+
}
978+
}
979+
980+
/// Fallback for non-Linux platforms - use regular write
981+
#[cfg(not(any(target_os = "linux", target_os = "android")))]
982+
fn write_with_o_direct_handling(&mut self, buf: &[u8]) -> io::Result<usize> {
983+
self.dst.write(buf)
984+
}
985+
925986
/// writes a block of data. optionally retries when first try didn't complete
926987
///
927988
/// this is needed by gnu-test: tests/dd/stats.s
@@ -932,7 +993,7 @@ impl<'a> Output<'a> {
932993
let full_len = chunk.len();
933994
let mut base_idx = 0;
934995
loop {
935-
match self.dst.write(&chunk[base_idx..]) {
996+
match self.write_with_o_direct_handling(&chunk[base_idx..]) {
936997
Ok(wlen) => {
937998
base_idx += wlen;
938999
// take iflags.fullblock as oflags shall not have this option
@@ -1146,7 +1207,11 @@ fn dd_copy(mut i: Input, o: Output) -> io::Result<()> {
11461207

11471208
// Create a common buffer with a capacity of the block size.
11481209
// This is the max size needed.
1149-
let mut buf = vec![BUF_INIT_BYTE; bsize];
1210+
//
1211+
// On Linux/Android, use an aligned buffer for O_DIRECT support.
1212+
// O_DIRECT requires buffers to be aligned to page boundaries (typically 4096 bytes).
1213+
// This prevents EINVAL errors when writing with oflag=direct.
1214+
let mut buf = allocate_aligned_buffer(bsize);
11501215

11511216
// Spawn a timer thread to provide a scheduled signal indicating when we
11521217
// should send an update of our progress to the reporting thread.
@@ -1596,4 +1661,102 @@ mod tests {
15961661
Output::new_file(Path::new(settings.outfile.as_ref().unwrap()), &settings).is_err()
15971662
);
15981663
}
1664+
1665+
// ===== O_DIRECT Buffer Alignment Tests =====
1666+
1667+
#[test]
1668+
#[cfg(any(target_os = "linux", target_os = "android"))]
1669+
fn test_aligned_buffer_allocation() {
1670+
// Test that allocate_aligned_buffer creates page-aligned buffers
1671+
let buf = super::allocate_aligned_buffer(4096);
1672+
1673+
// Verify buffer is created
1674+
assert_eq!(buf.capacity(), 4096);
1675+
1676+
// Verify buffer pointer is page-aligned
1677+
let ptr = buf.as_ptr() as usize;
1678+
let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
1679+
assert_eq!(ptr % page_size, 0, "Buffer should be page-aligned");
1680+
}
1681+
1682+
#[test]
1683+
#[cfg(any(target_os = "linux", target_os = "android"))]
1684+
fn test_aligned_buffer_various_sizes() {
1685+
// Test alignment for various buffer sizes
1686+
let sizes = vec![512, 1024, 2048, 4096, 8192, 16384];
1687+
let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
1688+
1689+
for size in sizes {
1690+
let buf = super::allocate_aligned_buffer(size);
1691+
let ptr = buf.as_ptr() as usize;
1692+
assert_eq!(
1693+
ptr % page_size,
1694+
0,
1695+
"Buffer of size {size} should be page-aligned"
1696+
);
1697+
}
1698+
}
1699+
1700+
#[test]
1701+
#[cfg(any(target_os = "linux", target_os = "android"))]
1702+
fn test_aligned_buffer_initialization() {
1703+
// Test that buffer is initialized with BUF_INIT_BYTE
1704+
let buf = super::allocate_aligned_buffer(1024);
1705+
1706+
// Check that buffer is initialized (not all zeros)
1707+
let init_byte = super::BUF_INIT_BYTE;
1708+
for &byte in &buf {
1709+
assert_eq!(
1710+
byte, init_byte,
1711+
"Buffer should be initialized with BUF_INIT_BYTE"
1712+
);
1713+
}
1714+
}
1715+
1716+
#[test]
1717+
#[cfg(not(any(target_os = "linux", target_os = "android")))]
1718+
fn test_aligned_buffer_fallback() {
1719+
// Test that non-Linux platforms use regular Vec allocation
1720+
let buf = super::allocate_aligned_buffer(4096);
1721+
1722+
// Should still create a valid buffer
1723+
assert_eq!(buf.capacity(), 4096);
1724+
assert_eq!(buf.len(), 4096);
1725+
}
1726+
1727+
#[test]
1728+
fn test_calc_bsize_alignment() {
1729+
// Test that calculated buffer size is reasonable for O_DIRECT
1730+
let ibs = 4096;
1731+
let obs = 4096;
1732+
let bsize = calc_bsize(ibs, obs);
1733+
1734+
// Should be a multiple of both ibs and obs
1735+
assert_eq!(bsize % ibs, 0);
1736+
assert_eq!(bsize % obs, 0);
1737+
1738+
// Should be at least as large as both
1739+
assert!(bsize >= ibs);
1740+
assert!(bsize >= obs);
1741+
}
1742+
1743+
#[test]
1744+
fn test_calc_bsize_lcm() {
1745+
// Test LCM calculation for various block sizes
1746+
let test_cases = vec![
1747+
(512, 512, 512),
1748+
(512, 1024, 1024),
1749+
(1024, 2048, 2048),
1750+
(4096, 4096, 4096),
1751+
(512, 4096, 4096),
1752+
];
1753+
1754+
for (ibs, obs, expected) in test_cases {
1755+
let bsize = calc_bsize(ibs, obs);
1756+
assert_eq!(
1757+
bsize, expected,
1758+
"calc_bsize({ibs}, {obs}) should be {expected}"
1759+
);
1760+
}
1761+
}
15991762
}

0 commit comments

Comments
 (0)