Skip to content

Commit 7e5fe55

Browse files
committed
yes: Add zero copy fast-path
1 parent d830e84 commit 7e5fe55

5 files changed

Lines changed: 107 additions & 19 deletions

File tree

.vscode/cspell.dictionaries/workspace.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ advapi32-sys
88
aho-corasick
99
backtrace
1010
blake2b_simd
11+
rustix
1112

1213
# * uutils project
1314
uutils

Cargo.lock

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ version = "0.7.0"
372372

373373
[workspace.dependencies]
374374
ansi-width = "0.1.0"
375+
aligned-vec = "0.6.4"
375376
bigdecimal = "0.4"
376377
binary-heap-plus = "0.5.0"
377378
bstr = "1.9.1"
@@ -434,6 +435,7 @@ rlimit = "0.11.0"
434435
rstest = "0.26.0"
435436
rustc-hash = "2.1.1"
436437
rust-ini = "0.21.0"
438+
rustix = { version = "1.1.4", features = ["fs", "param", "pipe"] }
437439
same-file = "1.0.6"
438440
self_cell = "1.0.4"
439441
selinux = "=0.6.0"

src/uu/yes/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@ workspace = true
1919
path = "src/yes.rs"
2020

2121
[dependencies]
22+
aligned-vec = { workspace = true }
2223
clap = { workspace = true }
2324
itertools = { workspace = true }
2425
fluent = { workspace = true }
26+
rustix = { workspace = true }
27+
2528

2629
[target.'cfg(unix)'.dependencies]
2730
uucore = { workspace = true, features = ["pipes", "signals"] }

src/uu/yes/src/yes.rs

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// cSpell:ignore strs
6+
// cSpell:ignore strs setpipe
77

8+
use aligned_vec::{AVec, Alignment, ConstAlign};
89
use clap::{Arg, ArgAction, Command, builder::ValueParser};
910
use std::error::Error;
1011
use std::ffi::OsString;
@@ -13,18 +14,35 @@ use uucore::error::{UResult, USimpleError};
1314
use uucore::format_usage;
1415
use uucore::translate;
1516

17+
#[cfg(target_os = "linux")]
18+
use rustix::{
19+
fd::AsFd,
20+
param::page_size,
21+
pipe::{IoSliceRaw, SpliceFlags, fcntl_setpipe_size, vmsplice},
22+
};
23+
24+
// Should be multiple of page size
25+
const ROOTLESS_MAX_PIPE_SIZE: usize = 1024 * 1024;
1626
// it's possible that using a smaller or larger buffer might provide better performance on some
1727
// systems, but honestly this is good enough
1828
const BUF_SIZE: usize = 16 * 1024;
1929

2030
#[uucore::main]
2131
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
2232
let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
23-
24-
let mut buffer = Vec::with_capacity(BUF_SIZE);
33+
// use larger pipe if zero-copy is possible
34+
// todo: deduplicate logic
35+
// increase pipe size if stdout is pipe for zero-copy performance
36+
#[cfg(target_os = "linux")]
37+
let buf_size = fcntl_setpipe_size(io::stdout(), ROOTLESS_MAX_PIPE_SIZE).unwrap_or(BUF_SIZE);
38+
#[cfg(not(target_os = "linux"))]
39+
let buf_size = BUF_SIZE;
40+
41+
let mut buffer: AVec<u8, ConstAlign<ROOTLESS_MAX_PIPE_SIZE>> =
42+
AVec::with_capacity(ROOTLESS_MAX_PIPE_SIZE, buf_size);
2543
#[allow(clippy::unwrap_used, reason = "clap provides 'y' by default")]
2644
let _ = args_into_buffer(&mut buffer, matches.get_many::<OsString>("STRING").unwrap());
27-
prepare_buffer(&mut buffer);
45+
prepare_buffer(&mut buffer, buf_size);
2846

2947
match exec(&buffer) {
3048
Ok(()) => Ok(()),
@@ -55,8 +73,8 @@ pub fn uu_app() -> Command {
5573

5674
/// Copies words from `i` into `buf`, separated by spaces.
5775
#[allow(clippy::unnecessary_wraps, reason = "needed on some platforms")]
58-
fn args_into_buffer<'a>(
59-
buf: &mut Vec<u8>,
76+
fn args_into_buffer<'a, A: Alignment>(
77+
buf: &mut AVec<u8, A>,
6078
i: impl Iterator<Item = &'a OsString>,
6179
) -> Result<(), Box<dyn Error>> {
6280
// On Unix (and wasi), OsStrs are just &[u8]'s underneath...
@@ -91,23 +109,29 @@ fn args_into_buffer<'a>(
91109

92110
/// Assumes buf holds a single output line forged from the command line arguments, copies it
93111
/// repeatedly until the buffer holds as many copies as it can under [`BUF_SIZE`].
94-
fn prepare_buffer(buf: &mut Vec<u8>) {
95-
if buf.len() * 2 > BUF_SIZE {
112+
fn prepare_buffer<A: Alignment>(buf: &mut AVec<u8, A>, buf_size: usize) {
113+
if buf.len() * 2 > buf_size {
96114
return;
97115
}
98116

99117
assert!(!buf.is_empty());
100118

101119
let line_len = buf.len();
102-
let target_size = line_len * (BUF_SIZE / line_len);
120+
let target_size = line_len * (buf_size / line_len);
103121

104122
while buf.len() < target_size {
105-
let to_copy = std::cmp::min(target_size - buf.len(), buf.len());
123+
let current_len = buf.len();
124+
let to_copy = std::cmp::min(target_size - current_len, current_len);
106125
debug_assert_eq!(to_copy % line_len, 0);
107-
buf.extend_from_within(..to_copy);
126+
#[allow(
127+
clippy::unnecessary_to_owned,
128+
reason = "needs useless copy without unsafe"
129+
)]
130+
buf.extend_from_slice(&buf[..to_copy].to_vec());
108131
}
109132
}
110133

134+
#[cfg(not(target_os = "linux"))]
111135
pub fn exec(bytes: &[u8]) -> io::Result<()> {
112136
let stdout = io::stdout();
113137
let mut stdout = stdout.lock();
@@ -117,6 +141,26 @@ pub fn exec(bytes: &[u8]) -> io::Result<()> {
117141
}
118142
}
119143

144+
#[cfg(target_os = "linux")]
145+
pub fn exec(bytes: &[u8]) -> io::Result<()> {
146+
let stdout = io::stdout();
147+
//zero copy fast-path
148+
//large args might not be aligned
149+
//todo: align instead of giving up fast-path
150+
let aligned = bytes.len().is_multiple_of(page_size());
151+
if aligned {
152+
let fd = stdout.as_fd();
153+
let iovec = [IoSliceRaw::from_slice(bytes)];
154+
// we no longer edit bytes until vmsplice
155+
// bytes should not be dropped until vmsplice finishes since we have write fallback
156+
while unsafe { vmsplice(fd, &iovec, SpliceFlags::GIFT) }.is_ok() {}
157+
}
158+
let mut stdout = stdout.lock();
159+
loop {
160+
stdout.write_all(bytes)?;
161+
}
162+
}
163+
120164
#[cfg(test)]
121165
mod tests {
122166
use super::*;
@@ -142,33 +186,40 @@ mod tests {
142186
];
143187

144188
for (line, final_len) in tests {
145-
let mut v = std::iter::repeat_n(b'a', line).collect::<Vec<_>>();
146-
prepare_buffer(&mut v);
189+
let mut v: AVec<u8, ConstAlign<ROOTLESS_MAX_PIPE_SIZE>> =
190+
AVec::from_iter(ROOTLESS_MAX_PIPE_SIZE, std::iter::repeat_n(b'a', line));
191+
prepare_buffer(&mut v, BUF_SIZE);
147192
assert_eq!(v.len(), final_len);
148193
}
149194
}
150195

151196
#[test]
152197
fn test_args_into_buf() {
153198
{
154-
let mut v = Vec::with_capacity(BUF_SIZE);
199+
let mut v: AVec<u8, ConstAlign<ROOTLESS_MAX_PIPE_SIZE>> =
200+
AVec::with_capacity(ROOTLESS_MAX_PIPE_SIZE, BUF_SIZE);
155201
let default_args = ["y".into()];
156202
args_into_buffer(&mut v, default_args.iter()).unwrap();
157-
assert_eq!(String::from_utf8(v).unwrap(), "y\n");
203+
assert_eq!(String::from_utf8(v.to_vec()).unwrap(), "y\n");
158204
}
159205

160206
{
161-
let mut v = Vec::with_capacity(BUF_SIZE);
207+
let mut v: AVec<u8, ConstAlign<ROOTLESS_MAX_PIPE_SIZE>> =
208+
AVec::with_capacity(ROOTLESS_MAX_PIPE_SIZE, BUF_SIZE);
162209
let args = ["foo".into()];
163210
args_into_buffer(&mut v, args.iter()).unwrap();
164-
assert_eq!(String::from_utf8(v).unwrap(), "foo\n");
211+
assert_eq!(String::from_utf8(v.to_vec()).unwrap(), "foo\n");
165212
}
166213

167214
{
168-
let mut v = Vec::with_capacity(BUF_SIZE);
215+
let mut v: AVec<u8, ConstAlign<ROOTLESS_MAX_PIPE_SIZE>> =
216+
AVec::with_capacity(ROOTLESS_MAX_PIPE_SIZE, BUF_SIZE);
169217
let args = ["foo".into(), "bar baz".into(), "qux".into()];
170218
args_into_buffer(&mut v, args.iter()).unwrap();
171-
assert_eq!(String::from_utf8(v).unwrap(), "foo bar baz qux\n");
219+
assert_eq!(
220+
String::from_utf8(v.to_vec()).unwrap(),
221+
"foo bar baz qux\n"
222+
);
172223
}
173224
}
174225
}

0 commit comments

Comments
 (0)