add size limit option - unlimited size performs the same, and backpressure can be modeled with a size limit

kvc0 · kvc0 · commit cfbbb6d44edc · 2026-05-12T12:32:42.000-07:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/benchmarks/benches/channel.rs b/benchmarks/benches/channel.rs
@@ -120,6 +120,54 @@ fn mpsc_or_queues(criterion: &mut Criterion) {
                 elapsed
             });
     });
+
+    group.bench_function("spillway_batch_limited", |bencher| {
+        bencher
+            .to_async(runtime(threads))
+            .iter_custom(async |size| {
+                let (send, mut receive) =
+                    spillway::channel_with_capacity_and_concurrency(65536, threads);
+                let receiver = tokio::spawn(async move {
+                    let mut i = 0;
+                    while let Some(v) = receive.next().await {
+                        i += v;
+                    }
+                    i
+                });
+
+                for _ in 0..threads {
+                    let send = send.clone();
+                    tokio::spawn(async move {
+                        let per_thread = (size as usize / threads).max(1);
+                        let batches = per_thread.div_ceil(32);
+                        for _ in 0..batches {
+                            let mut pending = 0_usize..32;
+                            loop {
+                                match send.send_many(pending) {
+                                    Ok(()) => break,
+                                    Err(spillway::Error::Full(returned)) => {
+                                        pending = returned;
+                                        tokio::task::yield_now().await;
+                                    }
+                                    Err(spillway::Error::Closed(_)) => return,
+                                }
+                            }
+                        }
+                    });
+                }
+                drop(send);
+
+                let start = Instant::now();
+                let n = receiver.await;
+                let elapsed = start.elapsed();
+                log::info!("ok {n:?}");
+                let per_thread = (size as usize / threads).max(1);
+                let batches = per_thread.div_ceil(32);
+                let expected: usize = (0..32).sum::<usize>() * batches * threads;
+                assert_eq!(n.expect("must join successfully"), expected);
+                elapsed
+            });
+    });
 }
 
 fn runtime(threads: usize) -> tokio::runtime::Runtime {
diff --git a/spillway/Cargo.toml b/spillway/Cargo.toml
@@ -13,6 +13,7 @@ categories.workspace = true
 futures                 = { workspace = true }
 log                     = { workspace = true }
 rand                    = { workspace = true }
+thiserror               = { workspace = true }
 
 [dev-dependencies]
 tokio-test              = { workspace = true }
diff --git a/spillway/src/error.rs b/spillway/src/error.rs
@@ -0,0 +1,13 @@
+/// Errors returned by Spillway senders.
+///
+/// The unsent value(s) are returned in the error variant so the caller can
+/// reuse or drop them as appropriate.
+#[derive(Debug, thiserror::Error)]
+pub enum Error<T> {
+    /// The send would push the channel past its soft capacity. Nothing was enqueued.
+    #[error("spillway channel is full")]
+    Full(T),
+    /// The Receiver has been dropped. The channel will never accept more values.
+    #[error("spillway channel is closed")]
+    Closed(T),
+}
diff --git a/spillway/src/lib.rs b/spillway/src/lib.rs
@@ -1,16 +1,18 @@
 #![deny(missing_docs)]
 #![doc = include_str!("../README.md")]
 
+mod error;
 mod receiver;
 mod sender;
 mod shared;
 
 use std::sync::Arc;
 
+pub use error::Error;
 pub use receiver::Receiver;
 pub use sender::Sender;
 
-/// Get a new spillway channel with a default concurrency level.
+/// Get a new spillway channel with a default concurrency level and no capacity limit.
 pub fn channel<T>() -> (Sender<T>, Receiver<T>) {
     // const PARALLELISM: std::sync::LazyLock<usize> = std::sync::LazyLock::new(|| {
     //     std::thread::available_parallelism()
@@ -21,14 +23,30 @@ pub fn channel<T>() -> (Sender<T>, Receiver<T>) {
     channel_with_concurrency(8)
 }
 
-/// Get a new spillway channel with the given concurrency level.
+/// Get a new spillway channel with the given concurrency level and no capacity limit.
 ///
 /// Use this when you need lots of parallelism, or when you know how many Senders
 /// you will have. Higher numbers reduce contention, but increase the cost of
 /// parking the Receiver when idle. Thread count is a good starting point for
 /// concurrency.
 pub fn channel_with_concurrency<T>(concurrency: usize) -> (Sender<T>, Receiver<T>) {
-    let shared = Arc::new(shared::Shared::new(concurrency));
+    channel_with_capacity_and_concurrency(u64::MAX, concurrency)
+}
+
+/// Get a new spillway channel with a soft capacity limit and the given concurrency level.
+///
+/// `capacity` is an upper bound on the number of in-flight values. A send is
+/// rejected with [`Error::Full`] when it would push the channel past this limit.
+///
+/// Mind your batch sizes when using a capacity limit. A batch larger than the capacity can
+/// never be accepted: with a capacity of 10, a batch of 11 values is always rejected whole.
+///
+/// Pass `u64::MAX` to disable the limit (this matches [`channel_with_concurrency`]).
+pub fn channel_with_capacity_and_concurrency<T>(
+    capacity: u64,
+    concurrency: usize,
+) -> (Sender<T>, Receiver<T>) {
+    let shared = Arc::new(shared::Shared::new(concurrency, capacity));
     let sender = Sender::new(shared.clone());
     let receiver = Receiver::new(shared);
 
diff --git a/spillway/src/receiver.rs b/spillway/src/receiver.rs
@@ -1,4 +1,7 @@
-use std::{collections::VecDeque, sync::Arc};
+use std::{
+    collections::VecDeque,
+    sync::{atomic::AtomicU64, Arc},
+};
 
 use crate::shared::Shared;
 
@@ -43,7 +46,10 @@ impl<T> Receiver<T> {
     /// * `Poll::Ready(None)` when all senders have been dropped and the Receiver is caught up. The Receiver will never receive more messages and you should drop it.
     pub fn poll_next(&mut self, context: &mut std::task::Context) -> std::task::Poll<Option<T>> {
         match self.buffer.pop_front() {
-            Some(next) => std::task::Poll::Ready(Some(next)),
+            Some(next) => {
+                self.decrement_size(1);
+                std::task::Poll::Ready(Some(next))
+            }
             None => {
                 let dirty_index = match self.shared.race_find_dirty(self.cursor) {
                     Some(dirty_index) => {
@@ -89,11 +95,20 @@ impl<T> Receiver<T> {
                     .buffer
                     .pop_front()
                     .expect("chutes are only dirty when they have contents");
+                self.decrement_size(1);
                 std::task::Poll::Ready(Some(next))
             }
         }
     }
 
+    fn decrement_size(&self, count: usize) {
+        if self.shared.capacity != u64::MAX {
+            self.shared
+                .channel_size
+                .fetch_sub(count as u64, std::sync::atomic::Ordering::Relaxed);
+        }
+    }
+
     /// The next value for the Receiver.
     ///
     /// * Some(T) is the next value.
@@ -116,11 +131,29 @@ impl<T> Receiver<T> {
                     // we got one, but let's see if we can get more while we're here.
                     // for convenience, we'll put the item back and drain the whole batch.
                     self.buffer.push_front(next);
+                    // poll_next decremented channel_size by 1; put that one back. BatchDrain will
+                    // decrement once on Drop for however many items the caller consumes.
+                    if self.shared.capacity != u64::MAX {
+                        self.shared
+                            .channel_size
+                            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+                    }
+                    let initial_len = self.buffer.len();
+                    let channel_size = if self.shared.capacity == u64::MAX {
+                        None
+                    } else {
+                        // SAFETY: same lifetime widening as `buffer` below — this borrow is into
+                        // `self.shared.channel_size`, which lives at least as long as 'a (it lives
+                        // as long as the Arc held by `self`).
+                        Some(unsafe { &*(&self.shared.channel_size as *const AtomicU64) })
+                    };
                     // SAFETY: we have exclusive access to self for 'a. self itself is not referenced out from the fnmut, but the buffer is, which is
                     //         causing some borrow checker consternation. But since the buffer mutable borrow cannot outlive 'a, and &mut self can't
                     //         outlive 'a either, the borrow of buffer should be sound for 'a.
                     std::task::Poll::Ready(Some(BatchDrain {
                         buffer: unsafe { &mut *(&mut self.buffer as *mut _) },
+                        channel_size,
+                        initial_len,
                     }))
                 }
                 std::task::Poll::Ready(None) => std::task::Poll::Ready(None),
@@ -133,6 +166,8 @@ impl<T> Receiver<T> {
 
 struct BatchDrain<'a, T> {
     buffer: &'a mut VecDeque<T>,
+    channel_size: Option<&'a AtomicU64>,
+    initial_len: usize,
 }
 impl<T> Iterator for BatchDrain<'_, T> {
     type Item = T;
@@ -153,6 +188,17 @@ impl<T> ExactSizeIterator for BatchDrain<'_, T> {
     }
 }
 
+impl<T> Drop for BatchDrain<'_, T> {
+    fn drop(&mut self) {
+        if let Some(channel_size) = self.channel_size {
+            let consumed = self.initial_len - self.buffer.len();
+            if consumed != 0 {
+                channel_size.fetch_sub(consumed as u64, std::sync::atomic::Ordering::Relaxed);
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod test {
     use std::task::{Context, Poll, Waker};
diff --git a/spillway/src/sender.rs b/spillway/src/sender.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use crate::shared::Shared;
+use crate::{shared::Shared, Error};
 
 /// The sending half of a Spillway channel.
 ///
@@ -56,7 +56,10 @@ impl<T> Sender<T> {
     /// However, you might receive 1, 4, 5, 2, 3, 6 or any other interleaving. But
     /// 1 will always appear before 2, and 2 before 3; and 4 will always appear before 5,
     /// and 5 before 6.
-    pub fn send(&self, value: T) -> Result<(), T> {
+    ///
+    /// Returns [`Error::Full`] if the channel has reached its capacity limit, or
+    /// [`Error::Closed`] if the Receiver has been dropped.
+    pub fn send(&self, value: T) -> Result<(), Error<T>> {
         self.shared.send(self.chute, value)
     }
 
@@ -77,7 +80,15 @@ impl<T> Sender<T> {
     /// | 1, 2, 3, 4, 5, 6 |
     /// | 4, 5, 1, 2, 3, 6 |
     /// | 4, 5, 6, 1, 2, 3 |
-    pub fn send_many<I: IntoIterator<Item = T>>(&self, values: I) -> Result<(), I> {
+    ///
+    /// Returns [`Error::Full`] if the batch would push the channel past its capacity
+    /// limit (the entire batch is rejected; partial enqueues never happen), or
+    /// [`Error::Closed`] if the Receiver has been dropped.
+    pub fn send_many<I>(&self, values: I) -> Result<(), Error<I::IntoIter>>
+    where
+        I: IntoIterator<Item = T>,
+        I::IntoIter: ExactSizeIterator,
+    {
         self.shared.send_many(self.chute, values)
     }
 }
diff --git a/spillway/src/shared.rs b/spillway/src/shared.rs
@@ -1,11 +1,13 @@
 use std::{
     collections::VecDeque,
     sync::{
-        atomic::{AtomicBool, AtomicUsize},
+        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
         Mutex,
     },
 };
 
+use crate::Error;
+
 pub struct Chute<T> {
     queue: Mutex<VecDeque<T>>,
     clean: AtomicBool,
@@ -60,6 +62,8 @@ pub struct Shared<T> {
     pub(crate) waker: futures::task::AtomicWaker,
     pub(crate) senders: AtomicUsize,
     pub(crate) chute_clock: AtomicUsize,
+    pub(crate) channel_size: AtomicU64,
+    pub(crate) capacity: u64,
     dead: AtomicBool,
 }
 
@@ -70,57 +74,77 @@ impl<T> std::fmt::Debug for Shared<T> {
             .field("waker", &self.waker)
             .field("senders", &self.senders)
             .field("chute_clock", &self.chute_clock)
+            .field("channel_size", &self.channel_size)
+            .field("capacity", &self.capacity)
             .field("dead", &self.dead)
             .finish()
     }
 }
 
 impl<T> Shared<T> {
-    pub fn new(concurrency: usize) -> Self {
+    pub fn new(concurrency: usize, capacity: u64) -> Self {
         Self {
             chutes: (0..concurrency)
                 .map(|_| Default::default())
                 .collect::<Vec<_>>(),
             waker: futures::task::AtomicWaker::new(),
             senders: AtomicUsize::new(0),
             chute_clock: AtomicUsize::new(0),
+            channel_size: AtomicU64::new(0),
+            capacity,
             dead: AtomicBool::new(false),
         }
     }
 
     pub fn add_sender(&self) {
-        self.senders
-            .fetch_add(1, std::sync::atomic::Ordering::Release);
+        self.senders.fetch_add(1, Ordering::Release);
     }
 
     pub fn drop_sender(&self) -> usize {
-        self.senders
-            .fetch_sub(1, std::sync::atomic::Ordering::AcqRel)
+        self.senders.fetch_sub(1, Ordering::AcqRel)
     }
 
     pub fn choose_chute(&self) -> usize {
-        self.chute_clock
-            .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
-            % self.chutes.len()
+        self.chute_clock.fetch_add(1, Ordering::Relaxed) % self.chutes.len()
     }
 
     pub fn wake(&self) {
         self.waker.wake();
     }
 
-    pub fn send(&self, chute: usize, value: T) -> Result<(), T> {
-        if self.dead.load(std::sync::atomic::Ordering::Relaxed) {
-            return Err(value);
+    pub fn send(&self, chute: usize, value: T) -> Result<(), Error<T>> {
+        if self.dead.load(Ordering::Relaxed) {
+            return Err(Error::Closed(value));
+        }
+        if self.capacity != u64::MAX {
+            let prev = self.channel_size.fetch_add(1, Ordering::Relaxed);
+            if self.capacity < prev + 1 {
+                self.channel_size.fetch_sub(1, Ordering::Relaxed);
+                return Err(Error::Full(value));
+            }
         }
         self.send_many_infallible(chute, [value]);
         Ok(())
     }
 
-    pub fn send_many<I: IntoIterator<Item = T>>(&self, chute: usize, values: I) -> Result<(), I> {
-        if self.dead.load(std::sync::atomic::Ordering::Relaxed) {
-            return Err(values);
+    pub fn send_many<I>(&self, chute: usize, values: I) -> Result<(), Error<I::IntoIter>>
+    where
+        I: IntoIterator<Item = T>,
+        I::IntoIter: ExactSizeIterator,
+    {
+        let iter = values.into_iter();
+        if self.dead.load(Ordering::Relaxed) {
+            return Err(Error::Closed(iter));
+        }
+        if self.capacity != u64::MAX {
+            let count = iter.len() as u64;
+            let prev = self.channel_size.fetch_add(count, Ordering::Relaxed);
+            if self.capacity < prev + count {
+                self.channel_size.fetch_sub(count, Ordering::Relaxed);
+                return Err(Error::Full(iter));
+            }
         }
-        self.send_many_infallible(chute, values);
+        self.send_many_infallible(chute, iter);
         Ok(())
     }