zero capacity queue

joshua-spacetime · joshua-spacetime · commit 5855974cd97f · 2026-03-18T15:25:42.000-07:00
diff --git a/crates/core/src/host/module_host.rs b/crates/core/src/host/module_host.rs
@@ -1239,7 +1239,7 @@ impl ModuleHost {
         .await
     }
 
-    async fn with_js_procedure_instance<R>(
+    async fn with_js_pooled_instance<R>(
         &self,
         label: &str,
         f: impl AsyncFnOnce(&JsInstance) -> R,
@@ -1248,12 +1248,12 @@ impl ModuleHost {
         let timer_guard = self.start_call_timer(label);
 
         scopeguard::defer_on_unwind!({
-            log::warn!("procedure {label} panicked");
+            log::warn!("pooled JS instance operation {label} panicked");
             (self.on_panic)();
         });
 
         Ok(match &*self.inner {
-            ModuleHostInner::Wasm(_) => unreachable!("WASM procedures should not use the JS procedure instance path"),
+            ModuleHostInner::Wasm(_) => unreachable!("WASM should not use the pooled JS instance path"),
             ModuleHostInner::Js(V8ModuleHost {
                 procedure_instances, ..
             }) => {
@@ -1268,6 +1268,24 @@ impl ModuleHost {
         })
     }
 
+    async fn call_view_command(&self, label: &str, cmd: ViewCommand) -> Result<ViewCommandResult, NoSuchModule> {
+        match &*self.inner {
+            ModuleHostInner::Wasm(_) => {
+                self.call(
+                    label,
+                    cmd,
+                    async |cmd, inst| inst.call_view(cmd),
+                    async |_cmd, _inst| unreachable!("JS view commands use the pooled JS instance path"),
+                )
+                .await
+            }
+            ModuleHostInner::Js(_) => {
+                self.with_js_pooled_instance(label, async move |inst| inst.call_view(cmd).await)
+                    .await
+            }
+        }
+    }
+
     pub async fn disconnect_client(&self, client_id: ClientActorId) {
         log::trace!("disconnecting client {client_id}");
         if let Err(e) = self
@@ -1610,12 +1628,7 @@ impl ModuleHost {
         };
 
         let res = self
-            .call(
-                "call_view_add_single_subscription",
-                cmd,
-                async |cmd, inst| inst.call_view(cmd),
-                async |cmd, inst| inst.call_view(cmd).await,
-            )
+            .call_view_command("call_view_add_single_subscription", cmd)
             .await
             //TODO: handle error better
             .map_err(|e| DBError::Other(anyhow::anyhow!(e)))?;
@@ -1643,12 +1656,7 @@ impl ModuleHost {
         };
 
         let res = self
-            .call(
-                "call_view_add_multi_subscription",
-                cmd,
-                async |cmd, inst| inst.call_view(cmd),
-                async |cmd, inst| inst.call_view(cmd).await,
-            )
+            .call_view_command("call_view_add_multi_subscription", cmd)
             .await
             //TODO: handle error better
             .map_err(|e| DBError::Other(anyhow::anyhow!(e)))?;
@@ -1676,12 +1684,7 @@ impl ModuleHost {
         };
 
         let res = self
-            .call(
-                "call_view_remove_v2_subscription",
-                cmd,
-                async |cmd, inst| inst.call_view(cmd),
-                async |cmd, inst| inst.call_view(cmd).await,
-            )
+            .call_view_command("call_view_remove_v2_subscription", cmd)
             .await
             .map_err(|e| DBError::Other(anyhow::anyhow!(e)))?;
 
@@ -1708,12 +1711,7 @@ impl ModuleHost {
         };
 
         let res = self
-            .call(
-                "call_view_add_multi_subscription",
-                cmd,
-                async |cmd, inst| inst.call_view(cmd),
-                async |cmd, inst| inst.call_view(cmd).await,
-            )
+            .call_view_command("call_view_add_multi_subscription", cmd)
             .await
             //TODO: handle error better
             .map_err(|e| DBError::Other(anyhow::anyhow!(e)))?;
@@ -1741,12 +1739,7 @@ impl ModuleHost {
         };
 
         let res = self
-            .call(
-                "call_view_add_legacy_subscription",
-                cmd,
-                async |cmd, inst| inst.call_view(cmd),
-                async |cmd, inst| inst.call_view(cmd).await,
-            )
+            .call_view_command("call_view_add_legacy_subscription", cmd)
             .await
             //TODO: handle error better
             .map_err(|e| DBError::Other(anyhow::anyhow!(e)))?;
@@ -1775,12 +1768,7 @@ impl ModuleHost {
         };
 
         let res = self
-            .call(
-                "call_view_sql",
-                cmd,
-                async |cmd, inst| inst.call_view(cmd),
-                async |cmd, inst| inst.call_view(cmd).await,
-            )
+            .call_view_command("call_view_sql", cmd)
             .await
             //TODO: handle error better
             .map_err(|e| DBError::Other(anyhow::anyhow!(e)))?;
@@ -1895,7 +1883,7 @@ impl ModuleHost {
                 .await
             }
             ModuleHostInner::Js(_) => {
-                self.with_js_procedure_instance(name, async move |inst| inst.call_procedure(params).await)
+                self.with_js_pooled_instance(name, async move |inst| inst.call_procedure(params).await)
                     .await
             }
         }
@@ -1925,7 +1913,7 @@ impl ModuleHost {
                         }
                     };
                 if use_procedure_lane {
-                    self.with_js_procedure_instance("unknown scheduled function", async move |inst| {
+                    self.with_js_pooled_instance("unknown scheduled function", async move |inst| {
                         inst.call_scheduled_function(params).await
                     })
                     .await
diff --git a/crates/core/src/host/v8/mod.rs b/crates/core/src/host/v8/mod.rs
@@ -590,6 +590,13 @@ impl JsReducerLane {
         *self.state.active.write() = next;
     }
 
+    /// Run a reducer-lane operation that may be retried if the active worker
+    /// disconnects before sending any reply.
+    ///
+    /// This is used for reducer-style requests whose arguments are cloneable and
+    /// can therefore be resubmitted to a fresh worker after trap recovery. It is
+    /// not used to hide a real trapped result: the trapping request still replies
+    /// first, and only later queued work is retried on the replacement worker.
     async fn run_replayable<A, R, F, Fut>(&self, arg: A, label: &'static str, work: F) -> R
     where
         A: Clone,
@@ -615,6 +622,13 @@ impl JsReducerLane {
         }
     }
 
+    /// Run a reducer-lane operation that must not be retried automatically after
+    /// worker loss.
+    ///
+    /// This is currently used for `run_on_thread`, where the reducer lane only
+    /// sees an opaque closure rather than a structured, cloneable request
+    /// payload. We still replace the worker for future requests, but this
+    /// request gets the disconnect error.
     async fn run_nonreplayable<R>(
         &self,
         label: &'static str,
@@ -635,6 +649,11 @@ impl JsReducerLane {
         }
     }
 
+    /// Run an arbitrary closure on the reducer worker thread without replay.
+    ///
+    /// This is non-replayable because the closure is opaque host code, not a
+    /// cloneable request payload, and it may have already produced host-side
+    /// effects before a worker disconnect is observed.
     pub async fn run_on_thread<F, R>(&self, f: F) -> anyhow::Result<R>
     where
         F: AsyncFnOnce() -> R + Send + 'static,
@@ -666,6 +685,11 @@ impl JsReducerLane {
         .map_err(|_| anyhow::anyhow!("reducer worker exited while running a non-replayable module-thread task"))
     }
 
+    /// Run a database update on the reducer lane with replay-on-worker-loss.
+    ///
+    /// This is replayable because its arguments are cloneable, and if a poisoned
+    /// worker exits before replying we can resubmit the same update request to
+    /// the replacement worker rather than dropping later queued work.
     pub async fn update_database(
         &self,
         program: Program,
@@ -688,6 +712,12 @@ impl JsReducerLane {
         .await
     }
 
+    /// Run a reducer on the reducer lane with replay-on-worker-loss.
+    ///
+    /// `CallReducerParams` is cloneable, so queued reducer requests can be
+    /// resubmitted to a fresh worker if the previous worker disappears before
+    /// replying. A reducer that actually traps is not replayed: it replies first,
+    /// then the poisoned worker exits and only later requests are retried.
     pub async fn call_reducer(&self, params: CallReducerParams) -> ReducerCallResult {
         self.run_replayable(params, "call_reducer", |inst, params| async move {
             inst.send_request(|reply_tx| JsWorkerRequest::CallReducer { reply_tx, params })
@@ -696,13 +726,22 @@ impl JsReducerLane {
         .await
     }
 
+    /// Clear all reducer-lane client state with replay-on-worker-loss.
+    ///
+    /// This request carries no non-cloneable payload, and retrying it after a
+    /// worker disconnect preserves the intended "all clients cleared" outcome.
     pub async fn clear_all_clients(&self) -> anyhow::Result<()> {
         self.run_replayable((), "clear_all_clients", |inst, _| async move {
             inst.send_request(JsWorkerRequest::ClearAllClients).await
         })
         .await
     }
 
+    /// Run the `client_connected` lifecycle reducer with replay-on-worker-loss.
+    ///
+    /// This follows the same retry model as ordinary reducers: the request
+    /// arguments are cloneable, and only requests that lose their worker before
+    /// receiving any reply are retried on the replacement worker.
     pub async fn call_identity_connected(
         &self,
         caller_auth: ConnectionAuthCtx,
@@ -723,6 +762,11 @@ impl JsReducerLane {
         .await
     }
 
+    /// Run the `client_disconnected` lifecycle reducer with replay-on-worker-loss.
+    ///
+    /// This is replayable for the same reason as reducer calls: its payload is
+    /// cloneable and queued lifecycle work should survive replacement of a
+    /// poisoned reducer worker.
     pub async fn call_identity_disconnected(
         &self,
         caller_identity: Identity,
@@ -743,6 +787,11 @@ impl JsReducerLane {
         .await
     }
 
+    /// Run disconnect cleanup on the reducer lane with replay-on-worker-loss.
+    ///
+    /// The request payload is just a cloneable client id, so if the active worker
+    /// disappears before replying the cleanup request can be resubmitted to the
+    /// replacement worker.
     pub async fn disconnect_client(&self, client_id: ClientActorId) -> Result<(), ReducerCallError> {
         self.run_replayable(client_id, "disconnect_client", |inst, client_id| async move {
             inst.send_request(|reply_tx| JsWorkerRequest::DisconnectClient { reply_tx, client_id })
@@ -751,6 +800,10 @@ impl JsReducerLane {
         .await
     }
 
+    /// Run reducer-style database initialization with replay-on-worker-loss.
+    ///
+    /// The initialization request is cloneable, so if the active reducer worker
+    /// dies before replying we can resubmit it to the replacement worker.
     pub async fn init_database(&self, program: Program) -> anyhow::Result<Option<ReducerCallResult>> {
         self.run_replayable(program, "init_database", |inst, program| async move {
             inst.send_request(|reply_tx| JsWorkerRequest::InitDatabase { reply_tx, program })
@@ -759,19 +812,11 @@ impl JsReducerLane {
         .await
     }
 
-    pub async fn call_view(&self, cmd: ViewCommand) -> ViewCommandResult {
-        // View/subscription commands are not cheaply replayable today because
-        // the websocket request payload types they carry are not cloneable.
-        // If a worker dies here we surface the failure rather than silently
-        // re-running a command that may already have produced side effects.
-        self.run_nonreplayable("call_view", async move |inst| {
-            inst.send_request(|reply_tx| JsWorkerRequest::CallView { reply_tx, cmd })
-                .await
-        })
-        .await
-        .unwrap_or_else(|_| panic!("reducer worker exited while handling a non-replayable view command"))
-    }
-
+    /// Run a scheduled reducer function with replay-on-worker-loss.
+    ///
+    /// Scheduled reducer requests carry cloneable params, so they use the same
+    /// recovery path as ordinary reducer calls. Scheduled procedures do not come
+    /// through this lane.
     pub(in crate::host) async fn call_scheduled_function(
         &self,
         params: ScheduledFunctionParams,
@@ -842,10 +887,12 @@ async fn spawn_instance_worker(
     load_balance_guard: Arc<LoadBalanceOnDropGuard>,
     mut core_pinner: CorePinner,
 ) -> anyhow::Result<(ModuleCommon, JsInstance)> {
-    // Spawn a FIFO queue for requests to the worker.
+    // Spawn a zero-capacity (rendezvous) queue for requests to the worker.
     // Multiple callers can enqueue concurrently, but the worker processes
-    // requests strictly one at a time on its dedicated thread.
-    let (request_tx, request_rx) = flume::unbounded();
+    // requests strictly one at a time on its dedicated thread. The channel
+    // itself buffers nothing: a send completes only once the worker is ready
+    // to receive it, so no backlog can accumulate inside the queue.
+    let (request_tx, request_rx) = flume::bounded(0);
 
     // This one-shot channel is used for initial startup error handling within the thread.
     let (result_tx, result_rx) = oneshot::channel();