TrogonStack
diff --git a/‎rsworkspace/crates/AGENTS.md‎
Lines changed: 1 addition & 1 deletion b/‎rsworkspace/crates/AGENTS.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎rsworkspace/crates/acp-nats/Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎rsworkspace/crates/acp-nats/Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎rsworkspace/crates/acp-nats/src/agent/cancel.rs‎
Lines changed: 16 additions & 14 deletions b/‎rsworkspace/crates/acp-nats/src/agent/cancel.rs‎
Lines changed: 16 additions & 14 deletions
diff --git a/‎rsworkspace/crates/acp-nats/src/agent/load_session.rs‎
Lines changed: 1 addition & 3 deletions b/‎rsworkspace/crates/acp-nats/src/agent/load_session.rs‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎rsworkspace/crates/acp-nats/src/agent/mod.rs‎
Lines changed: 20 additions & 37 deletions b/‎rsworkspace/crates/acp-nats/src/agent/mod.rs‎
Lines changed: 20 additions & 37 deletions
diff --git a/‎rsworkspace/crates/acp-nats/src/agent/new_session.rs‎
Lines changed: 1 addition & 15 deletions b/‎rsworkspace/crates/acp-nats/src/agent/new_session.rs‎
Lines changed: 1 addition & 15 deletions
diff --git a/‎rsworkspace/crates/acp-nats/src/agent/pending_prompt_waiters.rs‎
Lines changed: 130 additions & 0 deletions b/‎rsworkspace/crates/acp-nats/src/agent/pending_prompt_waiters.rs‎
Lines changed: 130 additions & 0 deletions
@@ -1,6 +1,6 @@
 Prefer domain-specific value objects over primitives (e.g. `AcpPrefix` not `String`). Each type's factory must guarantee correctness at construction—invalid instances should be unrepresentable. Validate per-type, not per-aggregate: avoid validating unrelated fields together in a single constructor.
 
-Every value object lives in its own file named after the type (e.g. `acp_prefix.rs`, `ext_method_name.rs`, `session_id.rs`). Never inline a value object into a config, aggregate, or service file.
+Every value object lives in its own file named after the type (e.g. `acp_prefix.rs`, `ext_method_name.rs`, `session_id.rs`). Never inline a value object into a config, aggregate, or service file. File layout: `src/{type_snake_case}.rs`; export in `lib.rs` as `pub use {module}::{Type, TypeError}`.
 
 You must use the `test-support` feature to share test helpers between crates.
 Prefer one trait per operation over a single trait with multiple operations.
 
@@ -21,5 +21,6 @@ trogon-std = { path = "../trogon-std" }
 
 [dev-dependencies]
 opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio", "metrics", "testing"] }
+tokio = { version = "1.49.0", features = ["test-util"] }
 trogon-nats = { path = "../trogon-nats", features = ["test-support"] }
 trogon-std = { path = "../trogon-std", features = ["test-support"] }
@@ -5,9 +5,11 @@ use agent_client_protocol::{CancelNotification, Error, ErrorCode, Result};
 use tracing::{info, instrument, warn};
 use trogon_std::time::GetElapsed;
 
-/// Publishes the cancel notification to the backend via NATS (fire-and-forget).
-/// The publish failure is logged and recorded as a metric but does not propagate
-/// to the caller, so the client always receives `Ok(())`.
+/// Handles cancel notification requests.
+///
+/// Validates the session ID and publishes the cancellation to the backend (fire-and-forget).
+/// The backend owns session state and will respond to the in-flight prompt with `stopReason: cancelled`.
+/// Publish failure is logged and recorded in metrics but does not propagate to the caller.
 #[instrument(
     name = "acp.session.cancel",
     skip(bridge, args),
@@ -25,9 +27,7 @@ pub async fn handle<N: RequestClient + PublishClient + FlushClient, C: GetElapse
         bridge
             .metrics
             .record_request("cancel", bridge.clock.elapsed(start).as_secs_f64(), false);
-        bridge
-            .metrics
-            .record_error("session_validate", "invalid_session_id");
+        bridge.metrics.record_error("cancel", "invalid_session_id");
         Error::new(
             ErrorCode::InvalidParams.into(),
             format!("Invalid session ID: {}", e),
@@ -46,7 +46,7 @@ pub async fn handle<N: RequestClient + PublishClient + FlushClient, C: GetElapse
     )
     .await;
 
-    if let Err(error) = publish_result {
+    if let Err(error) = &publish_result {
         warn!(
             session_id = %args.session_id,
             error = %error,
@@ -57,9 +57,11 @@ pub async fn handle<N: RequestClient + PublishClient + FlushClient, C: GetElapse
             .record_error("cancel", "cancel_publish_failed");
     }
 
-    bridge
-        .metrics
-        .record_request("cancel", bridge.clock.elapsed(start).as_secs_f64(), true);
+    bridge.metrics.record_request(
+        "cancel",
+        bridge.clock.elapsed(start).as_secs_f64(),
+        publish_result.is_ok(),
+    );
 
     Ok(())
 }
@@ -223,8 +225,8 @@ mod tests {
             "expected acp.request.count with method=cancel, success=false on validation failure"
         );
         assert!(
-            has_error_metric(&finished_metrics, "session_validate", "invalid_session_id"),
-            "expected acp.errors.total with operation=session_validate, reason=invalid_session_id"
+            has_error_metric(&finished_metrics, "cancel", "invalid_session_id"),
+            "expected acp.errors.total with operation=cancel, reason=invalid_session_id"
         );
         provider.shutdown().unwrap();
     }
@@ -258,8 +260,8 @@ mod tests {
             "expected acp.errors.total with operation=cancel, reason=cancel_publish_failed"
         );
         assert!(
-            has_request_metric(&finished_metrics, "cancel", true),
-            "publish failure is fire-and-forget; caller still gets Ok, so success=true"
+            has_request_metric(&finished_metrics, "cancel", false),
+            "request metric records publish outcome; success=false when publish fails"
         );
         provider.shutdown().unwrap();
     }
 
@@ -1,5 +1,6 @@
 use super::Bridge;
 use crate::acp_prefix::AcpPrefix;
+use crate::config::SESSION_READY_DELAY;
 use crate::error::AGENT_UNAVAILABLE;
 use crate::nats::{
     self, ExtSessionReady, FlushClient, FlushPolicy, PublishClient, PublishOptions, RequestClient,
@@ -8,13 +9,10 @@ use crate::nats::{
 use crate::session_id::AcpSessionId;
 use crate::telemetry::metrics::Metrics;
 use agent_client_protocol::{Error, ErrorCode, LoadSessionRequest, LoadSessionResponse, Result};
-use std::time::Duration;
 use tracing::{info, instrument, warn};
 use trogon_nats::NatsError;
 use trogon_std::time::GetElapsed;
 
-const SESSION_READY_DELAY: Duration = Duration::from_millis(100);
-
 fn map_load_session_error(e: NatsError) -> Error {
     match &e {
         NatsError::Timeout { subject } => {
 
@@ -5,14 +5,18 @@ mod ext_notification;
 mod initialize;
 mod load_session;
 mod new_session;
+mod pending_prompt_waiters;
+mod prompt;
 mod set_session_mode;
 
+use pending_prompt_waiters::PendingSessionPromptResponseWaiters;
+
 use crate::config::Config;
 use crate::nats::{FlushClient, PublishClient, RequestClient};
+use crate::prompt_slot_counter::PromptSlotCounter;
 use crate::telemetry::metrics::Metrics;
-use agent_client_protocol::ErrorCode;
 use agent_client_protocol::{
-    Agent, AuthenticateRequest, AuthenticateResponse, CancelNotification, Error, ExtNotification,
+    Agent, AuthenticateRequest, AuthenticateResponse, CancelNotification, ExtNotification,
     ExtRequest, ExtResponse, InitializeRequest, InitializeResponse, LoadSessionRequest,
     LoadSessionResponse, NewSessionRequest, NewSessionResponse, PromptRequest, PromptResponse,
     Result, SetSessionModeRequest, SetSessionModeResponse,
@@ -23,17 +27,22 @@ use trogon_std::time::GetElapsed;
 pub struct Bridge<N: RequestClient + PublishClient + FlushClient, C: GetElapsed> {
     pub(crate) nats: N,
     pub(crate) clock: C,
-    pub(crate) config: Config,
     pub(crate) metrics: Metrics,
+    pub(crate) pending_session_prompt_responses: PendingSessionPromptResponseWaiters<C::Instant>,
+    pub(crate) prompt_slot_counter: PromptSlotCounter,
+    pub(crate) config: Config,
 }
 
 impl<N: RequestClient + PublishClient + FlushClient, C: GetElapsed> Bridge<N, C> {
     pub fn new(nats: N, clock: C, meter: &Meter, config: Config) -> Self {
+        let max_concurrent = config.max_concurrent_client_tasks();
         Self {
             nats,
             clock,
             config,
             metrics: Metrics::new(meter),
+            pending_session_prompt_responses: PendingSessionPromptResponseWaiters::new(),
+            prompt_slot_counter: PromptSlotCounter::new(max_concurrent),
         }
     }
 
@@ -67,11 +76,8 @@ impl<N: RequestClient + PublishClient + FlushClient, C: GetElapsed> Agent for Br
         set_session_mode::handle(self, args).await
     }
 
-    async fn prompt(&self, _args: PromptRequest) -> Result<PromptResponse> {
-        Err(Error::new(
-            ErrorCode::InternalError.into(),
-            "not yet implemented",
-        ))
+    async fn prompt(&self, args: PromptRequest) -> Result<PromptResponse> {
+        prompt::handle(self, args).await
     }
 
     async fn cancel(&self, args: CancelNotification) -> Result<()> {
@@ -88,37 +94,14 @@ impl<N: RequestClient + PublishClient + FlushClient, C: GetElapsed> Agent for Br
 }
 
 #[cfg(test)]
-mod tests {
+mod send_sync_tests {
     use super::Bridge;
-    use crate::config::Config;
-    use agent_client_protocol::{Agent, PromptRequest};
     use trogon_nats::AdvancedMockNatsClient;
+    use trogon_std::time::SystemClock;
 
-    fn mock_bridge() -> Bridge<AdvancedMockNatsClient, trogon_std::time::SystemClock> {
-        Bridge::new(
-            AdvancedMockNatsClient::new(),
-            trogon_std::time::SystemClock,
-            &opentelemetry::global::meter("acp-nats-test"),
-            Config::for_test("acp"),
-        )
-    }
-
-    #[tokio::test]
-    async fn stub_methods_return_not_implemented() {
-        let bridge = mock_bridge();
-        let msg = "not yet implemented";
-
-        assert!(
-            bridge
-                .prompt(PromptRequest::new("s1", vec![]))
-                .await
-                .is_err()
-        );
-
-        let err = bridge
-            .prompt(PromptRequest::new("s1", vec![]))
-            .await
-            .unwrap_err();
-        assert!(err.to_string().contains(msg));
+    #[test]
+    fn bridge_is_send_and_sync() {
+        fn assert_send_sync<T: Send + Sync>() {}
+        assert_send_sync::<Bridge<AdvancedMockNatsClient, SystemClock>>();
     }
 }
@@ -1,4 +1,5 @@
 use super::Bridge;
+use crate::config::SESSION_READY_DELAY;
 use crate::error::AGENT_UNAVAILABLE;
 use crate::nats::{
     self, ExtSessionReady, FlushClient, FlushPolicy, PublishClient, PublishOptions, RequestClient,
@@ -8,25 +9,10 @@ use crate::telemetry::metrics::Metrics;
 use agent_client_protocol::{
     Error, ErrorCode, NewSessionRequest, NewSessionResponse, Result, SessionId,
 };
-use std::time::Duration;
 use tracing::{Span, info, instrument, warn};
 use trogon_nats::NatsError;
 use trogon_std::time::GetElapsed;
 
-/// Delay before publishing `session.ready` to NATS.
-///
-/// The `Agent` trait returns the response value *before* the transport layer
-/// serializes and writes it to the client. Without a delay the spawned task
-/// could publish `session.ready` to NATS before the client has received the
-/// `session/new` response, violating the ordering guarantee documented on
-/// [`ExtSessionReady`].
-///
-/// A post-send callback from the transport would be the ideal fix, but the
-/// external `agent_client_protocol` crate does not expose one. This constant
-/// delay provides a practical safety margin (serialization + write is typically
-/// sub-millisecond).
-const SESSION_READY_DELAY: Duration = Duration::from_millis(100);
-
 fn map_new_session_error(e: NatsError) -> Error {
     match &e {
         NatsError::Timeout { subject } => {
 
@@ -0,0 +1,130 @@
+//! Waiter registry for bridging prompt request/response over NATS notifications.
+//!
+//! **When to use**
+//! - In the ACP prompt path where request and response are decoupled (`publish` now, response
+//!   arrives later via `client.ext.session.prompt_response`).
+//! - Before publishing prompt work, so an immediate backend response cannot race ahead of waiter
+//!   registration.
+//!
+//! **Why this exists**
+//! - Prompt responses are correlated by `SessionId`, not by direct request/reply transport.
+//! - Enforcing one active waiter per session avoids ambiguous delivery when clients duplicate
+//!   prompt calls.
+//! - Timed-out sessions are tracked briefly to suppress noisy duplicate timeout-related warnings
+//!   during late-response windows.
+
+use std::collections::HashMap;
+use std::sync::Mutex;
+
+use agent_client_protocol::{PromptResponse, SessionId};
+use tokio::sync::oneshot;
+use trogon_std::time::GetElapsed;
+use crate::config::PROMPT_TIMEOUT_WARNING_SUPPRESSION_WINDOW;
+
+/// Receiving half handed out by `register_waiter`; it yields either the backend's
+/// `PromptResponse` or a `String` describing the failure.
+type PromptResponseReceiver = oneshot::Receiver<std::result::Result<PromptResponse, String>>;
+
+/// Handle representing one registered prompt waiter.
+///
+/// Dropping the guard removes the session's waiter from the registry, so cancelled or aborted
+/// callers do not leave stale entries behind.
+pub(crate) struct PromptWaiterGuard<'a, I>
+where
+    I: Copy,
+{
+    /// Registry the waiter was inserted into.
+    waiters: &'a PendingSessionPromptResponseWaiters<I>,
+    /// Session whose waiter is removed on drop.
+    session_id: SessionId,
+}
+
+impl<'a, I> PromptWaiterGuard<'a, I>
+where
+    I: Copy,
+{
+    /// Builds the guard that will remove `session_id` from `waiters` once dropped.
+    fn new(waiters: &'a PendingSessionPromptResponseWaiters<I>, session_id: SessionId) -> Self {
+        PromptWaiterGuard { waiters, session_id }
+    }
+}
+
+impl<I: Copy> Drop for PromptWaiterGuard<'_, I> {
+    /// Unregisters this guard's session from the registry it was created from.
+    fn drop(&mut self) {
+        let registry = self.waiters;
+        registry.remove_waiter(&self.session_id);
+    }
+}
+
+/// In-process registry of prompt callers awaiting a backend response, keyed by session.
+///
+/// The registry is deliberately local to this agent process; correlating across processes is
+/// left to NATS subjects and backend state.
+pub(crate) struct PendingSessionPromptResponseWaiters<I>
+where
+    I: Copy,
+{
+    /// Sending halves for sessions with a prompt in flight (at most one per session).
+    waiters:
+        Mutex<HashMap<SessionId, oneshot::Sender<std::result::Result<PromptResponse, String>>>>,
+    /// Instant at which each session was last marked as timed out.
+    timed_out: Mutex<HashMap<SessionId, I>>,
+}
+
+impl<I: Copy> PendingSessionPromptResponseWaiters<I> {
+    /// Creates an empty waiter registry.
+    pub fn new() -> Self {
+        Self {
+            waiters: Mutex::new(HashMap::new()),
+            timed_out: Mutex::new(HashMap::new()),
+        }
+    }
+
+    /// Locks `mutex`, taking the guard even if a previous holder panicked.
+    ///
+    /// A poisoned lock still protects a structurally valid `HashMap`, so the registry keeps
+    /// working instead of panicking. This matters most for `remove_waiter`, which
+    /// `PromptWaiterGuard` calls from `Drop`: panicking there while another panic is already
+    /// unwinding would abort the process.
+    fn lock<T>(mutex: &Mutex<T>) -> std::sync::MutexGuard<'_, T> {
+        mutex
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner)
+    }
+
+    /// Registers the receiver for the next prompt response of `session_id`.
+    ///
+    /// On success returns the receiving half of a fresh oneshot channel together with a guard
+    /// that unregisters the waiter when dropped. Any timeout marker recorded for the session is
+    /// cleared as part of the registration.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(())` when another waiter is already active for the same session; the
+    /// existing waiter is left untouched.
+    pub fn register_waiter(
+        &self,
+        session_id: SessionId,
+    ) -> std::result::Result<(PromptResponseReceiver, PromptWaiterGuard<'_, I>), ()> {
+        use std::collections::hash_map::Entry;
+
+        let mut waiters = Self::lock(&self.waiters);
+        // Single lookup: either the slot is free and we keep a handle to it, or we bail out.
+        let Entry::Vacant(slot) = waiters.entry(session_id.clone()) else {
+            return Err(());
+        };
+        // Allocate the channel only once the slot is known to be free.
+        let (tx, rx) = oneshot::channel();
+        // Lock order is always `waiters` then `timed_out`; no other path nests them.
+        Self::lock(&self.timed_out).remove(&session_id);
+        slot.insert(tx);
+        Ok((rx, PromptWaiterGuard::new(self, session_id)))
+    }
+
+    /// Marks a session as timed out to suppress transient duplicate warnings for late responses.
+    ///
+    /// Expired markers are purged first, then `session_id` is stamped with `clock.now()`.
+    pub(crate) fn mark_prompt_waiter_timed_out<C: GetElapsed<Instant = I>>(
+        &self,
+        session_id: SessionId,
+        clock: &C,
+    ) {
+        self.purge_expired_timed_out_waiters(clock);
+        Self::lock(&self.timed_out).insert(session_id, clock.now());
+    }
+
+    /// Drops timeout-suppression markers after a short window.
+    ///
+    /// A marker is kept only while its age is below `PROMPT_TIMEOUT_WARNING_SUPPRESSION_WINDOW`.
+    /// This keeps suppression bounded so future requests for the same session can emit warnings
+    /// again if they truly timeout.
+    pub(crate) fn purge_expired_timed_out_waiters<C: GetElapsed<Instant = I>>(&self, clock: &C) {
+        Self::lock(&self.timed_out).retain(|_, seen_at| {
+            clock.elapsed(*seen_at) < PROMPT_TIMEOUT_WARNING_SUPPRESSION_WINDOW
+        });
+    }
+
+    /// Delivers a backend prompt result to the currently waiting caller for `session_id`.
+    ///
+    /// The waiter and any timeout marker for the session are removed regardless of the outcome.
+    /// Returns `true` only when a waiter was registered and its receiver accepted the response;
+    /// returns `false` when no waiter existed or the receiver had already been dropped.
+    // NOTE(review): presumably the caller of this method lands in a follow-up change — confirm,
+    // then drop the `allow`.
+    #[allow(dead_code)]
+    pub fn resolve_waiter(
+        &self,
+        session_id: &SessionId,
+        response: std::result::Result<PromptResponse, String>,
+    ) -> bool {
+        // Each lock guard is a statement-scoped temporary, so neither lock is held while sending.
+        let sender = Self::lock(&self.waiters).remove(session_id);
+        Self::lock(&self.timed_out).remove(session_id);
+        sender.is_some_and(|sender| sender.send(response).is_ok())
+    }
+
+    /// Removes a waiter for `session_id` without delivering a response.
+    ///
+    /// Used by cancellation/drop paths where the caller is no longer waiting.
+    // NOTE(review): removal is keyed by session id alone, so a guard that outlives its resolved
+    // waiter would remove a newer registration for the same session — confirm callers drop the
+    // guard before the session can register again.
+    pub fn remove_waiter(&self, session_id: &SessionId) {
+        Self::lock(&self.waiters).remove(session_id);
+    }
+}