Skip to content

Commit 2821c3b

Browse files
committed
Fix audit follow-up bugs and apply MCP init timeout to HTTP transport
1 parent df9235e commit 2821c3b

17 files changed

Lines changed: 147 additions & 179 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ All notable changes to Sofos are documented in this file.
4848

4949
- **`web_fetch` follows at most three redirects, and only to http(s) targets.** The default `reqwest` redirect policy would let an LLM-supplied URL chain through up to ten hops including non-http(s) schemes (`file://`, `data:`, custom). Sofos now caps the hop count and rejects any redirect with a non-http(s) scheme so a content URL can't slip into a different protocol mid-chain.
5050
- **MCP server names are restricted to `[A-Za-z0-9_-]+`.** Two visually-identical Unicode names (`github` vs `gith\u{1d62}ub`) used to produce distinct map entries and route differently while looking the same in config; sofos now refuses non-ASCII server names at load time.
51-
- **MCP `initialize` uses a 15-second timeout.** Tool calls keep the existing two-minute ceiling, but the handshake no longer holds session startup hostage for two minutes per misconfigured server.
51+
- **MCP `initialize` uses a 15-second timeout on both stdio and HTTP transports.** Tool calls keep the existing two-minute ceiling, but the handshake no longer holds session startup hostage for two minutes per misconfigured server.
5252
- **The clipboard image binary check is the effective cap.** Base64 expansion (~33 %) used to make the post-encode check the actual gate, while the binary check at the same limit was dead. Sofos now caps the binary side at three-quarters of the limit so a marginally oversized image fails fast with a clearer reason.
5353

5454
### Changed

src/clipboard.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,9 @@ pub fn get_clipboard_image() -> Option<PastedImage> {
7171
writer.write_image_data(&image.bytes).ok()?;
7272
}
7373

74-
// Base64 expands binary by ~33%, so a binary blob just under
75-
// MAX_CLIPBOARD_IMAGE_BYTES becomes ~1.33x that after encoding and
76-
// then trips the second check anyway. Cap the binary side at three
77-
// quarters of the limit so the binary check is the effective gate
78-
// and a marginally-oversize blob fails fast with a clearer reason.
74+
// Binary cap is 3/4 of the API limit because base64 inflates by
75+
// ~33% — without it the binary check is dead and the post-encode
76+
// check is the effective gate.
7977
let binary_cap = MAX_CLIPBOARD_IMAGE_BYTES * 3 / 4;
8078
if buf.len() > binary_cap {
8179
tracing::warn!(

src/mcp/transport/http.rs

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::error::{Result, SofosError};
22
use crate::mcp::client::{
3-
MCP_REQUEST_TIMEOUT, create_call_tool_request, create_init_request, parse_call_tool_response,
4-
parse_list_tools_response,
3+
MCP_INIT_TIMEOUT, MCP_REQUEST_TIMEOUT, create_call_tool_request, create_init_request,
4+
parse_call_tool_response, parse_list_tools_response,
55
};
66
use crate::mcp::config::McpServerConfig;
77
use crate::mcp::protocol::*;
@@ -73,23 +73,28 @@ impl HttpClient {
7373
}
7474

7575
async fn initialize(&self) -> Result<()> {
76-
let response = self
77-
.send_request(
78-
"initialize",
79-
Some(serde_json::to_value(create_init_request())?),
80-
)
81-
.await?;
82-
83-
let _init_result: InitializeResult = serde_json::from_value(response)?;
84-
85-
// The MCP spec requires the client to confirm the handshake
86-
// with a `notifications/initialized` message before sending any
87-
// other request. The stdio transport already did this; HTTP
88-
// didn't, which caused strict servers to reject every later
89-
// request as "not initialized".
90-
self.send_notification_initialized().await?;
91-
92-
Ok(())
76+
// Handshake under a tighter ceiling than tool calls so a
77+
// frozen server can't hold session startup hostage.
78+
let handshake = async {
79+
let response = self
80+
.send_request(
81+
"initialize",
82+
Some(serde_json::to_value(create_init_request())?),
83+
)
84+
.await?;
85+
let _init_result: InitializeResult = serde_json::from_value(response)?;
86+
self.send_notification_initialized().await?;
87+
Ok::<(), SofosError>(())
88+
};
89+
tokio::time::timeout(MCP_INIT_TIMEOUT, handshake)
90+
.await
91+
.map_err(|_| {
92+
SofosError::McpError(format!(
93+
"MCP server '{}' initialize timed out after {}s",
94+
self.server_name,
95+
MCP_INIT_TIMEOUT.as_secs()
96+
))
97+
})?
9398
}
9499

95100
async fn send_notification_initialized(&self) -> Result<()> {

src/mcp/transport/stdio.rs

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,8 @@ fn stdio_request_blocking(
256256
)));
257257
}
258258

259-
// Parse first as a generic Value so we can tell a server-initiated
260-
// request (`method`) apart from a response. We don't speak the
261-
// request side of the spec — `roots/list`, `sampling/createMessage`
262-
// — so a frame carrying `method` is rejected here rather than
263-
// confusing it with a response to our outgoing request.
259+
// Parse as Value first; a `method` field means a server-initiated
260+
// request, which sofos doesn't implement — reject explicitly.
264261
let raw: Value = serde_json::from_str(&response_line).map_err(|e| {
265262
SofosError::McpError(format!(
266263
"Failed to parse response from MCP server '{}': {}",
@@ -282,11 +279,8 @@ fn stdio_request_blocking(
282279
))
283280
})?;
284281

285-
// JSON-RPC requires the response id to match the request id. With
286-
// the in-flight `request_lock` this shouldn't fail today, but
287-
// checking guards against future concurrent variants and against
288-
// servers that emit an unsolicited frame. Accepts both numeric
289-
// and string-shaped echoes of our outgoing numeric id.
282+
// Reject id mismatches; accept either numeric or string echoes
283+
// of the outgoing numeric id (spec lets servers reshape).
290284
if !response.id.matches_outgoing(request_id) {
291285
return Err(SofosError::McpError(format!(
292286
"MCP server '{}' returned response with id {:?}, expected {}",
@@ -308,12 +302,10 @@ pub struct StdioClient {
308302
next_id: Arc<AtomicU64>,
309303
}
310304

311-
/// Send SIGKILL / `TerminateProcess` then poll `try_wait` for up to
312-
/// [`STDIO_DROP_WAIT_TOTAL`] before giving up. Shared by `Drop` and
313-
/// the timeout-recovery `kill_child_detached` path so the same
314-
/// bounded shutdown shape applies whether the reap runs inline or on
315-
/// a blocking task. Returns once the child has been reaped or the
316-
/// budget expires; the OS reaps any survivor when sofos exits.
305+
/// Kill and reap with a bounded `try_wait` loop. Shared by `Drop`
306+
/// and the detached kill path so neither blocks the executor on a
307+
/// child stuck in uninterruptible IO. The OS reaps any survivor
308+
/// when sofos exits.
317309
fn kill_and_reap_bounded(child: &mut Child) {
318310
let _ = child.kill();
319311
let start = std::time::Instant::now();
@@ -351,10 +343,8 @@ impl StdioClient {
351343
let args = config.args.unwrap_or_default();
352344
let env_vars = config.env.unwrap_or_default();
353345

354-
// Spawn the child on a blocking worker. `Command::spawn` performs
355-
// synchronous syscalls (fork+exec / CreateProcess) that on a slow
356-
// filesystem or under load can pause the tokio executor for tens
357-
// to hundreds of milliseconds.
346+
// Spawn off the executor — `Command::spawn` is synchronous and
347+
// can pause tokio noticeably on slow filesystems.
358348
let spawn_name = server_name.clone();
359349
let process = tokio::task::spawn_blocking(move || -> std::io::Result<Child> {
360350
spawn_stdio_child(&command, &args, &env_vars)

src/repl/mod.rs

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,8 @@ impl Repl {
364364
let new_session_id = self.history_manager.generate_unique_session_id();
365365
self.session_state.conversation.clear();
366366
self.session_state.clear(new_session_id);
367-
// The executor stays in safe mode across `/clear`, so the model
368-
// also needs to keep seeing the safe-mode preamble. Without
369-
// this it confidently proposes write/bash and gets opaque
370-
// denials from the tool layer.
367+
// Safe mode survives `/clear`, so the preamble has to ride
368+
// along too — otherwise the model proposes blocked tools.
371369
if self.safe_mode {
372370
self.session_state
373371
.conversation
@@ -427,11 +425,8 @@ impl Repl {
427425
println!();
428426
return;
429427
}
430-
// Anthropic's legacy thinking model rejects requests where
431-
// `max_tokens` is below the configured budget ceiling. Startup
432-
// refuses that combination outright (see `new` above); the same
433-
// gate must fire on mid-session `/effort high` so the next
434-
// turn doesn't 400 with no clear hint at the cause.
428+
// Mirror the startup gate: legacy Anthropic thinking needs
429+
// max_tokens above the budget ceiling.
435430
if matches!(self.client, Anthropic(_))
436431
&& !self.uses_adaptive_thinking()
437432
&& effort.is_enabled()

src/repl/response_handler.rs

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -217,13 +217,9 @@ impl ResponseHandler {
217217
}
218218

219219
if tool_uses.is_empty() {
220-
// Drain any in-flight steer messages even when the
221-
// assistant only produced text. The follow-up branch
222-
// below folds them into the tool-results turn; without
223-
// mirroring the drain here, a text-only turn leaves
224-
// the steer buffered and the next prompt fires as a
225-
// fresh user turn — losing the "mid-turn delivery"
226-
// association the user expected.
220+
// Drain pending steer messages on a text-only turn so
221+
// they aren't lost between this reply and the next
222+
// user prompt.
227223
if let Some(steer_text) = self.drain_steer_messages() {
228224
println!(
229225
"{} {}",
@@ -641,15 +637,9 @@ impl ResponseHandler {
641637
content: "[System: Maximum tool iterations reached]".to_string(),
642638
});
643639

644-
// Let the assistant respond to the interruption. Use
645-
// `run_interruptible` so ESC during this final summary cancels
646-
// the HTTP call instead of blocking on the server.
647-
//
648-
// The recovery request is built without tools. Sending the
649-
// tools array would let the assistant come back with another
650-
// `tool_use` block; persisting that block without the matching
651-
// `tool_result` puts the session in a shape the provider
652-
// rejects on the next request and breaks `--resume`.
640+
// Recovery turn is text-only: clearing `tools` stops the
641+
// assistant from emitting a `tool_use` we'd have to discard
642+
// or pair with a synthetic result before saving.
653643
let mut request = self.build_request();
654644
request.tools = None;
655645
let client = self.client.clone();
@@ -681,11 +671,8 @@ impl ResponseHandler {
681671
}
682672
}
683673

684-
// Even though the recovery request carried no tools,
685-
// strip any tool-related blocks defensively before
686-
// appending. A provider that ignores `tools = None`
687-
// (or returns a cached tool_use from a previous turn)
688-
// would otherwise leave an orphan in the saved session.
674+
// Defensive strip in case the provider returns a cached
675+
// tool_use despite `tools = None`.
689676
let message_blocks: Vec<crate::api::MessageContentBlock> = response
690677
.content
691678
.iter()

src/repl/tui/event_loop.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,15 +221,9 @@ pub(super) async fn event_loop(
221221
UiEvent::WorkerShutdown(summary) => {
222222
app.exit_summary = Some(summary);
223223
app.should_quit = true;
224-
// Unblock a worker that is parked on `reply_rx.recv()`
225-
// for a confirmation modal. Without this, the
226-
// `worker_handle.thread.join()` in `mod.rs` would
227-
// hang forever: the worker is waiting for the
228-
// modal's responder, but the event loop is about
229-
// to drop `app` (and the responder with it) only
230-
// after the join returns. Sending the default
231-
// index back unblocks the worker exactly as if
232-
// the user had pressed Esc.
224+
// Unblock any worker parked on a confirmation modal
225+
// — without this, `thread.join()` would deadlock
226+
// because the responder lives inside `app`.
233227
if let Some(prompt) = app.confirmation.take() {
234228
let _ = prompt.responder.send(prompt.default_index);
235229
}

src/repl/tui/mod.rs

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,14 @@ use input::spawn_input_reader;
4343
use keymap::install_confirm_handler;
4444
use output::OutputCapture;
4545

46-
/// Real-tty handle saved by [`install_panic_hook`] so the hook can
47-
/// route restoration sequences past any `OutputCapture` redirection
48-
/// still in place at panic time. `OnceLock` because the hook is
49-
/// installed once per process and the handle never needs to change
50-
/// afterwards.
46+
/// Real-tty handle saved for the panic hook so its restoration
47+
/// sequences bypass any `OutputCapture` pipe in place at panic time.
5148
static PANIC_TTY: OnceLock<Mutex<std::fs::File>> = OnceLock::new();
5249

53-
/// Install a process-global panic hook that restores the terminal
54-
/// before chaining to the previous hook. Best-effort on every step:
55-
/// raw mode comes off, bracketed paste is disabled, kitty keyboard
56-
/// enhancement flags are popped, and the cursor is shown — written
57-
/// through [`PANIC_TTY`] (when set) so output reaches the real
58-
/// terminal even if `OutputCapture` has redirected stdout into a
59-
/// pipe. Chaining to the previous hook keeps the backtrace flowing
60-
/// to stderr so the user can still file a useful bug report.
50+
/// Process-global panic hook: disable raw mode, disable bracketed
51+
/// paste, pop kitty keyboard flags, show the cursor — all through
52+
/// [`PANIC_TTY`] when set — then chain to the previous hook so the
53+
/// backtrace still prints.
6154
fn install_panic_hook() {
6255
static INIT: std::sync::Once = std::sync::Once::new();
6356
INIT.call_once(|| {
@@ -233,10 +226,8 @@ pub fn run(mut repl: Repl) -> Result<()> {
233226
#[cfg(windows)]
234227
let tty = OpenOptions::new().write(true).open("CONOUT$")?;
235228
let tty_for_backend = tty.try_clone()?;
236-
// Stash a real-tty handle for the panic hook so a panic between
237-
// here and the normal teardown can still write disable-raw-mode +
238-
// disable-bracketed-paste + show-cursor through a writer that
239-
// bypasses the upcoming output-capture redirection.
229+
// Save a real-tty handle for the panic hook before output capture
230+
// redirects stdout.
240231
if let Ok(tty_for_panic) = tty.try_clone() {
241232
let _ = PANIC_TTY.set(Mutex::new(tty_for_panic));
242233
}
@@ -343,11 +334,9 @@ pub fn run(mut repl: Repl) -> Result<()> {
343334

344335
if let Some(summary) = app.exit_summary.take() {
345336
if summary.panicked {
346-
// The worker exited via panic rather than the normal
347-
// shutdown path. The counts in `summary` are zeroed
348-
// placeholders from `ShutdownSender::send_now`, so the
349-
// session-summary table would be misleading; surface the
350-
// explicit notice and skip straight to the goodbye line.
337+
// Counts are zeroed placeholders on the panic path, so
338+
// surface the cause explicitly before the (suppressed)
339+
// summary table.
351340
println!();
352341
UI::print_warning("Session ended unexpectedly. See backtrace above for details.");
353342
}

src/repl/tui/output.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,11 +311,20 @@ fn spawn_reader(
311311
// before sending so the batch-joiner sees a self-contained
312312
// styled line.
313313
let mut state = SgrState::default();
314+
// True when the previous read returned without a newline
315+
// (cap reached). The next read prepends a `[…continued]`
316+
// marker so the user can tell two consecutive captured
317+
// lines logically belong to the same original output.
318+
let mut previous_was_capped = false;
314319
loop {
315320
line.clear();
316321
match read_until_or_cap(&mut buf, b'\n', &mut line, READER_CHUNK_BYTES) {
317322
Ok(0) => break,
318323
Ok(_) => {
324+
// Whether THIS read ended at the cap rather
325+
// than on a real line terminator. Checked
326+
// before the trailing-newline strip below.
327+
let this_capped = !matches!(line.last(), Some(b'\n') | Some(b'\r'));
319328
// Log the *raw* bytes — including the `\n` /
320329
// `\r` suffix — before any stripping. That
321330
// way the log shows exactly what arrived in
@@ -341,6 +350,9 @@ fn spawn_reader(
341350
// resolved-state prefix is still accurate
342351
// across the drop.
343352
strip_trailing_incomplete_csi(&mut text);
353+
if previous_was_capped {
354+
text.insert_str(0, "[…continued] ");
355+
}
344356
let prefix = state.to_ansi_prefix();
345357
state.apply(&text);
346358
let payload = if prefix.is_empty() {
@@ -360,6 +372,7 @@ fn spawn_reader(
360372
{
361373
break;
362374
}
375+
previous_was_capped = this_capped;
363376
}
364377
Err(_) => break,
365378
}

src/repl/tui/slash_popup.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,16 @@ impl SlashPopup {
109109
// text begins with the current input (backspaced). Switching
110110
// to an unrelated command (e.g. dismiss `/clear`, then type
111111
// `/list`) breaks both prefix tests and re-opens the popup.
112-
if let Some(prev) = self.dismissed_for.as_deref() {
113-
if prev == input || input.starts_with(prev) || prev.starts_with(input) {
114-
return;
112+
//
113+
// An empty input fully resets the dismissal: every dismissed
114+
// string trivially starts with "", which would otherwise keep
115+
// the popup suppressed even after the textarea has been
116+
// emptied and a fresh `/` is typed.
117+
if !input.is_empty() {
118+
if let Some(prev) = self.dismissed_for.as_deref() {
119+
if prev == input || input.starts_with(prev) || prev.starts_with(input) {
120+
return;
121+
}
115122
}
116123
}
117124
self.dismissed_for = None;

0 commit comments

Comments
 (0)