Tighten audit follow-up: name magic constants, share reap helper, dedupe tool-block filter

alexylon · alexylon · commit df9235e119a3 · 2026-05-20T00:40:57.000+03:00
diff --git a/README.md b/README.md
@@ -201,10 +201,12 @@ sofos --resume
 - **Enter** submits the current message.
 - **Shift+Enter** inserts a newline when the terminal supports it.
 - **Alt+Enter** or **Ctrl+Enter** can be used as newline fallbacks.
-- Typing a leading `/` opens an inline command suggestion list. Use **Up / Down** to highlight an entry, **Enter** to run the highlighted command, **Tab** to insert it into the input, and **Esc** to dismiss the list.
+- **Ctrl+U** deletes from the cursor to the start of the line; **Ctrl+W** deletes the previous word; **Ctrl+K** deletes to the end of the line — matching readline / bash / zsh / fish.
+- **Alt+Up** / **Alt+Down** walk through previously submitted prompts. The in-progress draft is preserved and restored when you walk past the newest entry.
+- Typing a leading `/` opens an inline command suggestion list. Use **Up / Down** to highlight an entry, **Enter** to run the highlighted command, **Tab** to insert it into the input, and **Esc** (or **Ctrl+C**) to dismiss the list.
 - You can keep typing while the model is working. New messages are queued and processed in order.
 - If the model is inside a tool loop, a queued message is delivered at the next tool-result boundary so it can steer the current turn without interrupting it.
-- The status line shows the model, mode, reasoning setting, and running token totals.
+- The status line shows the model, mode, reasoning setting, running token totals, and (when present) the cumulative cache-read and cache-creation tokens.
 
 ### One-shot prompts
 
diff --git a/src/api/types.rs b/src/api/types.rs
@@ -494,6 +494,22 @@ pub enum ImageSource {
 }
 
 impl MessageContentBlock {
+    /// True for the three tool-related block types that cannot be left
+    /// dangling: `ToolUse` and `ServerToolUse` MUST be paired with a
+    /// matching result, and `WebSearchToolResult` MUST follow its
+    /// `server_tool_use`, or the provider rejects the request. Sofos
+    /// filters these out from truncated responses, max-iter recovery
+    /// turns, and tail-orphan resume cleanup, so the predicate lives
+    /// next to the enum it discriminates, not repeated per call site.
+    pub fn is_tool_call_initiator(&self) -> bool {
+        matches!(
+            self,
+            MessageContentBlock::ToolUse { .. }
+                | MessageContentBlock::ServerToolUse { .. }
+                | MessageContentBlock::WebSearchToolResult { .. }
+        )
+    }
+
     pub fn from_content_block_for_api(block: &ContentBlock) -> Self {
         match block {
             ContentBlock::Text { text } => MessageContentBlock::Text {
diff --git a/src/api/utils.rs b/src/api/utils.rs
@@ -373,6 +373,15 @@ pub const MAX_SSE_BUFFER_BYTES: usize = 16 * 1024 * 1024;
 /// expected to apply [`truncate_at_char_boundary`] separately if the
 /// body needs a length cap.
 pub fn redact_api_secrets(body: &str) -> String {
+    /// Minimum byte count for a `sk-…` run we treat as a real key.
+    /// Below this, the prefix is just an unrelated `sk-` substring
+    /// (a CSS class, an error code, a stray identifier).
+    const SK_KEY_MIN_LEN: usize = 11;
+    /// Same idea on the bearer side, sized against the random tail
+    /// that follows the `Bearer ` prefix.
+    const BEARER_TAIL_MIN_LEN: usize = 8;
+    const BEARER_PREFIX_LEN: usize = "Bearer ".len();
+
     fn is_key_byte(b: u8) -> bool {
         b.is_ascii_alphanumeric() || b == b'_' || b == b'-'
     }
@@ -386,20 +395,19 @@ pub fn redact_api_secrets(body: &str) -> String {
             while end < bytes.len() && is_key_byte(bytes[end]) {
                 end += 1;
             }
-            if end - i >= 11 {
+            if end - i >= SK_KEY_MIN_LEN {
                 out.push_str("sk-[redacted]");
                 i = end;
                 continue;
             }
         }
         if bytes[i..].starts_with(b"Bearer ") || bytes[i..].starts_with(b"bearer ") {
-            let prefix_len = 7;
-            let mut end = i + prefix_len;
+            let mut end = i + BEARER_PREFIX_LEN;
             while end < bytes.len() && is_key_byte(bytes[end]) {
                 end += 1;
             }
-            if end - i >= prefix_len + 8 {
-                out.push_str(&body[i..i + prefix_len]);
+            if end - i >= BEARER_PREFIX_LEN + BEARER_TAIL_MIN_LEN {
+                out.push_str(&body[i..i + BEARER_PREFIX_LEN]);
                 out.push_str("[redacted]");
                 i = end;
                 continue;
diff --git a/src/mcp/transport/stdio.rs b/src/mcp/transport/stdio.rs
@@ -308,6 +308,24 @@ pub struct StdioClient {
     next_id: Arc<AtomicU64>,
 }
 
+/// Send SIGKILL / `TerminateProcess`, then poll `try_wait` for up to
+/// [`STDIO_DROP_WAIT_TOTAL`] before giving up. Shared by `Drop` and
+/// the timeout-recovery `kill_child_detached` path so the same
+/// bounded shutdown applies whether the reap runs inline or on a
+/// blocking task. Returns once the child is reaped, `try_wait` fails,
+/// or the budget expires; the OS reaps any survivor when sofos exits.
+fn kill_and_reap_bounded(child: &mut Child) {
+    let _ = child.kill();
+    let start = std::time::Instant::now();
+    while start.elapsed() < STDIO_DROP_WAIT_TOTAL {
+        match child.try_wait() {
+            Ok(Some(_)) => return,
+            Ok(None) => std::thread::sleep(STDIO_DROP_POLL_INTERVAL),
+            Err(_) => return,
+        }
+    }
+}
+
 impl Drop for StdioClient {
     fn drop(&mut self) {
         // `Child::drop` does NOT wait on the subprocess, so without
@@ -317,22 +335,9 @@ impl Drop for StdioClient {
         // them, and the kill/wait pair is idempotent: a second `kill`
         // after the child already exited returns `InvalidInput`, and
         // a second `wait` after the child was already reaped returns
-        // a harmless error. Both are discarded.
-        //
-        // The wait is bounded with `try_wait` so a child stuck in
-        // uninterruptible IO cannot freeze session shutdown — after
-        // the budget we leave the child as a zombie and let the OS
-        // reap it when sofos itself exits.
+        // a harmless error.
         if let Ok(mut child) = self.process.lock() {
-            let _ = child.kill();
-            let start = std::time::Instant::now();
-            while start.elapsed() < STDIO_DROP_WAIT_TOTAL {
-                match child.try_wait() {
-                    Ok(Some(_)) => return,
-                    Ok(None) => std::thread::sleep(STDIO_DROP_POLL_INTERVAL),
-                    Err(_) => return,
-                }
-            }
+            kill_and_reap_bounded(&mut child);
         }
     }
 }
@@ -437,15 +442,7 @@ impl StdioClient {
         let process = Arc::clone(&self.process);
         tokio::task::spawn_blocking(move || {
             if let Ok(mut child) = process.lock() {
-                let _ = child.kill();
-                let start = std::time::Instant::now();
-                while start.elapsed() < STDIO_DROP_WAIT_TOTAL {
-                    match child.try_wait() {
-                        Ok(Some(_)) => return,
-                        Ok(None) => std::thread::sleep(STDIO_DROP_POLL_INTERVAL),
-                        Err(_) => return,
-                    }
-                }
+                kill_and_reap_bounded(&mut child);
             }
         });
     }
diff --git a/src/repl/conversation/lifecycle.rs b/src/repl/conversation/lifecycle.rs
@@ -230,25 +230,10 @@ impl ConversationHistory {
         let crate::api::MessageContent::Blocks { content } = &mut last.content else {
             return false;
         };
-        let had_initiator = content.iter().any(|b| {
-            matches!(
-                b,
-                crate::api::MessageContentBlock::ToolUse { .. }
-                    | crate::api::MessageContentBlock::ServerToolUse { .. }
-                    | crate::api::MessageContentBlock::WebSearchToolResult { .. }
-            )
-        });
-        if !had_initiator {
+        if !content.iter().any(|b| b.is_tool_call_initiator()) {
             return false;
         }
-        content.retain(|b| {
-            !matches!(
-                b,
-                crate::api::MessageContentBlock::ToolUse { .. }
-                    | crate::api::MessageContentBlock::ServerToolUse { .. }
-                    | crate::api::MessageContentBlock::WebSearchToolResult { .. }
-            )
-        });
+        content.retain(|b| !b.is_tool_call_initiator());
         if content.is_empty() {
             content.push(crate::api::MessageContentBlock::Text {
                 text: "[Tool call interrupted before execution]".to_string(),
diff --git a/src/repl/response_handler.rs b/src/repl/response_handler.rs
@@ -156,14 +156,7 @@ impl ResponseHandler {
                     // matching `tool_result` (or a server tool result
                     // without its `server_tool_use`) puts the next
                     // request in a shape the provider will reject.
-                    message_blocks.retain(|block| {
-                        !matches!(
-                            block,
-                            crate::api::MessageContentBlock::ToolUse { .. }
-                                | crate::api::MessageContentBlock::ServerToolUse { .. }
-                                | crate::api::MessageContentBlock::WebSearchToolResult { .. }
-                        )
-                    });
+                    message_blocks.retain(|block| !block.is_tool_call_initiator());
                     if message_blocks.is_empty() {
                         // The truncated response was tool-use only. Record
                         // a short placeholder so the conversation keeps
@@ -697,14 +690,7 @@ impl ResponseHandler {
                     .content
                     .iter()
                     .map(crate::api::MessageContentBlock::from_content_block_for_api)
-                    .filter(|block| {
-                        !matches!(
-                            block,
-                            crate::api::MessageContentBlock::ToolUse { .. }
-                                | crate::api::MessageContentBlock::ServerToolUse { .. }
-                                | crate::api::MessageContentBlock::WebSearchToolResult { .. }
-                        )
-                    })
+                    .filter(|block| !block.is_tool_call_initiator())
                     .collect();
                 if !message_blocks.is_empty() {
                     self.conversation.add_assistant_with_blocks(message_blocks);
diff --git a/src/tools/filesystem.rs b/src/tools/filesystem.rs
@@ -99,6 +99,14 @@ fn write_atomic(path: &Path, content: &str) -> std::io::Result<()> {
     Ok(())
 }
 
+/// Windows error code returned by `MoveFile` when source and
+/// destination are on different volumes. The `winapi` crate exposes
+/// this as `ERROR_NOT_SAME_DEVICE`; the literal is used directly so
+/// the Unix build doesn't pull in a Windows-only dependency for one
+/// integer.
+#[cfg(windows)]
+const ERROR_NOT_SAME_DEVICE: i32 = 17;
+
 /// True when `e` describes a rename that crossed a filesystem
 /// boundary. Uses the stable `ErrorKind::CrossesDevices` mapping
 /// first; falls back to the platform-specific raw code so a future
@@ -113,8 +121,7 @@ fn is_cross_device_error(e: &std::io::Error) -> bool {
     }
     #[cfg(windows)]
     {
-        // ERROR_NOT_SAME_DEVICE
-        e.raw_os_error() == Some(17)
+        e.raw_os_error() == Some(ERROR_NOT_SAME_DEVICE)
     }
     #[cfg(not(any(unix, windows)))]
     {
diff --git a/src/ui/cost.rs b/src/ui/cost.rs
@@ -8,20 +8,9 @@ use colored::Colorize;
 /// record.
 const CACHE_READ_RATE: f64 = 0.10;
 /// Multiplier applied to the base input price for tokens written to a
-/// 5-minute Anthropic cache breakpoint. OpenAI has no separate
-/// creation charge (the wire format never reports cache-creation
-/// tokens for OpenAI requests), so the multiplier only fires on
-/// Anthropic responses.
-///
-/// NOTE: Anthropic also exposes a 1-hour TTL on the *last* tool
-/// definition breakpoint, billed at 2× the input rate rather than
-/// 1.25×. Sofos stamps that breakpoint with `ephemeral_one_hour`
-/// today but uses the same constant here, so cost summaries
-/// under-report the cache-creation premium on the 1-hour anchor by
-/// a roughly 40% margin. Weighting by an empirical 5m/1h mix factor
-/// would close the gap, but the absolute amount on a typical session
-/// is small enough that documenting the under-report is more useful
-/// than a model-specific lookup.
+/// 5-minute Anthropic cache breakpoint. OpenAI has no creation charge.
+/// The 1-hour breakpoint Anthropic exposes for the last tool definition
+/// bills at 2×, not 1.25× — the cost summary under-reports that anchor.
 const CACHE_CREATION_RATE: f64 = 1.25;
 
 /// True for models hosted by OpenAI. Used by the cost and

Original file line number	Diff line number	Diff line change
`@@ -99,6 +99,14 @@ fn write_atomic(path: &Path, content: &str) -> std::io::Result<()> {`
`99`	`99`	`Ok(())`
`100`	`100`	`}`
`101`	`101`
	`102`	+/// Windows error code returned by `MoveFile` when source and
	`103`	+/// destination are on different volumes. The `winapi` crate exposes
	`104`	+/// this as `ERROR_NOT_SAME_DEVICE`; the literal is used directly so
	`105`	`+/// the Unix build doesn't pull in a Windows-only dependency for one`
	`106`	`+/// integer.`
	`107`	`+#[cfg(windows)]`
	`108`	`+const ERROR_NOT_SAME_DEVICE: i32 = 17;`
	`109`	`+`
`102`	`110`	/// True when `e` describes a rename that crossed a filesystem
`103`	`111`	/// boundary. Uses the stable `ErrorKind::CrossesDevices` mapping
`104`	`112`	`/// first; falls back to the platform-specific raw code so a future`
`@@ -113,8 +121,7 @@ fn is_cross_device_error(e: &std::io::Error) -> bool {`
`113`	`121`	`}`
`114`	`122`	`#[cfg(windows)]`
`115`	`123`	`{`
`116`		`- // ERROR_NOT_SAME_DEVICE`
`117`		`- e.raw_os_error() == Some(17)`
	`124`	`+ e.raw_os_error() == Some(ERROR_NOT_SAME_DEVICE)`
`118`	`125`	`}`
`119`	`126`	`#[cfg(not(any(unix, windows)))]`
`120`	`127`	`{`