From 38a13213766164dcfc823e453c251fd6199f5b39 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 17:09:28 +0000 Subject: [PATCH 1/5] perf(reader): trim allocation hotspots beyond serde_json::Value Six independent reader-module hot paths flagged in #345: - `stable_stringify` now writes JSON directly via a custom `Serializer` with sorted-key behavior, avoiding the intermediate `serde_json::Value` tree for every hash input. - `short_sha256` hex-encodes only the 8 bytes it keeps instead of encoding the full 64-char digest and truncating. - `measure_content_bytes` uses a counting `io::Write` shim instead of materializing the JSON string just to read `.len()`. For tool results this avoids allocating payload-sized strings. - `relationship_key` returns a borrowed tuple of `&str` for the `has_relationship` linear scan (zero allocs per comparison). The cross-line dedup `HashSet` is now keyed by an owned `RelationshipKey` tuple. The duplicate kind/rt match has been collapsed onto the existing `wire_str()` methods. - Tool-result counter increments at four sites in claude.rs / codex.rs use the `entry` API (one lookup, no clone) instead of get-then-insert. - `ProjectResolver::resolve` now holds the cache lock across `resolve_uncached` so concurrent callers with the same `cwd` only walk the filesystem once. Refs #345 https://claude.ai/code/session_01HWW8moqvoV2oEbAZ1RBC1C --- CHANGELOG.md | 11 + crates/relayburn-sdk/src/reader/claude.rs | 70 +-- crates/relayburn-sdk/src/reader/codex.rs | 10 +- crates/relayburn-sdk/src/reader/git.rs | 11 +- crates/relayburn-sdk/src/reader/hash.rs | 421 +++++++++++++++++-- crates/relayburn-sdk/src/reader/user_turn.rs | 30 +- 6 files changed, 469 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94e618ae..91303a84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ Cross-package release notes for relayburn. Package changelogs contain package-le ## [Unreleased] +### Changed + +- `relayburn-sdk`: reader allocation hotspots trimmed — `stable_stringify` now + writes JSON directly via a custom `Serializer` (no intermediate `Value` + tree), `measure_content_bytes` uses a counting writer instead of + materializing the JSON string, `short_sha256` hex-encodes only the 8 bytes + it keeps, relationship dedup uses an owned-tuple `HashSet` key, tool-result + counter increments use the `entry` API, and `ProjectResolver::resolve` + holds the cache lock across `resolve_uncached` so concurrent callers don't + duplicate the filesystem walk. (#345) + ## [2.8.3] - 2026-05-11 ### Changed diff --git a/crates/relayburn-sdk/src/reader/claude.rs b/crates/relayburn-sdk/src/reader/claude.rs index 2a6d84c3..8b35c1dc 100644 --- a/crates/relayburn-sdk/src/reader/claude.rs +++ b/crates/relayburn-sdk/src/reader/claude.rs @@ -258,7 +258,7 @@ struct ParseState { next_event_index: u64, relationships: Vec, seen_root_session_ids: HashSet, - seen_explicit_relationship_ids: HashSet, + seen_explicit_relationship_ids: HashSet, file_session_id: Option, evidence: ClaudeRelationshipEvidence, } @@ -1379,8 +1379,9 @@ fn collect_tool_result_events( Some(s) if !s.is_empty() => s.to_string(), _ => continue, }; - let call_index = *counters.get(&tu).unwrap_or(&0); - counters.insert(tu.clone(), call_index + 1); + let entry = counters.entry(tu.clone()).or_insert(0); + let call_index = *entry; + *entry += 1; let is_error = bo.get("is_error").and_then(Value::as_bool) == Some(true); let mut record = ToolResultEventRecord { v: 1, @@ -1478,8 +1479,9 @@ fn build_claude_system_tool_result_event( if agent_id.is_none() && subagent_session_id.is_none() { return None; } - let call_index = *counters.get(&tool_use_id).unwrap_or(&0); - counters.insert(tool_use_id.clone(), call_index + 1); + let entry = counters.entry(tool_use_id.clone()).or_insert(0); + let call_index = *entry; + *entry += 1; let status = claude_system_event_status(line); let mut record = ToolResultEventRecord { v: 1, @@ -1788,7 +1790,7 @@ fn collect_explicit_claude_relationships( line: &serde_json::Map, evidence: &mut ClaudeRelationshipEvidence, out: &mut Vec, - seen: &mut HashSet, + seen: &mut HashSet, session_id: &str, fallback_ts: Option<&str>, ) { @@ -1897,38 +1899,40 @@ fn append_unique(values: Option>, value: String) -> Vec { v } -fn relationship_key(row: &SessionRelationshipRecord) -> String { - let source = match row.source { - RelationshipSourceKind::ClaudeCode => "claude-code", - RelationshipSourceKind::Codex => "codex", - RelationshipSourceKind::Opencode => "opencode", - RelationshipSourceKind::AnthropicApi => "anthropic-api", - RelationshipSourceKind::OpenaiApi => "openai-api", - RelationshipSourceKind::GeminiApi => "gemini-api", - RelationshipSourceKind::SpawnEnv => "spawn-env", - RelationshipSourceKind::NativeClaude => "native-claude", - RelationshipSourceKind::NativeOpencode => "native-opencode", - }; - let rt = match row.relationship_type { - RelationshipType::Root => "root", - RelationshipType::Continuation => "continuation", - RelationshipType::Fork => "fork", - RelationshipType::Subagent => "subagent", - }; - format!( - "{}|{}|{}|{}|{}|{}", - source, - row.session_id, - rt, +/// Owned, hashable identity for a relationship row. Used as a `HashSet` key +/// for cross-line dedup; cheap because the original `relationship_key` did one +/// `format!`-driven allocation per call but had to be re-run for every +/// candidate during `has_relationship`. +type RelationshipKey = ( + &'static str, + String, + &'static str, + String, + String, + String, +); + +fn relationship_key_borrowed<'a>( + row: &'a SessionRelationshipRecord, +) -> (&'static str, &'a str, &'static str, &'a str, &'a str, &'a str) { + ( + row.source.wire_str(), + row.session_id.as_str(), + row.relationship_type.wire_str(), row.related_session_id.as_deref().unwrap_or(""), row.agent_id.as_deref().unwrap_or(""), row.parent_tool_use_id.as_deref().unwrap_or(""), ) } +fn relationship_key(row: &SessionRelationshipRecord) -> RelationshipKey { + let b = relationship_key_borrowed(row); + (b.0, b.1.to_string(), b.2, b.3.to_string(), b.4.to_string(), b.5.to_string()) +} + fn has_relationship(rows: &[SessionRelationshipRecord], row: &SessionRelationshipRecord) -> bool { - let key = relationship_key(row); - rows.iter().any(|r| relationship_key(r) == key) + let key = relationship_key_borrowed(row); + rows.iter().any(|r| relationship_key_borrowed(r) == key) } fn collect_subagent_relationships(turns: &[TurnRecord], out: &mut Vec) { @@ -2426,7 +2430,7 @@ fn collect_explicit_claude_relationships_incremental( line: &serde_json::Map, evidence: &mut ClaudeRelationshipEvidence, out: &mut Vec<(u64, SessionRelationshipRecord)>, - seen: &mut HashSet, + seen: &mut HashSet, session_id: &str, fallback_ts: Option<&str>, line_offset: u64, @@ -2513,7 +2517,7 @@ fn run_incremental( let mut pending_relationships: Vec<(u64, SessionRelationshipRecord)> = Vec::new(); let mut pending_user_turns: Vec<(u64, UserTurnRecord)> = Vec::new(); let mut seen_root_session_ids: HashSet = HashSet::new(); - let mut seen_explicit_relationship_ids: HashSet = HashSet::new(); + let mut seen_explicit_relationship_ids: HashSet = HashSet::new(); let mut pending_user_turn_inc_idx: Option = None; let mut file = File::open(path)?; diff --git a/crates/relayburn-sdk/src/reader/codex.rs b/crates/relayburn-sdk/src/reader/codex.rs index 7061874e..bbc8c44d 100644 --- a/crates/relayburn-sdk/src/reader/codex.rs +++ b/crates/relayburn-sdk/src/reader/codex.rs @@ -715,8 +715,9 @@ fn parse_codex_buffer( Some(c) if !c.is_empty() => c.to_string(), _ => continue, }; - let call_index = *tool_result_counters.get(&call_id).unwrap_or(&0); - tool_result_counters.insert(call_id.clone(), call_index + 1); + let entry = tool_result_counters.entry(call_id.clone()).or_insert(0); + let call_index = *entry; + *entry += 1; let status = subagent_notification_status(payload); let mut ev = ToolResultEventRecord { v: 1, @@ -879,8 +880,9 @@ fn parse_codex_buffer( if user_turn_slot.ts.is_empty() && !item_ts.is_empty() { user_turn_slot.ts = item_ts.to_string(); } - let call_index = *tool_result_counters.get(&call_id).unwrap_or(&0); - tool_result_counters.insert(call_id.clone(), call_index + 1); + let entry = tool_result_counters.entry(call_id.clone()).or_insert(0); + let call_index = *entry; + *entry += 1; let initial_status = if open_turn .as_ref() .map(|o| o.errored_call_ids.contains(&call_id)) diff --git a/crates/relayburn-sdk/src/reader/git.rs b/crates/relayburn-sdk/src/reader/git.rs index 8ec32fe0..532a7f2b 100644 --- a/crates/relayburn-sdk/src/reader/git.rs +++ b/crates/relayburn-sdk/src/reader/git.rs @@ -40,16 +40,15 @@ impl ProjectResolver { } /// Resolve a project for `cwd`, consulting (and populating) the cache. + /// Holds the cache lock across `resolve_uncached` so concurrent callers + /// with the same `cwd` only do the filesystem walk once. pub fn resolve(&self, cwd: &str) -> ResolvedProject { - if let Some(hit) = self.cache.lock().unwrap().get(cwd) { + let mut cache = self.cache.lock().unwrap(); + if let Some(hit) = cache.get(cwd) { return hit.clone(); } let result = resolve_uncached(cwd); - self.cache - .lock() - .unwrap() - .entry(cwd.to_string()) - .or_insert_with(|| result.clone()); + cache.insert(cwd.to_string(), result.clone()); result } diff --git a/crates/relayburn-sdk/src/reader/hash.rs b/crates/relayburn-sdk/src/reader/hash.rs index f963962c..4df3bb41 100644 --- a/crates/relayburn-sdk/src/reader/hash.rs +++ b/crates/relayburn-sdk/src/reader/hash.rs @@ -8,53 +8,23 @@ //! [`args_hash`] / [`content_hash`] mirrors the TS slice so detector output //! stays visually consistent. -use serde::Serialize; -use serde_json::Value; +use std::fmt; + +use serde::ser::{ + self, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, + SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer, +}; use sha2::{Digest, Sha256}; /// Stable JSON stringification: object keys are sorted, arrays keep order, /// primitives serialize the way `serde_json` does. Mirrors the TS /// `stableStringify` so hash inputs are byte-identical across the two ports. pub fn stable_stringify(value: &T) -> String { - let v = serde_json::to_value(value).unwrap_or(Value::Null); let mut out = String::new(); - write_stable(&v, &mut out); + let _ = value.serialize(StableSerializer { out: &mut out }); out } -fn write_stable(value: &Value, out: &mut String) { - match value { - Value::Null => out.push_str("null"), - Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }), - Value::Number(n) => out.push_str(&n.to_string()), - Value::String(s) => out.push_str(&serde_json::to_string(s).unwrap()), - Value::Array(arr) => { - out.push('['); - for (i, v) in arr.iter().enumerate() { - if i > 0 { - out.push(','); - } - write_stable(v, out); - } - out.push(']'); - } - Value::Object(obj) => { - let mut keys: Vec<&String> = obj.keys().collect(); - keys.sort(); - out.push('{'); - for (i, k) in keys.iter().enumerate() { - if i > 0 { - out.push(','); - } - out.push_str(&serde_json::to_string(k).unwrap()); - out.push(':'); - write_stable(&obj[*k], out); - } - out.push('}'); - } - } -} - /// Short hash of any serializable value. Mirrors TS `argsHash`: sha256 over /// [`stable_stringify`], hex-encoded, truncated to 16 chars. pub fn args_hash(input: &T) -> String { @@ -70,9 +40,366 @@ pub fn content_hash(s: impl AsRef<[u8]>) -> String { fn short_sha256(bytes: &[u8]) -> String { let mut hasher = Sha256::new(); hasher.update(bytes); - let mut hex = hex::encode(hasher.finalize()); - hex.truncate(16); - hex + hex::encode(&hasher.finalize()[..8]) +} + +#[derive(Debug)] +struct StableError(String); + +impl fmt::Display for StableError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +impl std::error::Error for StableError {} + +impl ser::Error for StableError { + fn custom(msg: T) -> Self { + Self(msg.to_string()) + } +} + +/// Custom serializer that writes JSON directly into a `String` buffer with +/// sorted object keys. Avoids materializing an intermediate `serde_json::Value` +/// tree for every hash input. Primitive formatting (numbers, escaped strings) +/// is delegated to `serde_json` so the output is byte-identical to a Value +/// roundtrip. +struct StableSerializer<'a> { + out: &'a mut String, +} + +fn write_primitive(out: &mut String, value: &T) -> Result<(), StableError> { + let s = serde_json::to_string(value).map_err(|e| StableError(e.to_string()))?; + out.push_str(&s); + Ok(()) +} + +impl<'a> Serializer for StableSerializer<'a> { + type Ok = (); + type Error = StableError; + type SerializeSeq = StableSeq<'a>; + type SerializeTuple = StableSeq<'a>; + type SerializeTupleStruct = StableSeq<'a>; + type SerializeTupleVariant = StableTupleVariant<'a>; + type SerializeMap = StableMap<'a>; + type SerializeStruct = StableMap<'a>; + type SerializeStructVariant = StableStructVariant<'a>; + + fn serialize_bool(self, v: bool) -> Result<(), StableError> { + self.out.push_str(if v { "true" } else { "false" }); + Ok(()) + } + fn serialize_i8(self, v: i8) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_i16(self, v: i16) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_i32(self, v: i32) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_i64(self, v: i64) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_i128(self, v: i128) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_u8(self, v: u8) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_u16(self, v: u16) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_u32(self, v: u32) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_u64(self, v: u64) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_u128(self, v: u128) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_f32(self, v: f32) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_f64(self, v: f64) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_char(self, v: char) -> Result<(), StableError> { + write_primitive(self.out, &v) + } + fn serialize_str(self, v: &str) -> Result<(), StableError> { + write_primitive(self.out, v) + } + fn serialize_bytes(self, v: &[u8]) -> Result<(), StableError> { + write_primitive(self.out, v) + } + fn serialize_none(self) -> Result<(), StableError> { + self.out.push_str("null"); + Ok(()) + } + fn serialize_some(self, v: &T) -> Result<(), StableError> { + v.serialize(self) + } + fn serialize_unit(self) -> Result<(), StableError> { + self.out.push_str("null"); + Ok(()) + } + fn serialize_unit_struct(self, _: &'static str) -> Result<(), StableError> { + self.out.push_str("null"); + Ok(()) + } + fn serialize_unit_variant( + self, + _: &'static str, + _: u32, + variant: &'static str, + ) -> Result<(), StableError> { + write_primitive(self.out, variant) + } + fn serialize_newtype_struct( + self, + _: &'static str, + v: &T, + ) -> Result<(), StableError> { + v.serialize(self) + } + fn serialize_newtype_variant( + self, + _: &'static str, + _: u32, + variant: &'static str, + v: &T, + ) -> Result<(), StableError> { + self.out.push('{'); + write_primitive(self.out, variant)?; + self.out.push(':'); + v.serialize(StableSerializer { out: self.out })?; + self.out.push('}'); + Ok(()) + } + fn serialize_seq(self, _: Option) -> Result, StableError> { + self.out.push('['); + Ok(StableSeq { + out: self.out, + first: true, + }) + } + fn serialize_tuple(self, len: usize) -> Result, StableError> { + self.serialize_seq(Some(len)) + } + fn serialize_tuple_struct( + self, + _: &'static str, + len: usize, + ) -> Result, StableError> { + self.serialize_seq(Some(len)) + } + fn serialize_tuple_variant( + self, + _: &'static str, + _: u32, + variant: &'static str, + _: usize, + ) -> Result, StableError> { + self.out.push('{'); + write_primitive(self.out, variant)?; + self.out.push(':'); + self.out.push('['); + Ok(StableTupleVariant { + out: self.out, + first: true, + }) + } + fn serialize_map(self, _: Option) -> Result, StableError> { + Ok(StableMap { + out: self.out, + entries: Vec::new(), + current_key: None, + }) + } + fn serialize_struct( + self, + _: &'static str, + _: usize, + ) -> Result, StableError> { + Ok(StableMap { + out: self.out, + entries: Vec::new(), + current_key: None, + }) + } + fn serialize_struct_variant( + self, + _: &'static str, + _: u32, + variant: &'static str, + _: usize, + ) -> Result, StableError> { + self.out.push('{'); + write_primitive(self.out, variant)?; + self.out.push(':'); + Ok(StableStructVariant { + out: self.out, + entries: Vec::new(), + }) + } +} + +struct StableSeq<'a> { + out: &'a mut String, + first: bool, +} + +impl<'a> SerializeSeq for StableSeq<'a> { + type Ok = (); + type Error = StableError; + fn serialize_element(&mut self, v: &T) -> Result<(), StableError> { + if !self.first { + self.out.push(','); + } + self.first = false; + v.serialize(StableSerializer { out: self.out }) + } + fn end(self) -> Result<(), StableError> { + self.out.push(']'); + Ok(()) + } +} + +impl<'a> SerializeTuple for StableSeq<'a> { + type Ok = (); + type Error = StableError; + fn serialize_element(&mut self, v: &T) -> Result<(), StableError> { + SerializeSeq::serialize_element(self, v) + } + fn end(self) -> Result<(), StableError> { + SerializeSeq::end(self) + } +} + +impl<'a> SerializeTupleStruct for StableSeq<'a> { + type Ok = (); + type Error = StableError; + fn serialize_field(&mut self, v: &T) -> Result<(), StableError> { + SerializeSeq::serialize_element(self, v) + } + fn end(self) -> Result<(), StableError> { + SerializeSeq::end(self) + } +} + +struct StableTupleVariant<'a> { + out: &'a mut String, + first: bool, +} + +impl<'a> SerializeTupleVariant for StableTupleVariant<'a> { + type Ok = (); + type Error = StableError; + fn serialize_field(&mut self, v: &T) -> Result<(), StableError> { + if !self.first { + self.out.push(','); + } + self.first = false; + v.serialize(StableSerializer { out: self.out }) + } + fn end(self) -> Result<(), StableError> { + self.out.push_str("]}"); + Ok(()) + } +} + +struct StableMap<'a> { + out: &'a mut String, + entries: Vec<(String, String)>, + current_key: Option, +} + +fn finalize_object(out: &mut String, entries: &mut [(String, String)]) { + entries.sort_by(|a, b| a.0.cmp(&b.0)); + out.push('{'); + for (i, (k, v)) in entries.iter().enumerate() { + if i > 0 { + out.push(','); + } + out.push_str(k); + out.push(':'); + out.push_str(v); + } + out.push('}'); +} + +impl<'a> SerializeMap for StableMap<'a> { + type Ok = (); + type Error = StableError; + fn serialize_key(&mut self, key: &T) -> Result<(), StableError> { + let mut buf = String::new(); + key.serialize(StableSerializer { out: &mut buf })?; + self.current_key = Some(buf); + Ok(()) + } + fn serialize_value(&mut self, value: &T) -> Result<(), StableError> { + let mut buf = String::new(); + value.serialize(StableSerializer { out: &mut buf })?; + let k = self + .current_key + .take() + .ok_or_else(|| StableError("value before key".into()))?; + self.entries.push((k, buf)); + Ok(()) + } + fn end(mut self) -> Result<(), StableError> { + finalize_object(self.out, &mut self.entries); + Ok(()) + } +} + +impl<'a> SerializeStruct for StableMap<'a> { + type Ok = (); + type Error = StableError; + fn serialize_field( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), StableError> { + let mut vbuf = String::new(); + value.serialize(StableSerializer { out: &mut vbuf })?; + let kbuf = serde_json::to_string(key).map_err(|e| StableError(e.to_string()))?; + self.entries.push((kbuf, vbuf)); + Ok(()) + } + fn end(mut self) -> Result<(), StableError> { + finalize_object(self.out, &mut self.entries); + Ok(()) + } +} + +struct StableStructVariant<'a> { + out: &'a mut String, + entries: Vec<(String, String)>, +} + +impl<'a> SerializeStructVariant for StableStructVariant<'a> { + type Ok = (); + type Error = StableError; + fn serialize_field( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), StableError> { + let mut vbuf = String::new(); + value.serialize(StableSerializer { out: &mut vbuf })?; + let kbuf = serde_json::to_string(key).map_err(|e| StableError(e.to_string()))?; + self.entries.push((kbuf, vbuf)); + Ok(()) + } + fn end(mut self) -> Result<(), StableError> { + finalize_object(self.out, &mut self.entries); + self.out.push('}'); + Ok(()) + } } #[cfg(test)] @@ -115,6 +442,20 @@ mod tests { assert_eq!(s, r#"{"a":1,"b":"two"}"#); } + #[test] + fn stable_stringify_sorts_struct_fields() { + #[derive(serde::Serialize)] + struct Args { + zebra: u32, + apple: u32, + } + let s = stable_stringify(&Args { + zebra: 1, + apple: 2, + }); + assert_eq!(s, r#"{"apple":2,"zebra":1}"#); + } + #[test] fn args_hash_is_16_chars_hex() { let h = args_hash(&json!({ "command": "ls", "cwd": "/tmp" })); diff --git a/crates/relayburn-sdk/src/reader/user_turn.rs b/crates/relayburn-sdk/src/reader/user_turn.rs index 616a3e85..5ea4ce80 100644 --- a/crates/relayburn-sdk/src/reader/user_turn.rs +++ b/crates/relayburn-sdk/src/reader/user_turn.rs @@ -79,7 +79,35 @@ impl UserTurnBlock { } pub fn measure_content_bytes(content: &Value) -> u64 { - stringify_measured_content(content).len() as u64 + match content { + Value::Null => 0, + Value::String(s) => s.len() as u64, + other => { + // Counting writer: tallies bytes serde_json would emit without + // materializing the JSON string. For tool results this avoids + // allocating an entire payload-sized String just to read `.len()`. + let mut counter = ByteCountWriter::default(); + if serde_json::to_writer(&mut counter, other).is_err() { + return 0; + } + counter.count + } + } +} + +#[derive(Default)] +struct ByteCountWriter { + count: u64, +} + +impl std::io::Write for ByteCountWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.count += buf.len() as u64; + Ok(buf.len()) + } + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } } pub fn stringify_measured_content(content: &Value) -> String { From 0557c8c34c7b6835194f8fcfa0afe1c471219261 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 17:48:53 +0000 Subject: [PATCH 2/5] fix(reader): match serde_json key handling in stable_stringify Two regressions in the custom Serializer flagged in PR review: 1. Sort map keys by raw string value, not their JSON-encoded form. Encoding-time keys diverge when keys contain characters JSON escapes (control chars, quotes); the previous Value-roundtrip path sorted on raw `String`s, and the cross-port hash contract requires the same ordering. 2. Coerce non-string map keys to their string form (or error), mirroring serde_json's MapKeySerializer. Previously a HashMap emitted invalid `{1:...}`; now it emits `{"1":...}` as serde_json's Value-roundtrip path would. Also trim the CHANGELOG entry to impact-only per project guidance. Refs #345 https://claude.ai/code/session_01HWW8moqvoV2oEbAZ1RBC1C --- CHANGELOG.md | 11 +- crates/relayburn-sdk/src/reader/hash.rs | 191 +++++++++++++++++++++++- 2 files changed, 186 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91303a84..03ef3cda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,14 +6,9 @@ Cross-package release notes for relayburn. Package changelogs contain package-le ### Changed -- `relayburn-sdk`: reader allocation hotspots trimmed — `stable_stringify` now - writes JSON directly via a custom `Serializer` (no intermediate `Value` - tree), `measure_content_bytes` uses a counting writer instead of - materializing the JSON string, `short_sha256` hex-encodes only the 8 bytes - it keeps, relationship dedup uses an owned-tuple `HashSet` key, tool-result - counter increments use the `entry` API, and `ProjectResolver::resolve` - holds the cache lock across `resolve_uncached` so concurrent callers don't - duplicate the filesystem walk. (#345) +- `relayburn-sdk`: lower per-record allocations in reader hashing, tool-result + sizing, relationship dedup, and project resolution. Cuts overhead during + large session imports and concurrent `resolve_project` calls. ## [2.8.3] - 2026-05-11 diff --git a/crates/relayburn-sdk/src/reader/hash.rs b/crates/relayburn-sdk/src/reader/hash.rs index 4df3bb41..76be91c3 100644 --- a/crates/relayburn-sdk/src/reader/hash.rs +++ b/crates/relayburn-sdk/src/reader/hash.rs @@ -318,13 +318,20 @@ struct StableMap<'a> { } fn finalize_object(out: &mut String, entries: &mut [(String, String)]) { + // Sort by the raw (unescaped) key so the order matches serde_json's + // BTreeMap-backed Value path. Sorting by JSON-encoded keys diverges for + // keys containing characters that JSON escapes (control chars, quotes). entries.sort_by(|a, b| a.0.cmp(&b.0)); out.push('{'); for (i, (k, v)) in entries.iter().enumerate() { if i > 0 { out.push(','); } - out.push_str(k); + // JSON-encode the key at write time. Using `to_string` here matches + // `serde_json::to_string` for string values — i.e. the exact same + // escaping the previous `Value::String(s) => to_string(s)` path used. + let encoded = serde_json::to_string(k).unwrap_or_else(|_| String::from("\"\"")); + out.push_str(&encoded); out.push(':'); out.push_str(v); } @@ -335,9 +342,11 @@ impl<'a> SerializeMap for StableMap<'a> { type Ok = (); type Error = StableError; fn serialize_key(&mut self, key: &T) -> Result<(), StableError> { - let mut buf = String::new(); - key.serialize(StableSerializer { out: &mut buf })?; - self.current_key = Some(buf); + // JSON requires object keys to be strings. Mirror `serde_json`'s + // `MapKeySerializer`: accept &str, coerce primitive numeric/bool keys + // to their string form, reject anything else. + let raw = key.serialize(MapKeyCollector)?; + self.current_key = Some(raw); Ok(()) } fn serialize_value(&mut self, value: &T) -> Result<(), StableError> { @@ -366,8 +375,9 @@ impl<'a> SerializeStruct for StableMap<'a> { ) -> Result<(), StableError> { let mut vbuf = String::new(); value.serialize(StableSerializer { out: &mut vbuf })?; - let kbuf = serde_json::to_string(key).map_err(|e| StableError(e.to_string()))?; - self.entries.push((kbuf, vbuf)); + // Store the raw key; `finalize_object` JSON-encodes it. Lets us sort + // by raw value to match the previous Value-roundtrip path. + self.entries.push((key.to_string(), vbuf)); Ok(()) } fn end(mut self) -> Result<(), StableError> { @@ -391,8 +401,7 @@ impl<'a> SerializeStructVariant for StableStructVariant<'a> { ) -> Result<(), StableError> { let mut vbuf = String::new(); value.serialize(StableSerializer { out: &mut vbuf })?; - let kbuf = serde_json::to_string(key).map_err(|e| StableError(e.to_string()))?; - self.entries.push((kbuf, vbuf)); + self.entries.push((key.to_string(), vbuf)); Ok(()) } fn end(mut self) -> Result<(), StableError> { @@ -402,6 +411,151 @@ impl<'a> SerializeStructVariant for StableStructVariant<'a> { } } +/// Serializer for map keys. JSON requires object keys to be strings, so this +/// mirrors `serde_json`'s `MapKeySerializer`: accept strings as-is, coerce +/// primitive numeric/bool keys to their string form, error on composite types. +/// Returns the raw key string (without JSON quoting/escaping) so callers can +/// sort by raw value and encode at write time. +struct MapKeyCollector; + +impl Serializer for MapKeyCollector { + type Ok = String; + type Error = StableError; + type SerializeSeq = ser::Impossible; + type SerializeTuple = ser::Impossible; + type SerializeTupleStruct = ser::Impossible; + type SerializeTupleVariant = ser::Impossible; + type SerializeMap = ser::Impossible; + type SerializeStruct = ser::Impossible; + type SerializeStructVariant = ser::Impossible; + + fn serialize_str(self, v: &str) -> Result { + Ok(v.to_string()) + } + fn serialize_char(self, v: char) -> Result { + Ok(v.to_string()) + } + fn serialize_bool(self, v: bool) -> Result { + Ok(if v { "true".into() } else { "false".into() }) + } + fn serialize_i8(self, v: i8) -> Result { + Ok(v.to_string()) + } + fn serialize_i16(self, v: i16) -> Result { + Ok(v.to_string()) + } + fn serialize_i32(self, v: i32) -> Result { + Ok(v.to_string()) + } + fn serialize_i64(self, v: i64) -> Result { + Ok(v.to_string()) + } + fn serialize_i128(self, v: i128) -> Result { + Ok(v.to_string()) + } + fn serialize_u8(self, v: u8) -> Result { + Ok(v.to_string()) + } + fn serialize_u16(self, v: u16) -> Result { + Ok(v.to_string()) + } + fn serialize_u32(self, v: u32) -> Result { + Ok(v.to_string()) + } + fn serialize_u64(self, v: u64) -> Result { + Ok(v.to_string()) + } + fn serialize_u128(self, v: u128) -> Result { + Ok(v.to_string()) + } + fn serialize_f32(self, _: f32) -> Result { + Err(StableError("float map key not supported".into())) + } + fn serialize_f64(self, _: f64) -> Result { + Err(StableError("float map key not supported".into())) + } + fn serialize_bytes(self, _: &[u8]) -> Result { + Err(StableError("bytes map key not supported".into())) + } + fn serialize_none(self) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_some(self, v: &T) -> Result { + v.serialize(self) + } + fn serialize_unit(self) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_unit_struct(self, _: &'static str) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_unit_variant( + self, + _: &'static str, + _: u32, + variant: &'static str, + ) -> Result { + Ok(variant.to_string()) + } + fn serialize_newtype_struct( + self, + _: &'static str, + v: &T, + ) -> Result { + v.serialize(self) + } + fn serialize_newtype_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: &T, + ) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_seq(self, _: Option) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_tuple(self, _: usize) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_tuple_struct( + self, + _: &'static str, + _: usize, + ) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_tuple_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: usize, + ) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_map(self, _: Option) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_struct( + self, + _: &'static str, + _: usize, + ) -> Result { + Err(StableError("map key must be a string".into())) + } + fn serialize_struct_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: usize, + ) -> Result { + Err(StableError("map key must be a string".into())) + } +} + #[cfg(test)] mod tests { use super::*; @@ -456,6 +610,27 @@ mod tests { assert_eq!(s, r#"{"apple":2,"zebra":1}"#); } + #[test] + fn stable_stringify_sorts_keys_by_raw_value_not_json_encoded() { + // `!` is raw 0x21; `\n` is raw 0x0a. Raw sort: \n < !. JSON-encoded + // sort (after the surrounding quote): `\\` (0x5c) > `!` (0x21), so + // encoded order would be !, \n — the opposite. The previous + // Value-roundtrip impl sorted by raw value; assert we still do. + let v = json!({ "!": 1, "\n": 2 }); + assert_eq!(stable_stringify(&v), "{\"\\n\":2,\"!\":1}"); + } + + #[test] + fn stable_stringify_coerces_numeric_map_keys_to_strings() { + use std::collections::BTreeMap; + let mut m: BTreeMap = BTreeMap::new(); + m.insert(10, 1); + m.insert(2, 2); + // Numeric keys get coerced to their string form, like `serde_json`. + // Sorted by raw string value, so "10" < "2" lexicographically. + assert_eq!(stable_stringify(&m), r#"{"10":1,"2":2}"#); + } + #[test] fn args_hash_is_16_chars_hex() { let h = args_hash(&json!({ "command": "ls", "cwd": "/tmp" })); From 886eca5c8b5718dbadc9d67aa41084a62f593522 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 17:54:50 +0000 Subject: [PATCH 3/5] fix(reader): preallocate stable_stringify buffers and surface failures - Use the `Option` / `usize` length hints in `serialize_map`, `serialize_struct`, and `serialize_struct_variant` to seed `Vec::with_capacity`, avoiding regrow churn for buffered fields on larger objects. - Propagate the inner serializer result via `.expect`. Previously a rejected map key or non-finite float would silently truncate the hash input instead of failing; now it panics like the missing-key path. https://claude.ai/code/session_01HWW8moqvoV2oEbAZ1RBC1C --- crates/relayburn-sdk/src/reader/hash.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/crates/relayburn-sdk/src/reader/hash.rs b/crates/relayburn-sdk/src/reader/hash.rs index 76be91c3..1a773530 100644 --- a/crates/relayburn-sdk/src/reader/hash.rs +++ b/crates/relayburn-sdk/src/reader/hash.rs @@ -21,7 +21,9 @@ use sha2::{Digest, Sha256}; /// `stableStringify` so hash inputs are byte-identical across the two ports. pub fn stable_stringify(value: &T) -> String { let mut out = String::new(); - let _ = value.serialize(StableSerializer { out: &mut out }); + value + .serialize(StableSerializer { out: &mut out }) + .expect("stable_stringify only supports JSON-serializable values"); out } @@ -212,21 +214,21 @@ impl<'a> Serializer for StableSerializer<'a> { first: true, }) } - fn serialize_map(self, _: Option) -> Result, StableError> { + fn serialize_map(self, len: Option) -> Result, StableError> { Ok(StableMap { out: self.out, - entries: Vec::new(), + entries: Vec::with_capacity(len.unwrap_or(0)), current_key: None, }) } fn serialize_struct( self, _: &'static str, - _: usize, + len: usize, ) -> Result, StableError> { Ok(StableMap { out: self.out, - entries: Vec::new(), + entries: Vec::with_capacity(len), current_key: None, }) } @@ -235,14 +237,14 @@ impl<'a> Serializer for StableSerializer<'a> { _: &'static str, _: u32, variant: &'static str, - _: usize, + len: usize, ) -> Result, StableError> { self.out.push('{'); write_primitive(self.out, variant)?; self.out.push(':'); Ok(StableStructVariant { out: self.out, - entries: Vec::new(), + entries: Vec::with_capacity(len), }) } } From 52e6c3f5a359bd81e4ffff8e4a6f478eff2976af Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 11 May 2026 17:58:12 +0000 Subject: [PATCH 4/5] fix(reader): expect-on-impossible-error in stable_stringify key encoding `serde_json::to_string` on a `String` cannot fail in practice, so the `unwrap_or_else(|_| "\"\"")` fallback was both dead and a silent corruption hatch. Switch to `.expect` to match the error-surfacing posture in the rest of the file. https://claude.ai/code/session_01HWW8moqvoV2oEbAZ1RBC1C --- crates/relayburn-sdk/src/reader/hash.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/relayburn-sdk/src/reader/hash.rs b/crates/relayburn-sdk/src/reader/hash.rs index 1a773530..ff4ed269 100644 --- a/crates/relayburn-sdk/src/reader/hash.rs +++ b/crates/relayburn-sdk/src/reader/hash.rs @@ -332,7 +332,7 @@ fn finalize_object(out: &mut String, entries: &mut [(String, String)]) { // JSON-encode the key at write time. Using `to_string` here matches // `serde_json::to_string` for string values — i.e. the exact same // escaping the previous `Value::String(s) => to_string(s)` path used. - let encoded = serde_json::to_string(k).unwrap_or_else(|_| String::from("\"\"")); + let encoded = serde_json::to_string(k).expect("string is always JSON-encodable"); out.push_str(&encoded); out.push(':'); out.push_str(v); From 60643c44b9bf81ecda9b3a84e3b2e52b11bece4d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 18:25:47 +0000 Subject: [PATCH 5/5] test(reader): cover incremental parse result conversion Agent-Logs-Url: https://github.com/AgentWorkforce/burn/sessions/d35c510c-543f-4b3a-9304-c4b56c942c3c Co-authored-by: willwashburn <957608+willwashburn@users.noreply.github.com> --- crates/relayburn-sdk/src/reader/claude.rs | 178 ++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/crates/relayburn-sdk/src/reader/claude.rs b/crates/relayburn-sdk/src/reader/claude.rs index b501cac4..232b934c 100644 --- a/crates/relayburn-sdk/src/reader/claude.rs +++ b/crates/relayburn-sdk/src/reader/claude.rs @@ -2603,6 +2603,184 @@ mod tests { .join(name) } + #[test] + fn parse_result_from_incremental_result_copies_all_fields() { + let turn = TurnRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "session-1".to_string(), + session_path: Some("/tmp/session.jsonl".to_string()), + message_id: "msg-1".to_string(), + turn_index: 7, + ts: "2026-05-11T00:00:00.000Z".to_string(), + model: "claude-sonnet-4-6".to_string(), + project: Some("/tmp/project".to_string()), + project_key: Some("project-key".to_string()), + usage: Usage { + input: 1, + output: 2, + reasoning: 3, + cache_read: 4, + cache_create_5m: 5, + cache_create_1h: 6, + }, + tool_calls: vec![], + files_touched: Some(vec!["/tmp/project/src/lib.rs".to_string()]), + subagent: Some(Subagent { + is_sidechain: false, + parent_tool_use_id: Some("tool-1".to_string()), + agent_id: Some("agent-1".to_string()), + parent_agent_id: Some("parent-agent".to_string()), + subagent_type: Some("general-purpose".to_string()), + description: Some("delegate".to_string()), + }), + stop_reason: Some("end_turn".to_string()), + activity: Some(crate::reader::types::ActivityCategory::Coding), + retries: Some(1), + has_edits: Some(true), + fidelity: Some(Fidelity { + granularity: UsageGranularity::PerTurn, + coverage: Coverage { + has_input_tokens: true, + has_output_tokens: true, + has_reasoning_tokens: true, + has_cache_read_tokens: true, + has_cache_create_tokens: true, + has_tool_calls: true, + has_tool_result_events: true, + has_session_relationships: true, + has_raw_content: true, + }, + class: crate::reader::types::FidelityClass::Full, + }), + }; + let content = ContentRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "session-1".to_string(), + message_id: "msg-1".to_string(), + ts: "2026-05-11T00:00:00.000Z".to_string(), + role: ContentRole::Assistant, + kind: ContentKind::Text, + text: Some("hello".to_string()), + tool_use: None, + tool_result: None, + }; + let event = CompactionEvent { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "session-1".to_string(), + ts: "2026-05-11T00:01:00.000Z".to_string(), + preceding_message_id: Some("msg-0".to_string()), + tokens_before_compact: Some(42), + }; + let relationship = SessionRelationshipRecord { + v: 1, + source: RelationshipSourceKind::ClaudeCode, + session_id: "session-1".to_string(), + related_session_id: Some("session-0".to_string()), + relationship_type: RelationshipType::Continuation, + ts: Some("2026-05-11T00:02:00.000Z".to_string()), + source_session_id: Some("source-session".to_string()), + source_version: Some("1.2.3".to_string()), + parent_tool_use_id: Some("tool-1".to_string()), + agent_id: Some("agent-1".to_string()), + subagent_type: Some("general-purpose".to_string()), + description: Some("continued".to_string()), + }; + let tool_result_event = ToolResultEventRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "session-1".to_string(), + message_id: Some("msg-1".to_string()), + tool_use_id: "tool-1".to_string(), + call_index: Some(0), + event_index: 9, + ts: Some("2026-05-11T00:03:00.000Z".to_string()), + status: ToolResultStatus::Completed, + event_source: ToolResultEventSource::ToolResult, + content_length: Some(5), + content_hash: Some("abc123".to_string()), + is_error: Some(false), + usage: Some(Usage::default()), + usage_attribution: Some(crate::reader::types::UsageAttribution::SingleToolTurn), + subagent_session_id: Some("sub-session".to_string()), + agent_id: Some("agent-1".to_string()), + replaced_tools: Some(vec!["old-tool".to_string()]), + collapsed_calls: Some(2), + }; + let user_turn = UserTurnRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "session-1".to_string(), + user_uuid: "user-1".to_string(), + ts: "2026-05-11T00:04:00.000Z".to_string(), + preceding_message_id: Some("msg-0".to_string()), + following_message_id: Some("msg-1".to_string()), + blocks: vec![UserTurnBlock { + kind: crate::reader::types::UserTurnBlockKind::Text, + tool_use_id: None, + byte_len: 5, + approx_tokens: 1, + is_error: None, + }], + }; + let evidence = ClaudeRelationshipEvidence { + file_session_id: Some("session-1".to_string()), + first_ts: Some("2026-05-11T00:00:00.000Z".to_string()), + in_log_session_ids: vec!["session-1".to_string()], + source_version: Some("1.2.3".to_string()), + first_parent_uuid: Some("parent-1".to_string()), + seen_uuids: vec!["uuid-1".to_string()], + has_resume_marker: true, + resume_target_session_id: Some("session-0".to_string()), + explicit_continuation_target_session_ids: Some(vec!["session-0".to_string()]), + explicit_fork_target_session_ids: Some(vec!["session-2".to_string()]), + user_seen: true, + }; + + let incremental = ParseIncrementalResult { + turns: vec![turn.clone()], + content: vec![content.clone()], + events: vec![event.clone()], + relationships: vec![relationship.clone()], + tool_result_events: vec![tool_result_event.clone()], + user_turns: vec![user_turn.clone()], + end_offset: 123, + last_user_text: "latest user turn".to_string(), + evidence: evidence.clone(), + }; + + let full = ParseResult::from(incremental); + + assert_eq!(full.turns, vec![turn]); + assert_eq!(full.content, vec![content]); + assert_eq!(full.events, vec![event]); + assert_eq!(full.relationships, vec![relationship]); + assert_eq!(full.tool_result_events, vec![tool_result_event]); + assert_eq!(full.user_turns, vec![user_turn]); + assert_eq!(full.evidence.file_session_id, evidence.file_session_id); + assert_eq!(full.evidence.first_ts, evidence.first_ts); + assert_eq!(full.evidence.in_log_session_ids, evidence.in_log_session_ids); + assert_eq!(full.evidence.source_version, evidence.source_version); + assert_eq!(full.evidence.first_parent_uuid, evidence.first_parent_uuid); + assert_eq!(full.evidence.seen_uuids, evidence.seen_uuids); + assert_eq!(full.evidence.has_resume_marker, evidence.has_resume_marker); + assert_eq!( + full.evidence.resume_target_session_id, + evidence.resume_target_session_id + ); + assert_eq!( + full.evidence.explicit_continuation_target_session_ids, + evidence.explicit_continuation_target_session_ids + ); + assert_eq!( + full.evidence.explicit_fork_target_session_ids, + evidence.explicit_fork_target_session_ids + ); + assert_eq!(full.evidence.user_seen, evidence.user_seen); + } + #[test] fn simple_turn_parses() { let path = fixture("simple-turn.jsonl");