Skip to content

Commit 546e2dd

Browse files
willwashburnclaude
andauthored
perf(sdk): stop cloning turn streams in analyze hot paths (#422)
Aggregate per-session/per-file groups by `&TurnRecord` instead of cloning the full record. The biggest win is `overhead.rs`, which used to clone every applicable turn per file (~3x for Claude turns with two CLAUDE.md candidates plus AGENTS.md cross-checks). - `overhead`: per-file filter into `Vec<&TurnRecord>`; route through a new internal `attribute_claude_md_refs` so the public `attribute_claude_md` is a thin wrapper. - `hotspots`: `IndexMap<String, Vec<&TurnRecord>>` for the per-session groups; private `attribute_session` / `index_tool_results` now take `&[&TurnRecord]`. - `quality`: per-session groups borrow turns; private `infer_outcome_refs` / `compute_one_shot_rate_refs` drive the work and the public functions adapt via a single ref collect. - `compare`: hoist the model-name clone above the three `entry` calls so we allocate once per *new* model, not four times per turn. Closes #325. Co-authored-by: Claude <noreply@anthropic.com>
1 parent cb71f3e commit 546e2dd

6 files changed

Lines changed: 85 additions & 38 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ Cross-package release notes for relayburn. Package changelogs contain package-le
44

55
## [Unreleased]
66

7+
### Changed
8+
9+
- `relayburn-sdk`: analyze hot paths (`overhead`, `hotspots`, `quality`,
10+
`compare`) now aggregate per-session/per-file groups by reference instead
11+
of cloning `TurnRecord`s, cutting working-set memory on the most expensive
12+
verbs. Behavior is unchanged.
13+
714
## [2.8.6] - 2026-05-12
815

916
### Changed

crates/relayburn-sdk/src/analyze/claude_md.rs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,20 @@ fn matches_close_fence(s: &str, ch: char, min_len: usize) -> bool {
306306
}
307307

308308
pub fn attribute_claude_md(input: &AttributeClaudeMdInput<'_>) -> ClaudeMdAttributionResult {
309-
let total_tokens: u64 = input.files.iter().map(|f| f.tokens).sum();
309+
let turns: Vec<&TurnRecord> = input.turns.iter().collect();
310+
attribute_claude_md_refs(input.files, &turns, input.pricing)
311+
}
312+
313+
/// Reference-borrow variant of [`attribute_claude_md`] used by callers that
314+
/// have already pre-filtered turns into a `Vec<&TurnRecord>` (e.g. the
315+
/// per-file overhead attribution loop). Avoids the per-turn `Vec<TurnRecord>`
316+
/// clone that the public entry point would otherwise force.
317+
pub(crate) fn attribute_claude_md_refs(
318+
files: &[ParsedClaudeMd],
319+
turns: &[&TurnRecord],
320+
pricing: &PricingTable,
321+
) -> ClaudeMdAttributionResult {
322+
let total_tokens: u64 = files.iter().map(|f| f.tokens).sum();
310323
if total_tokens == 0 {
311324
return ClaudeMdAttributionResult {
312325
total_tokens: 0,
@@ -320,8 +333,8 @@ pub fn attribute_claude_md(input: &AttributeClaudeMdInput<'_>) -> ClaudeMdAttrib
320333
}
321334

322335
let mut by_session: IndexMap<String, Vec<&TurnRecord>> = IndexMap::new();
323-
for t in input.turns {
324-
by_session.entry(t.session_id.clone()).or_default().push(t);
336+
for t in turns {
337+
by_session.entry(t.session_id.clone()).or_default().push(*t);
325338
}
326339

327340
let mut session_costs: Vec<SessionClaudeMdCost> = Vec::new();
@@ -333,7 +346,7 @@ pub fn attribute_claude_md(input: &AttributeClaudeMdInput<'_>) -> ClaudeMdAttrib
333346
let mut riding_turns: u64 = 0;
334347
let mut model_counts: IndexMap<String, u64> = IndexMap::new();
335348
for t in &turns {
336-
let Some(rate) = lookup_model_rate(&t.model, input.pricing) else {
349+
let Some(rate) = lookup_model_rate(&t.model, pricing) else {
337350
continue;
338351
};
339352
*model_counts.entry(t.model.clone()).or_insert(0) += 1;
@@ -363,9 +376,9 @@ pub fn attribute_claude_md(input: &AttributeClaudeMdInput<'_>) -> ClaudeMdAttrib
363376
};
364377
let per_session_p95 = percentile(&session_cost_values, 0.95);
365378

366-
let total_bytes: u64 = input.files.iter().map(|f| f.bytes).sum();
379+
let total_bytes: u64 = files.iter().map(|f| f.bytes).sum();
367380
let mut section_costs: Vec<SectionCost> = Vec::new();
368-
for f in input.files {
381+
for f in files {
369382
for section in &f.sections {
370383
let token_share = if total_bytes > 0 {
371384
section.bytes as f64 / total_bytes as f64

crates/relayburn-sdk/src/analyze/compare.rs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,25 +126,38 @@ pub fn build_compare_table(turns: &[EnrichedTurn], opts: &CompareOptions<'_>) ->
126126

127127
for et in turns {
128128
let t = &et.turn;
129-
let model = if t.model.is_empty() {
130-
"unknown".to_string()
129+
let model: &str = if t.model.is_empty() {
130+
"unknown"
131131
} else {
132-
t.model.clone()
132+
t.model.as_str()
133133
};
134134
if let Some(ref filter) = model_filter {
135-
if !filter.iter().any(|m| m == &model) {
135+
if !filter.iter().any(|m| m == model) {
136136
continue;
137137
}
138138
}
139139
let cat = activity_label(t.activity);
140-
model_set.insert(model.clone(), ());
141-
category_set.insert(cat.clone(), ());
140+
// One clone per *new* model — `by_model_category` is the canonical
141+
// per-turn map (the other two may be pre-seeded by the model
142+
// filter), so gate on it and reuse the owned key across the three
143+
// maps to avoid the four clones the naive form does.
144+
if !by_model_category.contains_key(model) {
145+
let owned = model.to_string();
146+
model_set.insert(owned.clone(), ());
147+
model_totals
148+
.entry(owned.clone())
149+
.or_insert_with(CompareTotals::default);
150+
by_model_category.insert(owned, BTreeMap::new());
151+
}
152+
if !category_set.contains_key(&cat) {
153+
category_set.insert(cat.clone(), ());
154+
}
142155

143-
let by_cat = by_model_category.entry(model.clone()).or_default();
156+
let by_cat = by_model_category.get_mut(model).expect("model just inserted");
144157
let acc = by_cat.entry(cat).or_default();
145158

146159
acc.turns += 1;
147-
let mt = model_totals.entry(model.clone()).or_default();
160+
let mt = model_totals.get_mut(model).expect("model just inserted");
148161
mt.turns += 1;
149162
if let Some(c) = cost_for_turn(t, opts.pricing) {
150163
acc.priced_turns += 1;

crates/relayburn-sdk/src/analyze/hotspots.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,11 @@ static FILE_TOOLS: phf::Set<&'static str> = phf_set! {
207207
/// charged via Codex `included_in_output`, etc.) lands in `unattributed_*`.
208208
pub fn attribute_hotspots(turns: &[TurnRecord], opts: &HotspotsOptions<'_>) -> HotspotsResult {
209209
// First-seen session ordering matches the TS `Map` iteration semantics.
210-
let mut by_session: IndexMap<String, Vec<TurnRecord>> = IndexMap::new();
210+
// Borrow turns rather than cloning — nothing below mutates them and the
211+
// input slice outlives every aggregation step.
212+
let mut by_session: IndexMap<String, Vec<&TurnRecord>> = IndexMap::new();
211213
for t in turns {
212-
by_session
213-
.entry(t.session_id.clone())
214-
.or_default()
215-
.push(t.clone());
214+
by_session.entry(t.session_id.clone()).or_default().push(t);
216215
}
217216

218217
let mut attributions: Vec<ToolAttribution> = Vec::new();
@@ -284,7 +283,7 @@ struct SessionAttribution {
284283
}
285284

286285
fn attribute_session(
287-
turns: &[TurnRecord],
286+
turns: &[&TurnRecord],
288287
pricing: &PricingTable,
289288
tool_results_by_turn: Option<&HashMap<u64, PerTurnContent>>,
290289
user_turns: &[UserTurnRecord],
@@ -341,7 +340,7 @@ fn attribute_session(
341340
let mut riding_active: Vec<usize> = Vec::new();
342341
let mut grand_total = 0.0_f64;
343342

344-
for turn in turns {
343+
for &turn in turns {
345344
let turn_rate = lookup_model_rate(&turn.model, pricing);
346345

347346
// Accumulate the per-turn grand total in this same pass. Routes
@@ -490,7 +489,7 @@ fn attribute_session(
490489

491490
fn index_tool_results(
492491
content: &[ContentRecord],
493-
turns: &[TurnRecord],
492+
turns: &[&TurnRecord],
494493
) -> HashMap<u64, PerTurnContent> {
495494
let mut by_turn: HashMap<u64, PerTurnContent> = HashMap::new();
496495
let mut turn_index_by_tool_use_id: HashMap<String, u64> = HashMap::new();

crates/relayburn-sdk/src/analyze/overhead.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ use crate::reader::{SourceKind, TurnRecord};
1515
use serde::{Deserialize, Serialize};
1616

1717
use crate::analyze::claude_md::{
18-
attribute_claude_md, load_claude_md_file, AttributeClaudeMdInput, ClaudeMdAttributionResult,
19-
ParsedClaudeMd,
18+
attribute_claude_md_refs, load_claude_md_file, ClaudeMdAttributionResult, ParsedClaudeMd,
2019
};
2120
use crate::analyze::pricing::PricingTable;
2221

@@ -128,17 +127,16 @@ pub fn attribute_overhead(input: AttributeOverheadInput<'_>) -> OverheadAttribut
128127
std::collections::HashMap::new();
129128

130129
for pf in input.files {
131-
let filtered: Vec<TurnRecord> = input
130+
let filtered: Vec<&TurnRecord> = input
132131
.turns
133132
.iter()
134133
.filter(|t| pf.file.applies_to.contains(&t.source))
135-
.cloned()
136134
.collect();
137-
let attribution = attribute_claude_md(&AttributeClaudeMdInput {
138-
files: std::slice::from_ref(&pf.parsed),
139-
turns: &filtered,
140-
pricing: input.pricing,
141-
});
135+
let attribution = attribute_claude_md_refs(
136+
std::slice::from_ref(&pf.parsed),
137+
&filtered,
138+
input.pricing,
139+
);
142140

143141
for sc in &attribution.session_costs {
144142
let prev = max_riding_by_session

crates/relayburn-sdk/src/analyze/quality.rs

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,16 @@ const FAILURE_STREAK_THRESHOLD: u64 = 3;
112112

113113
pub fn compute_quality(turns: &[TurnRecord], opts: &ComputeQualityOptions) -> QualityResult {
114114
// Preserve TS Map iteration order: insertion-order across sessionIds.
115-
let mut by_session: Vec<(String, Vec<TurnRecord>)> = Vec::new();
115+
// Borrow rather than clone — nothing here mutates the turns and the
116+
// input slice outlives every per-session aggregation.
117+
let mut by_session: Vec<(String, Vec<&TurnRecord>)> = Vec::new();
116118
let mut idx: HashMap<String, usize> = HashMap::new();
117119
for t in turns {
118120
if let Some(&i) = idx.get(&t.session_id) {
119-
by_session[i].1.push(t.clone());
121+
by_session[i].1.push(t);
120122
} else {
121123
idx.insert(t.session_id.clone(), by_session.len());
122-
by_session.push((t.session_id.clone(), vec![t.clone()]));
124+
by_session.push((t.session_id.clone(), vec![t]));
123125
}
124126
}
125127

@@ -129,13 +131,13 @@ pub fn compute_quality(turns: &[TurnRecord], opts: &ComputeQualityOptions) -> Qu
129131
let mut one_shot = Vec::with_capacity(by_session.len());
130132
for (session_id, mut session_turns) in by_session {
131133
session_turns.sort_by_key(|t| t.turn_index);
132-
outcomes.push(infer_outcome(
134+
outcomes.push(infer_outcome_refs(
133135
&session_id,
134136
&session_turns,
135137
opts.content_by_session,
136138
now,
137139
));
138-
one_shot.push(compute_one_shot_rate(&session_id, &session_turns));
140+
one_shot.push(compute_one_shot_rate_refs(&session_id, &session_turns));
139141
}
140142

141143
QualityResult { outcomes, one_shot }
@@ -146,6 +148,16 @@ pub fn infer_outcome(
146148
turns: &[TurnRecord],
147149
content_by_session: Option<&HashMap<String, Vec<ContentRecord>>>,
148150
now_ms: i64,
151+
) -> SessionOutcome {
152+
let refs: Vec<&TurnRecord> = turns.iter().collect();
153+
infer_outcome_refs(session_id, &refs, content_by_session, now_ms)
154+
}
155+
156+
fn infer_outcome_refs(
157+
session_id: &str,
158+
turns: &[&TurnRecord],
159+
content_by_session: Option<&HashMap<String, Vec<ContentRecord>>>,
160+
now_ms: i64,
149161
) -> SessionOutcome {
150162
if turns.is_empty() {
151163
return SessionOutcome {
@@ -260,6 +272,11 @@ pub fn infer_outcome(
260272
}
261273

262274
pub fn compute_one_shot_rate(session_id: &str, turns: &[TurnRecord]) -> OneShotMetrics {
275+
let refs: Vec<&TurnRecord> = turns.iter().collect();
276+
compute_one_shot_rate_refs(session_id, &refs)
277+
}
278+
279+
fn compute_one_shot_rate_refs(session_id: &str, turns: &[&TurnRecord]) -> OneShotMetrics {
263280
let mut edit_turns: u64 = 0;
264281
let mut one_shot_turns: u64 = 0;
265282
let mut total_retries: u64 = 0;
@@ -299,7 +316,7 @@ enum EndingRole {
299316
Unknown,
300317
}
301318

302-
fn ending_role(turns: &[TurnRecord]) -> EndingRole {
319+
fn ending_role(turns: &[&TurnRecord]) -> EndingRole {
303320
// TurnRecord represents assistant turns; "ended-with-assistant" means the
304321
// final turn reached a natural stop (`end_turn`). A non-`end_turn` stop
305322
// reason means user-ended (session died after a tool_use). When the
@@ -312,7 +329,7 @@ fn ending_role(turns: &[TurnRecord]) -> EndingRole {
312329
}
313330
}
314331

315-
fn trailing_failure_streak(turns: &[TurnRecord]) -> u64 {
332+
fn trailing_failure_streak(turns: &[&TurnRecord]) -> u64 {
316333
let mut streak: u64 = 0;
317334
for t in turns.iter().rev() {
318335
let calls = &t.tool_calls;

0 commit comments

Comments
 (0)