Skip to content

Commit 23d997c

Browse files
fix: [SLES-2810] prune sorted_reparenting_info on context release to stop warning flood
After on_platform_report removed a context from context_buffer, the corresponding ReparentingInfo entry was left in sorted_reparenting_info indefinitely. Every subsequent trace batch caused update_reparenting to iterate over all stale entries and emit a WARN for each one, producing a flood of "Mismatched request info. Context not found for request_id" messages in CloudWatch. Fix: when on_platform_report releases a context, also remove the sorted_reparenting_info entry whose request_id equals the released request_id. Add two regression tests covering the pruning behaviour and the update_reparenting read path. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent d668142 commit 23d997c

1 file changed

Lines changed: 134 additions & 0 deletions

File tree

bottlecap/src/lifecycle/invocation/processor.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,11 @@ impl Processor {
793793
// Release the context now that all processing for this invocation is complete.
794794
// This prevents unbounded memory growth across warm invocations.
795795
self.context_buffer.remove(request_id);
796+
// Prune the corresponding reparenting entry so that update_reparenting does not
797+
// warn about a missing context for already-completed invocations.
798+
self.context_buffer
799+
.sorted_reparenting_info
800+
.retain(|info| info.request_id != *request_id);
796801
trace!(
797802
"Context released (buffer size after remove: {})",
798803
self.context_buffer.size()
@@ -2264,4 +2269,133 @@ mod tests {
22642269
);
22652270
}
22662271
}
2272+
2273+
fn make_trace_sender(config: Arc<config::Config>) -> Arc<SendingTraceProcessor> {
2274+
use libdd_trace_obfuscation::obfuscation_config::ObfuscationConfig;
2275+
let (stats_concentrator_service, stats_concentrator_handle) =
2276+
StatsConcentratorService::new(Arc::clone(&config));
2277+
tokio::spawn(stats_concentrator_service.run());
2278+
Arc::new(SendingTraceProcessor {
2279+
appsec: None,
2280+
processor: Arc::new(trace_processor::ServerlessTraceProcessor {
2281+
obfuscation_config: Arc::new(
2282+
ObfuscationConfig::new().expect("Failed to create ObfuscationConfig"),
2283+
),
2284+
}),
2285+
trace_tx: tokio::sync::mpsc::channel(1).0,
2286+
stats_generator: Arc::new(StatsGenerator::new(stats_concentrator_handle)),
2287+
})
2288+
}
2289+
2290+
// Regression test for SLES-2810: sorted_reparenting_info must be pruned when the
2291+
// context is released in on_platform_report, so that update_reparenting does not
2292+
// emit spurious warnings for already-completed invocations.
2293+
#[tokio::test]
2294+
async fn test_reparenting_info_pruned_after_on_platform_report() {
2295+
let mut p = setup();
2296+
let request_id = String::from("test-request-id");
2297+
2298+
p.on_invoke_event(request_id.clone());
2299+
p.add_reparenting(request_id.clone(), 42, 0);
2300+
assert_eq!(
2301+
p.context_buffer.sorted_reparenting_info.len(),
2302+
1,
2303+
"reparenting entry must exist before report"
2304+
);
2305+
2306+
let config = Arc::new(config::Config::default());
2307+
let tags_provider = Arc::new(provider::Provider::new(
2308+
Arc::clone(&config),
2309+
LAMBDA_RUNTIME_SLUG.to_string(),
2310+
&HashMap::from([("function_arn".to_string(), "test-arn".to_string())]),
2311+
));
2312+
let trace_sender = make_trace_sender(config);
2313+
2314+
p.on_platform_report(
2315+
&request_id,
2316+
ReportMetrics::OnDemand(OnDemandReportMetrics {
2317+
duration_ms: 10.0,
2318+
billed_duration_ms: 11,
2319+
memory_size_mb: 128,
2320+
max_memory_used_mb: 64,
2321+
init_duration_ms: None,
2322+
restore_duration_ms: None,
2323+
}),
2324+
chrono::Utc::now().timestamp(),
2325+
Status::Success,
2326+
None,
2327+
None,
2328+
tags_provider,
2329+
trace_sender,
2330+
)
2331+
.await;
2332+
2333+
assert!(
2334+
p.context_buffer.sorted_reparenting_info.is_empty(),
2335+
"reparenting entry must be pruned after on_platform_report"
2336+
);
2337+
}
2338+
2339+
// Regression test for SLES-2810: update_reparenting must not visit stale entries
2340+
// whose context has already been released, preventing the warning flood.
2341+
#[tokio::test]
2342+
async fn test_update_reparenting_ignores_completed_invocations() {
2343+
let mut p = setup();
2344+
2345+
// Invocation A completes fully.
2346+
let request_id_a = String::from("request-a");
2347+
p.on_invoke_event(request_id_a.clone());
2348+
p.add_reparenting(request_id_a.clone(), 11, 0);
2349+
2350+
let config = Arc::new(config::Config::default());
2351+
let tags_provider = Arc::new(provider::Provider::new(
2352+
Arc::clone(&config),
2353+
LAMBDA_RUNTIME_SLUG.to_string(),
2354+
&HashMap::from([("function_arn".to_string(), "test-arn".to_string())]),
2355+
));
2356+
let trace_sender = make_trace_sender(Arc::clone(&config));
2357+
2358+
p.on_platform_report(
2359+
&request_id_a,
2360+
ReportMetrics::OnDemand(OnDemandReportMetrics {
2361+
duration_ms: 10.0,
2362+
billed_duration_ms: 11,
2363+
memory_size_mb: 128,
2364+
max_memory_used_mb: 64,
2365+
init_duration_ms: None,
2366+
restore_duration_ms: None,
2367+
}),
2368+
chrono::Utc::now().timestamp(),
2369+
Status::Success,
2370+
None,
2371+
None,
2372+
tags_provider,
2373+
trace_sender,
2374+
)
2375+
.await;
2376+
2377+
// Invocation B is in-flight (context still live).
2378+
let request_id_b = String::from("request-b");
2379+
p.on_invoke_event(request_id_b.clone());
2380+
p.add_reparenting(request_id_b.clone(), 22, 0);
2381+
2382+
// Simulate the trace agent path: clone reparenting_info, then call update_reparenting.
2383+
// Before the fix this clone contained the stale entry for request-a, causing a warning.
2384+
let reparenting_info = p.get_reparenting_info();
2385+
2386+
// Only the live invocation B should be present.
2387+
assert_eq!(
2388+
reparenting_info.len(),
2389+
1,
2390+
"only the in-flight invocation must remain in reparenting_info"
2391+
);
2392+
assert_eq!(reparenting_info[0].request_id, request_id_b);
2393+
2394+
// update_reparenting must return no contexts to send (span IDs still unset).
2395+
let ctx_to_send = p.update_reparenting(reparenting_info);
2396+
assert!(
2397+
ctx_to_send.is_empty(),
2398+
"no contexts should be ready to send yet"
2399+
);
2400+
}
22672401
}

0 commit comments

Comments
 (0)