Skip to content

Commit 6e318d8

Browse files
author
Teryl Taylor
committed
fix: potential double free after use bug.
1 parent 3f1566c commit 6e318d8

3 files changed

Lines changed: 187 additions & 28 deletions

File tree

crates/cpex-ffi/src/lib.rs

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,29 @@ pub unsafe extern "C" fn cpex_plugin_names(
706706
/// Returns MessagePack-encoded PipelineResult + opaque handles for
707707
/// context table and background tasks.
708708
///
709-
/// Returns 0 on success, -1 on failure.
709+
/// # Ownership contract
710+
///
711+
/// **The caller's input `context_table` is unconditionally consumed
712+
/// by this function** — even on error paths (RC_INVALID_HANDLE,
713+
/// RC_INVALID_INPUT, RC_PARSE_ERROR, RC_TIMEOUT, RC_PANIC, etc.).
714+
/// The Box is freed inside `cpex_invoke`; the caller's pointer is
715+
/// dead once this function returns. This mirrors the pattern used
716+
/// by `cpex_wait_background` and lets the Go binding nil its handle
717+
/// unconditionally after the call without leaking the underlying Box.
718+
///
719+
/// On `RC_OK`, a **fresh** `CpexContextTableInner` Box is allocated
720+
/// and its raw pointer is written to `*context_table_out`. On any
721+
/// non-OK return, `*context_table_out` is left as a null pointer
722+
/// (initialized at function entry). The other out parameters
723+
/// (`result_msgpack_out`, `result_len_out`, `bg_handle_out`) follow
724+
/// the same discipline: null/zero on error, populated on success.
725+
///
726+
/// Pre-P0-1 the function consumed the input only after validation
727+
/// passed but before `run_safely`. On `RC_TIMEOUT` / `RC_PANIC` the
728+
/// input had been consumed but `*context_table_out` was never written,
729+
/// so the Go wrapper kept its stale handle and a subsequent
730+
/// `ContextTable.Close()` ran `cpex_release_context_table` on
731+
/// already-freed memory.
710732
///
711733
/// # Safety
712734
/// All pointer parameters must be valid or NULL where documented.
@@ -731,7 +753,43 @@ pub unsafe extern "C" fn cpex_invoke(
731753
context_table_out: *mut *mut CpexContextTableInner,
732754
bg_handle_out: *mut *mut CpexBackgroundTasksInner,
733755
) -> c_int {
734-
// Validate manager handle
756+
// Initialize all out params to safe defaults. Any early return
757+
// from here on leaves a consistent state for the caller: every
758+
// out pointer is null/zero, so a downstream attempt to dereference
759+
// produces a clean null-deref crash rather than reading uninit
760+
// stack memory. The success path overwrites these at the end.
761+
*result_msgpack_out = std::ptr::null_mut();
762+
*result_len_out = 0;
763+
*context_table_out = std::ptr::null_mut();
764+
*bg_handle_out = std::ptr::null_mut();
765+
766+
// Take ownership of the input context_table *immediately*, before
767+
// any validation that could return an error code. From this point
768+
// on, the caller's `context_table` pointer is dead — equivalent
769+
// to free'd memory from the caller's perspective. This mirrors
770+
// how `cpex_wait_background` handles `bg_handle`: ownership
771+
// transfers on entry, the caller nils its reference, and Rust
772+
// is responsible for the Box's lifetime from then on. Pre-fix,
773+
// consumption happened mid-function after some validations, which
774+
// meant validation errors left the input alive (one ownership
775+
// model) and post-validation errors left it consumed without
776+
// writing `*context_table_out` (a *different* ownership model).
777+
// Two contracts in one function is exactly what produced the
778+
// P0-1 UAF.
779+
let input_ctx_table: Option<PluginContextTable> = if context_table.is_null() {
780+
None
781+
} else {
782+
// Box::from_raw consumes the allocation; it'll drop at the
783+
// end of this scope if not moved into Some(...). When moved
784+
// into Some(...), the table value lives until invoke_by_name
785+
// either uses it or it's dropped on a Future-cancellation
786+
// path (RC_TIMEOUT). Either way the Box is gone.
787+
let ct = Box::from_raw(context_table);
788+
Some(ct.table)
789+
};
790+
791+
// Validate manager handle. `input_ctx_table` already owns the
792+
// input data — if we return here, it drops cleanly.
735793
let inner = match mgr.as_ref() {
736794
Some(m) => m,
737795
None => return RC_INVALID_HANDLE,
@@ -774,22 +832,17 @@ pub unsafe extern "C" fn cpex_invoke(
774832
Extensions::default()
775833
};
776834

777-
// Get or create context table
778-
let ctx_table: Option<PluginContextTable> = if context_table.is_null() {
779-
None
780-
} else {
781-
let ct = Box::from_raw(context_table);
782-
Some(ct.table)
783-
};
784-
785835
// Invoke the hook with wall-clock timeout + panic catch.
786836
let (mut result, bg) = match run_safely(
787837
inner
788838
.manager
789-
.invoke_by_name(name, payload, extensions, ctx_table),
839+
.invoke_by_name(name, payload, extensions, input_ctx_table),
790840
"cpex_invoke",
791841
) {
792842
SafeRun::Ok(r) => r,
843+
// *context_table_out is already null (set at function entry);
844+
// the input table has been consumed by invoke_by_name's call
845+
// frame and dropped. Caller's handle is dead, no replacement.
793846
other => return other.rc(), // RC_TIMEOUT or RC_PANIC; already logged
794847
};
795848

go/cpex/manager.go

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -341,19 +341,18 @@ func (m *PluginManager) InvokeByName(
341341
cHookName := C.CString(hookName)
342342
defer C.free(unsafe.Pointer(cHookName))
343343

344-
// Pass the context-table handle to Rust but DO NOT nil our local
345-
// reference until we know Rust succeeded. Rust consumes the handle
346-
// only at the moment of invoke (after all input validation), so
347-
// pre-invoke failures (bad payload, bad extensions, etc.) leave
348-
// the handle untouched and the caller's ContextTable remains valid.
349-
//
350-
// Caveat: on a post-invoke failure (rare — only result-serialization
351-
// OOM), Rust has consumed the box but doesn't write ctOut, so the
352-
// caller's ContextTable handle becomes dangling. The caller should
353-
// not reuse a ContextTable after an InvokeByName error.
344+
// Pass the context-table handle to Rust. Per the post-P0-1 FFI
345+
// contract, `cpex_invoke` takes ownership of `ctHandle`
346+
// UNCONDITIONALLY on entry — same pattern as `cpex_wait_background`
347+
// with `bg_handle`. We nil our local reference immediately so the
348+
// caller's `ContextTable` can't be accidentally reused after this
349+
// call (its underlying Box is gone regardless of the eventual rc).
350+
// On RC_OK a fresh handle lands in `ctOut`; on any error path
351+
// `ctOut` stays nil and the caller's context-table chain ends here.
354352
var ctHandle C.CpexContextTable
355353
if contextTable != nil {
356354
ctHandle = contextTable.handle
355+
contextTable.handle = nil
357356
}
358357

359358
var resultPtr *C.uint8_t
@@ -386,16 +385,13 @@ func (m *PluginManager) InvokeByName(
386385
)
387386

388387
if rc != 0 {
388+
// `ctOut` is null on every non-OK return per the post-P0-1
389+
// contract. The caller's `contextTable.handle` is already nil
390+
// (we cleared it above before the call), so there's no
391+
// dangling-handle risk on error paths.
389392
return nil, nil, nil, errorFromRC(int(rc), "InvokeByName")
390393
}
391394

392-
// Rust succeeded — it consumed ctHandle and produced ctOut.
393-
// NOW it's safe to nil the caller's reference (the original Box
394-
// was consumed by Rust; its successor is in ctOut).
395-
if contextTable != nil {
396-
contextTable.handle = nil
397-
}
398-
399395
// Deserialize result from MessagePack
400396
resultBytes := C.GoBytes(unsafe.Pointer(resultPtr), resultLen)
401397
C.cpex_free_bytes((*C.uint8_t)(unsafe.Pointer(resultPtr)), resultLen)

go/cpex/manager_test.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,3 +1173,113 @@ func TestLoadConfigInvalidYAML(t *testing.T) {
11731173
t.Error("expected error for invalid YAML")
11741174
}
11751175
}
1176+
1177+
// TestInvokeByNameErrorDoesNotUAFContextTable is the regression guard
1178+
// for P0-1. Pre-fix, `cpex_invoke` consumed the input ContextTable's
1179+
// Box mid-function but didn't write *context_table_out on
1180+
// RC_TIMEOUT / RC_PANIC / RC_PARSE_ERROR — the Go wrapper kept its
1181+
// stale handle and a subsequent Close() called
1182+
// cpex_release_context_table on already-freed memory.
1183+
//
1184+
// Post-fix, the Go wrapper nils its `contextTable.handle` immediately
1185+
// after handing it to Rust (mirroring the bg_handle pattern), so even
1186+
// if Rust errors out without producing a replacement, no dangling
1187+
// handle survives.
1188+
//
1189+
// This test:
1190+
// 1. Performs a successful invoke to get a real ContextTable.
1191+
// 2. Calls InvokeByName again with that ContextTable PLUS an
1192+
// invalid payload_type that forces Rust to return RC_PARSE_ERROR
1193+
// AFTER the consumption point.
1194+
// 3. Confirms the second call errored (sanity).
1195+
// 4. Calls Close() on the original ContextTable — must NOT crash.
1196+
// Pre-fix this was a UAF (free of already-freed memory).
1197+
func TestInvokeByNameErrorDoesNotUAFContextTable(t *testing.T) {
1198+
mgr, err := NewPluginManagerDefault()
1199+
if err != nil {
1200+
t.Fatalf("NewPluginManagerDefault failed: %v", err)
1201+
}
1202+
defer mgr.Shutdown()
1203+
if err := mgr.Initialize(); err != nil {
1204+
t.Fatalf("Initialize failed: %v", err)
1205+
}
1206+
1207+
// 1. Successful first invoke — gives us a real, Rust-allocated
1208+
// ContextTable. Calling Close() on this pre-fix would have
1209+
// been the second free.
1210+
payload := map[string]any{"tool_name": "test"}
1211+
_, ctxTable, bg, err := mgr.InvokeByName("hook1", PayloadGeneric, payload, &Extensions{}, nil)
1212+
if err != nil {
1213+
t.Fatalf("first invoke failed: %v", err)
1214+
}
1215+
bg.Close()
1216+
if ctxTable == nil || ctxTable.handle == nil {
1217+
t.Fatal("expected a non-nil ContextTable from the first invoke")
1218+
}
1219+
1220+
// 2. Second invoke with an UNKNOWN payload_type (99). The Rust
1221+
// side validates payload_type against its registry; an
1222+
// unknown value forces a RC_PARSE_ERROR return. Critically,
1223+
// that error path is now POST-consumption of the input
1224+
// context_table.
1225+
const unknownPayloadType uint8 = 99
1226+
_, _, _, err = mgr.InvokeByName("hook2", unknownPayloadType, payload, &Extensions{}, ctxTable)
1227+
if err == nil {
1228+
t.Fatal("expected error from invoke with unknown payload_type")
1229+
}
1230+
1231+
// 3. Per the P0-1 contract, ctxTable's handle was nil'd in Go
1232+
// *before* the C call returned. So whether or not Rust wrote
1233+
// *context_table_out, our local handle is nil.
1234+
if ctxTable.handle != nil {
1235+
t.Errorf("input ContextTable.handle should be nil after invoke error; got %p", ctxTable.handle)
1236+
}
1237+
1238+
// 4. The actual UAF check: Close() must be safe. Pre-fix this
1239+
// called cpex_release_context_table on already-freed memory.
1240+
// Post-fix, Close() short-circuits on a nil handle and is a
1241+
// no-op. Either it crashes (fail) or it doesn't (pass).
1242+
ctxTable.Close()
1243+
}
1244+
1245+
// TestInvokeByNameConsumesContextTableOnRcError pins the other half
1246+
// of the P0-1 contract — even when the manager rejects the call with
1247+
// a validation-class error (here: shutdown after first invoke), the
1248+
// caller's ContextTable handle is nil'd unconditionally.
1249+
//
1250+
// Verifies: no leak of the input Box when Rust never gets to write
1251+
// the output; Close() on the input is a safe no-op.
1252+
func TestInvokeByNameConsumesContextTableEvenOnShutdownPath(t *testing.T) {
1253+
mgr, err := NewPluginManagerDefault()
1254+
if err != nil {
1255+
t.Fatalf("NewPluginManagerDefault failed: %v", err)
1256+
}
1257+
if err := mgr.Initialize(); err != nil {
1258+
mgr.Shutdown()
1259+
t.Fatalf("Initialize failed: %v", err)
1260+
}
1261+
1262+
payload := map[string]any{"tool_name": "test"}
1263+
_, ctxTable, bg, err := mgr.InvokeByName("hook1", PayloadGeneric, payload, &Extensions{}, nil)
1264+
if err != nil {
1265+
mgr.Shutdown()
1266+
t.Fatalf("first invoke failed: %v", err)
1267+
}
1268+
bg.Close()
1269+
1270+
// Shut down the manager — Go-side short-circuit will return
1271+
// ErrCpexInvalidHandle WITHOUT calling cpex_invoke. The Go
1272+
// wrapper hasn't touched ctxTable yet in this case (early
1273+
// return at m.handle == nil), so ctxTable.handle remains live.
1274+
mgr.Shutdown()
1275+
1276+
_, _, _, err = mgr.InvokeByName("hook2", PayloadGeneric, payload, &Extensions{}, ctxTable)
1277+
if !errors.Is(err, ErrCpexInvalidHandle) {
1278+
t.Errorf("expected ErrCpexInvalidHandle after shutdown, got %v", err)
1279+
}
1280+
1281+
// Even though the Go-side short-circuit didn't transit our
1282+
// handle to Rust, Close() must still be safe — it's a legal
1283+
// thing for callers to do.
1284+
ctxTable.Close()
1285+
}

0 commit comments

Comments
 (0)