Skip to content

Commit 8f3c5a0

Browse files
proggeramlug and Ralph Küpper authored
fix(fetch+runtime): generic object ops must not deref fetch-band Headers/Request handles (#5606)
* fix(fetch): route Headers value stringify through a registry-aware helper

  The fetch thunk serialized init.headers via the generic js_json_stringify, which dereferenced a Headers handle (a fetch-band registry id) as a heap pointer -> EXC_BAD_ACCESS in gc_obj_type. Add js_fetch_headers_to_json that reads a Headers handle from its registry instead, and route the fetch(url, { headers }) path through it. Same handle-band family as the string_from_header / inline-.length guards. (Advances the bundle's -p path past the json-stringify crash; a further handle-band deref remains in js_object_has_property on a Headers handle.)

* fix(runtime): js_object_has_property must reject handle-band receivers

  `key in <handle>` where the receiver is a Web Fetch Headers/Request/Response handle (a fetch-band registry id, e.g. 0x40007) dereferenced the id as a heap object -> EXC_BAD_ACCESS. Return false for handle-band receivers instead, same family as the string_from_header / inline-.length / json_stringify guards.

* address CodeRabbit review + file-size lint (#5606)

  - js_object_has_property: narrow the handle-band crash guard to the fetch/zlib bands (>= COMMON_HANDLE_BAND_END). Common/small handles now fall through to the registered small-handle property path instead of always returning false.
  - headers_init_json_ptr: normalize null/undefined headers to {} in the shared helper so the codegen js_fetch_headers_to_json path matches the runtime thunk (previously serialized headers:null as "null").
  - Move the headers_fetch_object_json test out of fetch/mod.rs into a sibling headers_json_test.rs submodule so fetch/mod.rs is back under the 2000-LOC CI limit (2020 -> 1992).

---------

Co-authored-by: Ralph Küpper <ralph2@skelpo.com>
1 parent 911a979 commit 8f3c5a0

10 files changed

Lines changed: 254 additions & 18 deletions

File tree

crates/perry-codegen/src/expr/logical_collections.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,17 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result<String> {
122122
};
123123

124124
let blk = ctx.block();
125-
// js_json_stringify(value: f64, indent: i32) -> i64 string handle.
126-
let zero_i = "0".to_string();
125+
// Stringify the headers value into the flat `{name:value}` JSON that
126+
// `js_fetch_with_options` parses. Routed through
127+
// `js_fetch_headers_to_json` (not the generic `js_json_stringify`) so
128+
// a `Headers` instance — a fetch-band registry handle, e.g. `headers:
129+
// new Headers(h)` — is read from its registry instead of being
130+
// dereferenced as a heap pointer (the `js_json_stringify`-on-handle
131+
// SIGSEGV; same #5559/#5560 handle-band family).
127132
let headers_str = blk.call(
128133
I64,
129-
"js_json_stringify",
130-
&[(DOUBLE, &headers_obj_box), (I32, &zero_i)],
134+
"js_fetch_headers_to_json",
135+
&[(DOUBLE, &headers_obj_box)],
131136
);
132137

133138
// The runtime takes raw StringHeader pointers (i64). Unbox each

crates/perry-codegen/src/runtime_decls/stdlib_ffi.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,6 +1699,10 @@ pub fn declare_stdlib_ffi(module: &mut LlModule) {
16991699
module.declare_function("js_fetch_stream_status", DOUBLE, &[DOUBLE]);
17001700
module.declare_function("js_fetch_text", I64, &[I64]);
17011701
module.declare_function("js_fetch_with_options", I64, &[I64, I64, I64, I64]);
1702+
// Headers-aware JSON stringify for the `fetch(url, { headers })` request
1703+
// path: takes the headers value (f64) and returns a `*const StringHeader`
1704+
// (i64) holding `{name:value}` JSON, treating a `Headers` handle safely.
1705+
module.declare_function("js_fetch_headers_to_json", I64, &[DOUBLE]);
17021706

17031707
// ========== Net ==========
17041708
module.declare_function("js_net_create_connection", DOUBLE, &[I32, I64, I64]);

crates/perry-runtime/src/object/field_get_set.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2263,6 +2263,23 @@ pub extern "C" fn js_object_has_property(obj: f64, key: f64) -> f64 {
22632263
};
22642264
}
22652265

2266+
// A Web Fetch / zlib handle-band value (Headers/Request/Response, zlib
2267+
// streams) at or above the fetch band is a registry id, not a heap object —
2268+
// the pointer paths below would dereference the id and segfault. `key in
2269+
// <handle>` has no own-property meaning for these, so report `false`.
2270+
// Common/small handles (below the fetch band) are intentionally NOT caught
2271+
// here: they fall through to the registered small-handle property path later
2272+
// in this function. Same family as the string_from_header / inline-`.length`
2273+
// guards.
2274+
if obj_val.is_pointer() {
2275+
let addr = (obj_val.bits() & crate::value::POINTER_MASK) as usize;
2276+
if addr >= crate::value::addr_class::COMMON_HANDLE_BAND_END
2277+
&& crate::value::addr_class::is_handle_band(addr)
2278+
{
2279+
return nanbox_false;
2280+
}
2281+
}
2282+
22662283
// #1758: a SYMBOL key. The class-ref path below + the keys_array scan
22672284
// (string keys only) can't see a class-object's static `[Sym]` props nor
22682285
// ones inherited from a class-expression parent. Delegate to the symbol

crates/perry-runtime/src/object/global_fetch.rs

Lines changed: 82 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ static GLOBAL_FETCH_BODY_INIT_PTR: AtomicPtr<()> = AtomicPtr::new(null_mut());
6262
/// of a direct perry-stdlib symbol dependency (which would link-break a
6363
/// stdlib-less build — the #5112 regression class).
6464
static GLOBAL_HEADERS_ENTRIES_JSON: AtomicPtr<()> = AtomicPtr::new(null_mut());
65+
/// perry-stdlib's `Headers` → flat `{name: value}` object-JSON producer, used by
66+
/// the `fetch(url, { headers })` request path. A `Headers` instance is a
67+
/// fetch-band registry *handle*, not a heap pointer, so feeding it straight to
68+
/// `js_json_stringify` faults on the `gc_obj_type` back-read (the `claude -p`
69+
/// SIGSEGV). Routing handle-band `headers` values here reads the registry
70+
/// instead of dereferencing the id. Registered separately from the constructors
71+
/// so a stdlib-less runtime build stays link-clean (the #5112 regression class).
72+
static GLOBAL_HEADERS_OBJECT_JSON: AtomicPtr<()> = AtomicPtr::new(null_mut());
6573

6674
type HeadersEntriesJsonFn = extern "C" fn(f64) -> *mut crate::StringHeader;
6775

@@ -139,20 +147,23 @@ fn fetch_option_string_ptr(init: f64, name: &[u8]) -> *const crate::StringHeader
139147
crate::value::js_get_string_pointer_unified(value) as *const crate::StringHeader
140148
}
141149

150+
/// Codegen entry point for the `fetch(url, { headers })` request path: stringify
151+
/// the already-evaluated `headers` value into the flat `{name:value}` JSON that
152+
/// `js_fetch_with_options` parses, treating a `Headers` handle safely (no
153+
/// dereference of a fetch-band id). Mirrors the runtime thunk's
154+
/// `headers_init_json_ptr`; returned as an i64 `*const StringHeader` so the
155+
/// codegen call site can pass it straight into `js_fetch_with_options`.
156+
#[no_mangle]
157+
pub extern "C" fn js_fetch_headers_to_json(headers: f64) -> i64 {
158+
headers_init_json_ptr(headers) as i64
159+
}
160+
142161
fn fetch_headers_json_ptr(init: f64) -> *const crate::StringHeader {
143162
let headers = fetch_option(init, b"headers");
144-
if matches!(
145-
headers.to_bits(),
146-
crate::value::TAG_UNDEFINED | crate::value::TAG_NULL
147-
) {
148-
return crate::string::js_string_from_bytes(b"{}".as_ptr(), 2);
149-
}
150-
let json = unsafe { crate::json::js_json_stringify(headers, 0) };
151-
if json.is_null() {
152-
crate::string::js_string_from_bytes(b"{}".as_ptr(), 2)
153-
} else {
154-
json
155-
}
163+
// `init.headers` may be a `Headers` instance (a fetch-band handle), a plain
164+
// object, or null/undefined — `headers_init_json_ptr` normalizes all three
165+
// (Headers handle read from its registry, null/undefined → `{}`).
166+
headers_init_json_ptr(headers)
156167
}
157168

158169
#[cfg(feature = "external-fetch-symbols")]
@@ -255,6 +266,65 @@ fn call_global_headers_entries_json(value: f64) -> *mut crate::StringHeader {
255266
func(value)
256267
}
257268

269+
/// Register perry-stdlib's `Headers` → flat `{name: value}` object-JSON producer
270+
/// (used by the `fetch(url, { headers: Headers })` request path).
271+
#[no_mangle]
272+
pub extern "C" fn js_register_global_headers_object_json(f: HeadersEntriesJsonFn) {
273+
GLOBAL_HEADERS_OBJECT_JSON.store(f as *mut (), Ordering::Release);
274+
}
275+
276+
fn call_global_headers_object_json(value: f64) -> *mut crate::StringHeader {
277+
let f = GLOBAL_HEADERS_OBJECT_JSON.load(Ordering::Acquire);
278+
if f.is_null() {
279+
return null_mut();
280+
}
281+
let func: HeadersEntriesJsonFn = unsafe { std::mem::transmute(f) };
282+
func(value)
283+
}
284+
285+
/// JSON-stringify a fetch `init.headers` value into the flat `{name: value}`
286+
/// object that `js_fetch_with_options` parses, WITHOUT ever dereferencing a
287+
/// `Headers` registry handle.
288+
///
289+
/// A `Headers` instance is a fetch-band POINTER_TAG handle (its first id is
290+
/// `0x40000`), not a heap object. The generic `js_json_stringify` walker reaches
291+
/// `gc_obj_type` and back-reads `id - 8` as a `GcHeader`, faulting on unmapped
292+
/// memory — the consistent `claude -p` SIGSEGV during request setup. Classify by
293+
/// address band first: a handle-band `Headers` value is delegated to the
294+
/// registered stdlib producer (which reads its own registry); everything else (a
295+
/// plain `{ … }` object, a `Map`, …) is a real heap value and stringifies
296+
/// safely. Same family as #5559/#5560 (handle-band ids mis-dereferenced as heap
297+
/// pointers).
298+
fn headers_init_json_ptr(headers: f64) -> *const crate::StringHeader {
299+
// Normalize null/undefined to an empty object so BOTH entry points — the
300+
// codegen `js_fetch_headers_to_json` and the runtime `fetch_headers_json_ptr`
301+
// — serialize `headers: null` as `{}` rather than the literal `"null"` that
302+
// `js_fetch_with_options` cannot parse.
303+
if matches!(
304+
headers.to_bits(),
305+
crate::value::TAG_UNDEFINED | crate::value::TAG_NULL
306+
) {
307+
return crate::string::js_string_from_bytes(b"{}".as_ptr(), 2);
308+
}
309+
let jsv = crate::value::JSValue::from_bits(headers.to_bits());
310+
if jsv.is_pointer() {
311+
let addr = (headers.to_bits() & 0x0000_FFFF_FFFF_FFFF) as usize;
312+
if crate::value::addr_class::is_handle_band(addr) {
313+
let p = call_global_headers_object_json(headers);
314+
if !p.is_null() {
315+
return p;
316+
}
317+
return crate::string::js_string_from_bytes(b"{}".as_ptr(), 2);
318+
}
319+
}
320+
let json = unsafe { crate::json::js_json_stringify(headers, 0) };
321+
if json.is_null() {
322+
crate::string::js_string_from_bytes(b"{}".as_ptr(), 2)
323+
} else {
324+
json
325+
}
326+
}
327+
258328
/// Normalize a `res.setHeaders(x)` argument into a JSON array of
259329
/// `[name, value]` entries. Node accepts only `Headers` and `Map`; this
260330
/// returns null for anything else so the http layer can raise

crates/perry-runtime/src/object/mod.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2393,8 +2393,23 @@ pub unsafe extern "C" fn js_object_to_string(value: f64) -> f64 {
23932393
}
23942394
// Heap-allocated pointers: discriminate Array / Error from generic
23952395
// Object via the GC header type byte.
2396+
//
2397+
// A handle-band value (`< 0x100000`: Web Fetch `Headers`/`Request`/
2398+
// `Response`/`Blob` ids, net/http small handles, …) is a registry id, NOT a
2399+
// heap pointer. It reaches here when the SDK coerces such a handle to a
2400+
// string — e.g. an implicit `ToString(headers)` while assembling a request —
2401+
// and the bare id lands in `raw_addr`. The `>= GC_HEADER_SIZE + 0x1000`
2402+
// floor below only rejects sub-`0x1008` addresses, so a fetch handle
2403+
// (`0x40000`+) sails through and the `(*gc_header).obj_type` back-read
2404+
// dereferences `id - 8` (the unmapped `0x3FFFB` in the `claude -p` SIGSEGV).
2405+
// Treat the whole handle band as a non-heap value so it falls through to the
2406+
// generic `[object Object]` tag instead of being dereferenced (same
2407+
// #5559/#5560 family as `string_from_header` / `gc_obj_type`).
23962408
let raw_ptr = raw_addr as *const u8;
2397-
if !raw_ptr.is_null() && (raw_ptr as usize) >= crate::gc::GC_HEADER_SIZE + 0x1000 {
2409+
if !raw_ptr.is_null()
2410+
&& (raw_ptr as usize) >= crate::gc::GC_HEADER_SIZE + 0x1000
2411+
&& !crate::value::addr_class::is_handle_band(raw_addr)
2412+
{
23982413
if let Some(tag) = arguments_object_to_string_tag(value) {
23992414
return tag;
24002415
}
@@ -2423,7 +2438,14 @@ pub unsafe extern "C" fn js_object_to_string(value: f64) -> f64 {
24232438
let mut tag_str: Option<String> = None;
24242439
if (bits & 0xFFFF_0000_0000_0000) == POINTER_TAG {
24252440
let obj_ptr = (bits & POINTER_MASK) as *const ObjectHeader;
2426-
if !obj_ptr.is_null() && (obj_ptr as usize) >= 0x1000 {
2441+
// Skip handle-band ids (Web Fetch / net / http registry handles) — they
2442+
// are POINTER_TAG-boxed but are NOT `ObjectHeader` pointers, so reading
2443+
// `(*obj_ptr).class_id` would dereference the bare id (the same fetch
2444+
// handle that faults at the GcHeader back-read above).
2445+
if !obj_ptr.is_null()
2446+
&& (obj_ptr as usize) >= 0x1000
2447+
&& !crate::value::addr_class::is_handle_band(obj_ptr as usize)
2448+
{
24272449
let class_id = (*obj_ptr).class_id;
24282450
if class_id == crate::object::CLASS_ID_COMPRESSION_STREAM {
24292451
tag_str = Some("CompressionStream".to_string());

crates/perry-runtime/src/object/tests.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,3 +758,32 @@ fn wide_object_index_reads_and_descriptor_writes() {
758758
assert_eq!(f64::from_bits(v43.bits()), 4343.0);
759759
}
760760
}
761+
762+
/// `js_object_to_string` must NOT dereference a handle-band value (a Web Fetch
763+
/// `Headers`/`Request`/`Response`/`Blob` registry id, or any other small native
764+
/// handle) as a heap pointer. Such ids are NaN-boxed as `POINTER_TAG` values but
765+
/// are not `GcHeader`-prefixed objects; reading the GC type byte at `id - 8` (or
766+
/// `(*ObjectHeader).class_id` at `id`) faults on unmapped low memory. This is
767+
/// the `claude -p` SIGSEGV (`EXC_BAD_ACCESS` at `0x3FFFB` == `0x40003 - 8`),
768+
/// where the SDK coerced a `Headers` handle to a string while building a
769+
/// request. The brand must fall through to the generic `[object Object]` tag.
770+
#[test]
771+
fn object_to_string_rejects_handle_band_ids() {
772+
use crate::value::addr_class;
773+
for &id in &[
774+
addr_class::FETCH_HANDLE_BAND_START, // 0x40000
775+
addr_class::FETCH_HANDLE_BAND_START + 3, // the 0x40003 from the crash
776+
addr_class::HANDLE_BAND_MAX - 1, // 0xFFFFF
777+
1usize, // common native handle
778+
] {
779+
assert!(addr_class::is_handle_band(id));
780+
let handle = crate::value::js_nanbox_pointer(id as i64);
781+
// Must return a string brand without dereferencing the bogus pointer.
782+
let result = unsafe { js_object_to_string(handle) };
783+
let s = js_string_to_rust(JSValue::from_bits(result.to_bits()));
784+
assert_eq!(
785+
s, "[object Object]",
786+
"handle-band id {id:#x} must brand as [object Object], got {s:?}"
787+
);
788+
}
789+
}

crates/perry-stdlib/src/common/dispatch.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3265,6 +3265,13 @@ pub unsafe extern "C" fn js_stdlib_init_dispatch() {
32653265
fn js_register_global_headers_entries_json(
32663266
f: extern "C" fn(f64) -> *mut perry_runtime::StringHeader,
32673267
);
3268+
// Headers → flat `{name:value}` object-JSON producer for the
3269+
// `fetch(url, { headers: Headers })` request path (avoids the
3270+
// `js_json_stringify`-on-handle SIGSEGV).
3271+
#[cfg(feature = "web-fetch")]
3272+
fn js_register_global_headers_object_json(
3273+
f: extern "C" fn(f64) -> *mut perry_runtime::StringHeader,
3274+
);
32683275
fn js_register_worker_threads_namespace_getters(
32693276
worker_data: extern "C" fn() -> f64,
32703277
is_main_thread: extern "C" fn() -> f64,
@@ -3301,6 +3308,8 @@ pub unsafe extern "C" fn js_stdlib_init_dispatch() {
33013308
js_register_global_fetch_body_init_ptr(crate::fetch::js_response_body_init_ptr);
33023309
#[cfg(feature = "http-client")]
33033310
js_register_global_headers_entries_json(crate::fetch::js_headers_setheaders_entries_json);
3311+
#[cfg(feature = "web-fetch")]
3312+
js_register_global_headers_object_json(crate::fetch::js_headers_fetch_object_json);
33043313
// Probe / `on` hook / constructor all route through the shared
33053314
// `extern "C"` events surface declared above dispatch_event_emitter_method
33063315
// (#4995): the linker resolves them to whichever EventEmitter impl is in

crates/perry-stdlib/src/fetch/headers.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,48 @@ pub extern "C" fn js_headers_setheaders_entries_json(handle: f64) -> *mut String
366366
js_string_from_bytes(s.as_ptr(), s.len() as u32)
367367
}
368368

369+
/// Produce a flat `{ "name": "value", … }` JSON object from a `Headers` handle,
370+
/// for the `fetch(url, { headers })` request path (which parses headers-JSON as
371+
/// a `HashMap<String, String>`).
372+
///
373+
/// The global `fetch` thunk and the codegen `headers_dynamic` path both
374+
/// JSON-stringify the `init.headers` value. A `Headers` instance is a
375+
/// fetch-band registry *handle* (its first id is `0x40000`), NOT a heap
376+
/// pointer, so the generic `js_json_stringify` walker reaches `gc_obj_type`
377+
/// and dereferences `id - 8` as a `GcHeader` → SIGSEGV (the `claude -p` crash;
378+
/// same #5559/#5560 family of handle-band ids treated as heap pointers). The
379+
/// fetch entry points classify by address band BEFORE any dereference and route
380+
/// `Headers` handles here, reading the request's own header registry instead of
381+
/// walking a bogus pointer. Returns null for an unknown handle so the caller
382+
/// falls back to `{}`.
383+
#[no_mangle]
384+
pub extern "C" fn js_headers_fetch_object_json(handle: f64) -> *mut StringHeader {
385+
let id = handle_id(handle);
386+
let guard = HEADERS_REGISTRY.lock().unwrap();
387+
let Some(store) = guard.get(&id) else {
388+
return std::ptr::null_mut();
389+
};
390+
// Preserve insertion order; collapse repeated names (incl. Set-Cookie) the
391+
// same way `HeadersStore::get` does so the request carries the combined
392+
// value. A `serde_json::Map` keeps first-seen insertion order under the
393+
// `preserve_order` feature; without it the object is still a valid flat map
394+
// that `serde_json::from_str::<HashMap<_,_>>` accepts.
395+
let mut seen: Vec<String> = Vec::new();
396+
let mut out = serde_json::Map::new();
397+
for (k, _) in &store.entries {
398+
if seen.iter().any(|s| s == k) {
399+
continue;
400+
}
401+
seen.push(k.clone());
402+
if let Some(v) = store.get(k) {
403+
out.insert(k.clone(), serde_json::Value::String(v));
404+
}
405+
}
406+
let s =
407+
serde_json::to_string(&serde_json::Value::Object(out)).unwrap_or_else(|_| "{}".to_string());
408+
js_string_from_bytes(s.as_ptr(), s.len() as u32)
409+
}
410+
369411
#[no_mangle]
370412
pub unsafe extern "C" fn js_headers_has(handle: f64, key_ptr: *const StringHeader) -> f64 {
371413
let id = handle_id(handle);
crates/perry-stdlib/src/fetch/headers_json_test.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//! Test for `js_headers_fetch_object_json`, kept out of `mod.rs` so that file
2+
//! stays under the 2000-LOC CI limit. `use super::*` carries the fetch
3+
//! module's items (`pub use headers::*` re-exports `js_headers_fetch_object_json`).
4+
5+
use super::*;
6+
7+
/// `js_headers_fetch_object_json` must read a `Headers` handle from the
8+
/// registry and emit a flat `{name:value}` JSON object that
9+
/// `js_fetch_with_options` can parse — WITHOUT dereferencing the handle id
10+
/// as a heap pointer (the `claude -p` `fetch(url, { headers: Headers })`
11+
/// SIGSEGV). Unknown handles yield a null pointer so the caller falls back
12+
/// to `{}`.
13+
#[test]
14+
fn headers_fetch_object_json_serializes_registry_store() {
15+
let mut store = HeadersStore::default();
16+
store.set("Content-Type", "application/json");
17+
store.set("X-Api-Key", "secret");
18+
let id = alloc_headers(store);
19+
let handle = handle_to_f64(id);
20+
21+
let ptr = js_headers_fetch_object_json(handle);
22+
assert!(!ptr.is_null());
23+
let json = unsafe { string_from_header(ptr as *const StringHeader) }.unwrap();
24+
let parsed: std::collections::HashMap<String, String> =
25+
serde_json::from_str(&json).expect("flat object JSON");
26+
assert_eq!(
27+
parsed.get("content-type").map(String::as_str),
28+
Some("application/json")
29+
);
30+
assert_eq!(parsed.get("x-api-key").map(String::as_str), Some("secret"));
31+
32+
// An unknown handle (never allocated) must not be dereferenced.
33+
let bogus = handle_to_f64(perry_runtime::value::addr_class::FETCH_HANDLE_BAND_START + 0xABCD);
34+
assert!(js_headers_fetch_object_json(bogus).is_null());
35+
}

crates/perry-stdlib/src/fetch/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ fn alloc_fetch_handle_id() -> usize {
104104
id
105105
}
106106

107+
#[cfg(test)]
108+
mod headers_json_test;
109+
107110
#[cfg(test)]
108111
mod tests {
109112
use super::*;

0 commit comments

Comments
 (0)