diff --git a/crates/perry-codegen/src/expr/logical_collections.rs b/crates/perry-codegen/src/expr/logical_collections.rs index 0b4b436ab3..662acdcd85 100644 --- a/crates/perry-codegen/src/expr/logical_collections.rs +++ b/crates/perry-codegen/src/expr/logical_collections.rs @@ -122,12 +122,17 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { }; let blk = ctx.block(); - // js_json_stringify(value: f64, indent: i32) -> i64 string handle. - let zero_i = "0".to_string(); + // Stringify the headers value into the flat `{name:value}` JSON that + // `js_fetch_with_options` parses. Routed through + // `js_fetch_headers_to_json` (not the generic `js_json_stringify`) so + // a `Headers` instance — a fetch-band registry handle, e.g. `headers: + // new Headers(h)` — is read from its registry instead of being + // dereferenced as a heap pointer (the `js_json_stringify`-on-handle + // SIGSEGV; same #5559/#5560 handle-band family). let headers_str = blk.call( I64, - "js_json_stringify", - &[(DOUBLE, &headers_obj_box), (I32, &zero_i)], + "js_fetch_headers_to_json", + &[(DOUBLE, &headers_obj_box)], ); // The runtime takes raw StringHeader pointers (i64). Unbox each diff --git a/crates/perry-codegen/src/runtime_decls/stdlib_ffi.rs b/crates/perry-codegen/src/runtime_decls/stdlib_ffi.rs index e33dc26b70..d5cd92c501 100644 --- a/crates/perry-codegen/src/runtime_decls/stdlib_ffi.rs +++ b/crates/perry-codegen/src/runtime_decls/stdlib_ffi.rs @@ -1699,6 +1699,10 @@ pub fn declare_stdlib_ffi(module: &mut LlModule) { module.declare_function("js_fetch_stream_status", DOUBLE, &[DOUBLE]); module.declare_function("js_fetch_text", I64, &[I64]); module.declare_function("js_fetch_with_options", I64, &[I64, I64, I64, I64]); + // Headers-aware JSON stringify for the `fetch(url, { headers })` request + // path: takes the headers value (f64) and returns a `*const StringHeader` + // (i64) holding `{name:value}` JSON, treating a `Headers` handle safely. + module.declare_function("js_fetch_headers_to_json", I64, &[DOUBLE]); // ========== Net ========== module.declare_function("js_net_create_connection", DOUBLE, &[I32, I64, I64]); diff --git a/crates/perry-runtime/src/object/field_get_set.rs b/crates/perry-runtime/src/object/field_get_set.rs index f0effc5d2e..0facc80c77 100644 --- a/crates/perry-runtime/src/object/field_get_set.rs +++ b/crates/perry-runtime/src/object/field_get_set.rs @@ -2263,6 +2263,23 @@ pub extern "C" fn js_object_has_property(obj: f64, key: f64) -> f64 { }; } + // A Web Fetch / zlib handle-band value (Headers/Request/Response, zlib + // streams) at or above the fetch band is a registry id, not a heap object — + // the pointer paths below would dereference the id and segfault. `key in + // ` has no own-property meaning for these, so report `false`. + // Common/small handles (below the fetch band) are intentionally NOT caught + // here: they fall through to the registered small-handle property path later + // in this function. Same family as the string_from_header / inline-`.length` + // guards. + if obj_val.is_pointer() { + let addr = (obj_val.bits() & crate::value::POINTER_MASK) as usize; + if addr >= crate::value::addr_class::COMMON_HANDLE_BAND_END + && crate::value::addr_class::is_handle_band(addr) + { + return nanbox_false; + } + } + // #1758: a SYMBOL key. The class-ref path below + the keys_array scan // (string keys only) can't see a class-object's static `[Sym]` props nor // ones inherited from a class-expression parent. Delegate to the symbol diff --git a/crates/perry-runtime/src/object/global_fetch.rs b/crates/perry-runtime/src/object/global_fetch.rs index 21fd079040..f82eaf573e 100644 --- a/crates/perry-runtime/src/object/global_fetch.rs +++ b/crates/perry-runtime/src/object/global_fetch.rs @@ -62,6 +62,14 @@ static GLOBAL_FETCH_BODY_INIT_PTR: AtomicPtr<()> = AtomicPtr::new(null_mut()); /// of a direct perry-stdlib symbol dependency (which would link-break a /// stdlib-less build — the #5112 regression class). static GLOBAL_HEADERS_ENTRIES_JSON: AtomicPtr<()> = AtomicPtr::new(null_mut()); +/// perry-stdlib's `Headers` → flat `{name: value}` object-JSON producer, used by +/// the `fetch(url, { headers })` request path. A `Headers` instance is a +/// fetch-band registry *handle*, not a heap pointer, so feeding it straight to +/// `js_json_stringify` faults on the `gc_obj_type` back-read (the `claude -p` +/// SIGSEGV). Routing handle-band `headers` values here reads the registry +/// instead of dereferencing the id. Registered separately from the constructors +/// so a stdlib-less runtime build stays link-clean (the #5112 regression class). +static GLOBAL_HEADERS_OBJECT_JSON: AtomicPtr<()> = AtomicPtr::new(null_mut()); type HeadersEntriesJsonFn = extern "C" fn(f64) -> *mut crate::StringHeader; @@ -139,20 +147,23 @@ fn fetch_option_string_ptr(init: f64, name: &[u8]) -> *const crate::StringHeader crate::value::js_get_string_pointer_unified(value) as *const crate::StringHeader } +/// Codegen entry point for the `fetch(url, { headers })` request path: stringify +/// the already-evaluated `headers` value into the flat `{name:value}` JSON that +/// `js_fetch_with_options` parses, treating a `Headers` handle safely (no +/// dereference of a fetch-band id). Mirrors the runtime thunk's +/// `headers_init_json_ptr`; returned as an i64 `*const StringHeader` so the +/// codegen call site can pass it straight into `js_fetch_with_options`. +#[no_mangle] +pub extern "C" fn js_fetch_headers_to_json(headers: f64) -> i64 { + headers_init_json_ptr(headers) as i64 +} + fn fetch_headers_json_ptr(init: f64) -> *const crate::StringHeader { let headers = fetch_option(init, b"headers"); - if matches!( - headers.to_bits(), - crate::value::TAG_UNDEFINED | crate::value::TAG_NULL - ) { - return crate::string::js_string_from_bytes(b"{}".as_ptr(), 2); - } - let json = unsafe { crate::json::js_json_stringify(headers, 0) }; - if json.is_null() { - crate::string::js_string_from_bytes(b"{}".as_ptr(), 2) - } else { - json - } + // `init.headers` may be a `Headers` instance (a fetch-band handle), a plain + // object, or null/undefined — `headers_init_json_ptr` normalizes all three + // (Headers handle read from its registry, null/undefined → `{}`). + headers_init_json_ptr(headers) } #[cfg(feature = "external-fetch-symbols")] @@ -255,6 +266,65 @@ fn call_global_headers_entries_json(value: f64) -> *mut crate::StringHeader { func(value) } +/// Register perry-stdlib's `Headers` → flat `{name: value}` object-JSON producer +/// (used by the `fetch(url, { headers: Headers })` request path). +#[no_mangle] +pub extern "C" fn js_register_global_headers_object_json(f: HeadersEntriesJsonFn) { + GLOBAL_HEADERS_OBJECT_JSON.store(f as *mut (), Ordering::Release); +} + +fn call_global_headers_object_json(value: f64) -> *mut crate::StringHeader { + let f = GLOBAL_HEADERS_OBJECT_JSON.load(Ordering::Acquire); + if f.is_null() { + return null_mut(); + } + let func: HeadersEntriesJsonFn = unsafe { std::mem::transmute(f) }; + func(value) +} + +/// JSON-stringify a fetch `init.headers` value into the flat `{name: value}` +/// object that `js_fetch_with_options` parses, WITHOUT ever dereferencing a +/// `Headers` registry handle. +/// +/// A `Headers` instance is a fetch-band POINTER_TAG handle (its first id is +/// `0x40000`), not a heap object. The generic `js_json_stringify` walker reaches +/// `gc_obj_type` and back-reads `id - 8` as a `GcHeader`, faulting on unmapped +/// memory — the consistent `claude -p` SIGSEGV during request setup. Classify by +/// address band first: a handle-band `Headers` value is delegated to the +/// registered stdlib producer (which reads its own registry); everything else (a +/// plain `{ … }` object, a `Map`, …) is a real heap value and stringifies +/// safely. Same family as #5559/#5560 (handle-band ids mis-dereferenced as heap +/// pointers). +fn headers_init_json_ptr(headers: f64) -> *const crate::StringHeader { + // Normalize null/undefined to an empty object so BOTH entry points — the + // codegen `js_fetch_headers_to_json` and the runtime `fetch_headers_json_ptr` + // — serialize `headers: null` as `{}` rather than the literal `"null"` that + // `js_fetch_with_options` cannot parse. + if matches!( + headers.to_bits(), + crate::value::TAG_UNDEFINED | crate::value::TAG_NULL + ) { + return crate::string::js_string_from_bytes(b"{}".as_ptr(), 2); + } + let jsv = crate::value::JSValue::from_bits(headers.to_bits()); + if jsv.is_pointer() { + let addr = (headers.to_bits() & 0x0000_FFFF_FFFF_FFFF) as usize; + if crate::value::addr_class::is_handle_band(addr) { + let p = call_global_headers_object_json(headers); + if !p.is_null() { + return p; + } + return crate::string::js_string_from_bytes(b"{}".as_ptr(), 2); + } + } + let json = unsafe { crate::json::js_json_stringify(headers, 0) }; + if json.is_null() { + crate::string::js_string_from_bytes(b"{}".as_ptr(), 2) + } else { + json + } +} + /// Normalize a `res.setHeaders(x)` argument into a JSON array of /// `[name, value]` entries. Node accepts only `Headers` and `Map`; this /// returns null for anything else so the http layer can raise diff --git a/crates/perry-runtime/src/object/mod.rs b/crates/perry-runtime/src/object/mod.rs index 4459b32ea3..6b45e83ce3 100644 --- a/crates/perry-runtime/src/object/mod.rs +++ b/crates/perry-runtime/src/object/mod.rs @@ -2393,8 +2393,23 @@ pub unsafe extern "C" fn js_object_to_string(value: f64) -> f64 { } // Heap-allocated pointers: discriminate Array / Error from generic // Object via the GC header type byte. + // + // A handle-band value (`< 0x100000`: Web Fetch `Headers`/`Request`/ + // `Response`/`Blob` ids, net/http small handles, …) is a registry id, NOT a + // heap pointer. It reaches here when the SDK coerces such a handle to a + // string — e.g. an implicit `ToString(headers)` while assembling a request — + // and the bare id lands in `raw_addr`. The `>= GC_HEADER_SIZE + 0x1000` + // floor below only rejects sub-`0x1008` addresses, so a fetch handle + // (`0x40000`+) sails through and the `(*gc_header).obj_type` back-read + // dereferences `id - 8` (the unmapped `0x3FFFB` in the `claude -p` SIGSEGV). + // Treat the whole handle band as a non-heap value so it falls through to the + // generic `[object Object]` tag instead of being dereferenced (same + // #5559/#5560 family as `string_from_header` / `gc_obj_type`). let raw_ptr = raw_addr as *const u8; - if !raw_ptr.is_null() && (raw_ptr as usize) >= crate::gc::GC_HEADER_SIZE + 0x1000 { + if !raw_ptr.is_null() + && (raw_ptr as usize) >= crate::gc::GC_HEADER_SIZE + 0x1000 + && !crate::value::addr_class::is_handle_band(raw_addr) + { if let Some(tag) = arguments_object_to_string_tag(value) { return tag; } @@ -2423,7 +2438,14 @@ pub unsafe extern "C" fn js_object_to_string(value: f64) -> f64 { let mut tag_str: Option = None; if (bits & 0xFFFF_0000_0000_0000) == POINTER_TAG { let obj_ptr = (bits & POINTER_MASK) as *const ObjectHeader; - if !obj_ptr.is_null() && (obj_ptr as usize) >= 0x1000 { + // Skip handle-band ids (Web Fetch / net / http registry handles) — they + // are POINTER_TAG-boxed but are NOT `ObjectHeader` pointers, so reading + // `(*obj_ptr).class_id` would dereference the bare id (the same fetch + // handle that faults at the GcHeader back-read above). + if !obj_ptr.is_null() + && (obj_ptr as usize) >= 0x1000 + && !crate::value::addr_class::is_handle_band(obj_ptr as usize) + { let class_id = (*obj_ptr).class_id; if class_id == crate::object::CLASS_ID_COMPRESSION_STREAM { tag_str = Some("CompressionStream".to_string()); diff --git a/crates/perry-runtime/src/object/tests.rs b/crates/perry-runtime/src/object/tests.rs index 6dc536b25e..113b1f6d3a 100644 --- a/crates/perry-runtime/src/object/tests.rs +++ b/crates/perry-runtime/src/object/tests.rs @@ -758,3 +758,32 @@ fn wide_object_index_reads_and_descriptor_writes() { assert_eq!(f64::from_bits(v43.bits()), 4343.0); } } + +/// `js_object_to_string` must NOT dereference a handle-band value (a Web Fetch +/// `Headers`/`Request`/`Response`/`Blob` registry id, or any other small native +/// handle) as a heap pointer. Such ids are NaN-boxed as `POINTER_TAG` values but +/// are not `GcHeader`-prefixed objects; reading the GC type byte at `id - 8` (or +/// `(*ObjectHeader).class_id` at `id`) faults on unmapped low memory. This is +/// the `claude -p` SIGSEGV (`EXC_BAD_ACCESS` at `0x3FFFB` == `0x40003 - 8`), +/// where the SDK coerced a `Headers` handle to a string while building a +/// request. The brand must fall through to the generic `[object Object]` tag. +#[test] +fn object_to_string_rejects_handle_band_ids() { + use crate::value::addr_class; + for &id in &[ + addr_class::FETCH_HANDLE_BAND_START, // 0x40000 + addr_class::FETCH_HANDLE_BAND_START + 3, // the 0x40003 from the crash + addr_class::HANDLE_BAND_MAX - 1, // 0xFFFFF + 1usize, // common native handle + ] { + assert!(addr_class::is_handle_band(id)); + let handle = crate::value::js_nanbox_pointer(id as i64); + // Must return a string brand without dereferencing the bogus pointer. + let result = unsafe { js_object_to_string(handle) }; + let s = js_string_to_rust(JSValue::from_bits(result.to_bits())); + assert_eq!( + s, "[object Object]", + "handle-band id {id:#x} must brand as [object Object], got {s:?}" + ); + } +} diff --git a/crates/perry-stdlib/src/common/dispatch.rs b/crates/perry-stdlib/src/common/dispatch.rs index fcba7c859d..4ff64e29ed 100644 --- a/crates/perry-stdlib/src/common/dispatch.rs +++ b/crates/perry-stdlib/src/common/dispatch.rs @@ -3265,6 +3265,13 @@ pub unsafe extern "C" fn js_stdlib_init_dispatch() { fn js_register_global_headers_entries_json( f: extern "C" fn(f64) -> *mut perry_runtime::StringHeader, ); + // Headers → flat `{name:value}` object-JSON producer for the + // `fetch(url, { headers: Headers })` request path (avoids the + // `js_json_stringify`-on-handle SIGSEGV). + #[cfg(feature = "web-fetch")] + fn js_register_global_headers_object_json( + f: extern "C" fn(f64) -> *mut perry_runtime::StringHeader, + ); fn js_register_worker_threads_namespace_getters( worker_data: extern "C" fn() -> f64, is_main_thread: extern "C" fn() -> f64, @@ -3301,6 +3308,8 @@ pub unsafe extern "C" fn js_stdlib_init_dispatch() { js_register_global_fetch_body_init_ptr(crate::fetch::js_response_body_init_ptr); #[cfg(feature = "http-client")] js_register_global_headers_entries_json(crate::fetch::js_headers_setheaders_entries_json); + #[cfg(feature = "web-fetch")] + js_register_global_headers_object_json(crate::fetch::js_headers_fetch_object_json); // Probe / `on` hook / constructor all route through the shared // `extern "C"` events surface declared above dispatch_event_emitter_method // (#4995): the linker resolves them to whichever EventEmitter impl is in diff --git a/crates/perry-stdlib/src/fetch/headers.rs b/crates/perry-stdlib/src/fetch/headers.rs index 05bf01eab2..1cff0d5567 100644 --- a/crates/perry-stdlib/src/fetch/headers.rs +++ b/crates/perry-stdlib/src/fetch/headers.rs @@ -366,6 +366,48 @@ pub extern "C" fn js_headers_setheaders_entries_json(handle: f64) -> *mut String js_string_from_bytes(s.as_ptr(), s.len() as u32) } +/// Produce a flat `{ "name": "value", … }` JSON object from a `Headers` handle, +/// for the `fetch(url, { headers })` request path (which parses headers-JSON as +/// a `HashMap`). +/// +/// The global `fetch` thunk and the codegen `headers_dynamic` path both +/// JSON-stringify the `init.headers` value. A `Headers` instance is a +/// fetch-band registry *handle* (its first id is `0x40000`), NOT a heap +/// pointer, so the generic `js_json_stringify` walker reaches `gc_obj_type` +/// and dereferences `id - 8` as a `GcHeader` → SIGSEGV (the `claude -p` crash; +/// same #5559/#5560 family of handle-band ids treated as heap pointers). The +/// fetch entry points classify by address band BEFORE any dereference and route +/// `Headers` handles here, reading the request's own header registry instead of +/// walking a bogus pointer. Returns null for an unknown handle so the caller +/// falls back to `{}`. +#[no_mangle] +pub extern "C" fn js_headers_fetch_object_json(handle: f64) -> *mut StringHeader { + let id = handle_id(handle); + let guard = HEADERS_REGISTRY.lock().unwrap(); + let Some(store) = guard.get(&id) else { + return std::ptr::null_mut(); + }; + // Preserve insertion order; collapse repeated names (incl. Set-Cookie) the + // same way `HeadersStore::get` does so the request carries the combined + // value. A `serde_json::Map` keeps first-seen insertion order under the + // `preserve_order` feature; without it the object is still a valid flat map + // that `serde_json::from_str::>` accepts. + let mut seen: Vec = Vec::new(); + let mut out = serde_json::Map::new(); + for (k, _) in &store.entries { + if seen.iter().any(|s| s == k) { + continue; + } + seen.push(k.clone()); + if let Some(v) = store.get(k) { + out.insert(k.clone(), serde_json::Value::String(v)); + } + } + let s = + serde_json::to_string(&serde_json::Value::Object(out)).unwrap_or_else(|_| "{}".to_string()); + js_string_from_bytes(s.as_ptr(), s.len() as u32) +} + #[no_mangle] pub unsafe extern "C" fn js_headers_has(handle: f64, key_ptr: *const StringHeader) -> f64 { let id = handle_id(handle); diff --git a/crates/perry-stdlib/src/fetch/headers_json_test.rs b/crates/perry-stdlib/src/fetch/headers_json_test.rs new file mode 100644 index 0000000000..328f9073c1 --- /dev/null +++ b/crates/perry-stdlib/src/fetch/headers_json_test.rs @@ -0,0 +1,35 @@ +//! Test for `js_headers_fetch_object_json`, kept out of `mod.rs` so that file +//! stays under the 2000-LOC CI limit. `use super::*` carries the fetch +//! module's items (`pub use headers::*` re-exports `js_headers_fetch_object_json`). + +use super::*; + +/// `js_headers_fetch_object_json` must read a `Headers` handle from the +/// registry and emit a flat `{name:value}` JSON object that +/// `js_fetch_with_options` can parse — WITHOUT dereferencing the handle id +/// as a heap pointer (the `claude -p` `fetch(url, { headers: Headers })` +/// SIGSEGV). Unknown handles yield a null pointer so the caller falls back +/// to `{}`. +#[test] +fn headers_fetch_object_json_serializes_registry_store() { + let mut store = HeadersStore::default(); + store.set("Content-Type", "application/json"); + store.set("X-Api-Key", "secret"); + let id = alloc_headers(store); + let handle = handle_to_f64(id); + + let ptr = js_headers_fetch_object_json(handle); + assert!(!ptr.is_null()); + let json = unsafe { string_from_header(ptr as *const StringHeader) }.unwrap(); + let parsed: std::collections::HashMap = + serde_json::from_str(&json).expect("flat object JSON"); + assert_eq!( + parsed.get("content-type").map(String::as_str), + Some("application/json") + ); + assert_eq!(parsed.get("x-api-key").map(String::as_str), Some("secret")); + + // An unknown handle (never allocated) must not be dereferenced. + let bogus = handle_to_f64(perry_runtime::value::addr_class::FETCH_HANDLE_BAND_START + 0xABCD); + assert!(js_headers_fetch_object_json(bogus).is_null()); +} diff --git a/crates/perry-stdlib/src/fetch/mod.rs b/crates/perry-stdlib/src/fetch/mod.rs index 23f11a8b53..ae300764d6 100644 --- a/crates/perry-stdlib/src/fetch/mod.rs +++ b/crates/perry-stdlib/src/fetch/mod.rs @@ -104,6 +104,9 @@ fn alloc_fetch_handle_id() -> usize { id } +#[cfg(test)] +mod headers_json_test; + #[cfg(test)] mod tests { use super::*;