diff --git a/crates/c/src/lib.rs b/crates/c/src/lib.rs index d28908057..626a670b2 100644 --- a/crates/c/src/lib.rs +++ b/crates/c/src/lib.rs @@ -3995,6 +3995,14 @@ impl Bindgen for FunctionBindgen<'_, '_> { self.load("uint8_t *", *offset, operands, results) } Instruction::LengthLoad { offset } => self.load("size_t", *offset, operands, results), + Instruction::LiftNamedFromMemory { .. } => unreachable!( + "LiftNamedFromMemory is only emitted by generators that implement \ + Bindgen::lift_helper_name, which this generator does not" + ), + Instruction::LowerNamedToMemory { .. } => unreachable!( + "LowerNamedToMemory is only emitted by generators that implement \ + Bindgen::lower_helper_name, which this generator does not" + ), Instruction::I32Store { offset } => self.store("int32_t", *offset, operands), Instruction::I64Store { offset } => self.store("int64_t", *offset, operands), Instruction::F32Store { offset } => self.store("float", *offset, operands), diff --git a/crates/core/src/abi.rs b/crates/core/src/abi.rs index 73238fc93..08a1bfa06 100644 --- a/crates/core/src/abi.rs +++ b/crates/core/src/abi.rs @@ -122,6 +122,30 @@ def_instruction! { /// Like `I32Load` or `I64Load`, but for loading array length values. LengthLoad { offset: ArchitectureSize } : [1] => [1], + /// Pops a base pointer from the stack, calls a pre-generated, shared + /// per-type lift helper function to read and lift the named aggregate + /// type `ty` stored at the constant `offset` from that pointer, and + /// pushes the lifted value. + /// + /// This is used to "outline" the canonical-ABI lift of large named + /// record/variant types into shared helper functions instead of + /// inlining the full recursive lift at every use site. Keeping each + /// generated function small avoids the super-linear native-compile cost + /// of a single huge function. + LiftNamedFromMemory { ty: TypeId, offset: ArchitectureSize } : [1] => [1], + + /// Pops a value to lower and then a base pointer from the stack, calls a + /// pre-generated, shared per-type lower helper function to write the + /// named aggregate type `ty` at the constant `offset` from that + /// pointer, producing no result. + /// + /// This is the lower-side counterpart of `LiftNamedFromMemory`: it + /// outlines the canonical-ABI "write to memory" of large named + /// record/variant types into shared helper functions instead of + /// inlining the full recursive lower at every use site. The value + /// operand is `operands[0]` and the base pointer is `operands[1]`. + LowerNamedToMemory { ty: TypeId, offset: ArchitectureSize } : [2] => [0], + /// Pops a pointer from the stack and then an `i32` value. /// Stores the value in little-endian at the pointer specified plus the /// constant `offset`. @@ -791,6 +815,32 @@ pub trait Bindgen { /// "canonical" form for lists. This dictates whether the `ListCanonLower` /// and `ListCanonLift` instructions are used or not. fn is_list_canonical(&self, resolve: &Resolve, element: &Type) -> bool; + + /// Returns the name of a pre-generated, shared lift helper function for the + /// named aggregate type `id`, if one exists. + /// + /// When this returns `Some`, the canonical-ABI lift of that type (in + /// `read_from_memory`) is "outlined" into a call to the named helper + /// (emitted as `Instruction::LiftNamedFromMemory`) instead of being inlined + /// recursively. Generators that do not implement helper outlining should + /// return `None` (the default). + fn lift_helper_name(&self, resolve: &Resolve, id: TypeId) -> Option { + let _ = (resolve, id); + None + } + + /// Returns the name of a pre-generated, shared lower helper function for + /// the named aggregate type `id`, if one exists. + /// + /// When this returns `Some`, the canonical-ABI lower of that type (in + /// `write_to_memory`) is "outlined" into a call to the named helper + /// (emitted as `Instruction::LowerNamedToMemory`) instead of being inlined + /// recursively. Generators that do not implement helper outlining should + /// return `None` (the default). + fn lower_helper_name(&self, resolve: &Resolve, id: TypeId) -> Option { + let _ = (resolve, id); + None + } } /// Generates an abstract sequence of instructions which represents this @@ -860,6 +910,67 @@ pub fn lift_from_memory( generator.stack.pop().unwrap() } +/// Like [`lift_from_memory`], but used to generate the *body* of an outlined +/// lift helper for the named type `skip_outline_root`. +/// +/// The root type itself is lifted inline (one level deep) while nested named +/// aggregate types are outlined into calls to their own shared helpers (see +/// [`Bindgen::lift_helper_name`] and [`Instruction::LiftNamedFromMemory`]). +/// +/// `skip_outline_root` must be the id of the named aggregate (record/variant) +/// being lifted, and `ty` must be exactly `Type::Id(skip_outline_root)` (not an +/// alias to it) so that the single-shot inline of the root in `read_from_memory` +/// lands on the intended type. +pub fn lift_from_memory_root( + resolve: &Resolve, + bindgen: &mut B, + address: B::Operand, + ty: &Type, + skip_outline_root: TypeId, +) -> B::Operand { + assert!( + matches!(ty, Type::Id(id) if *id == skip_outline_root), + "lift_from_memory_root requires `ty` to be `Type::Id(skip_outline_root)`, \ + not an alias or other type" + ); + let mut generator = Generator::new(resolve, bindgen); + generator.lift_outline_root = Some(skip_outline_root); + generator.read_from_memory(ty, address, Default::default()); + generator.stack.pop().unwrap() +} + +/// Like [`lower_to_memory`], but used to generate the *body* of an outlined +/// lower helper for the named type `skip_outline_root`. +/// +/// The root type itself is lowered inline (one level deep) while nested named +/// aggregate types are outlined into calls to their own shared helpers (see +/// [`Bindgen::lower_helper_name`] and [`Instruction::LowerNamedToMemory`]). +/// +/// `skip_outline_root` must be the id of the named aggregate (record/variant) +/// being lowered, and `ty` must be exactly `Type::Id(skip_outline_root)` (not +/// an alias to it) so that the single-shot inline of the root in +/// `write_to_memory` lands on the intended type. +pub fn lower_to_memory_root( + resolve: &Resolve, + bindgen: &mut B, + address: B::Operand, + value: B::Operand, + ty: &Type, + skip_outline_root: TypeId, +) { + assert!( + matches!(ty, Type::Id(id) if *id == skip_outline_root), + "lower_to_memory_root requires `ty` to be `Type::Id(skip_outline_root)`, \ + not an alias or other type" + ); + let mut generator = Generator::new(resolve, bindgen); + generator.realloc = Some(Realloc::Export("cabi_realloc")); + generator.lower_outline_root = Some(skip_outline_root); + generator.stack.push(value); + generator.write_to_memory(ty, address, Default::default()); + debug_assert!(generator.stack.is_empty()); +} + /// Used in a similar manner as the `Interface::call` function except is /// used to generate the `post-return` callback for `func`. /// @@ -1001,6 +1112,16 @@ struct Generator<'a, B: Bindgen> { stack: Vec, return_pointer: Option, realloc: Option, + /// When generating the body of an outlined lift helper for a named type, + /// this holds that type's id so its *own* top-level lift is inlined (one + /// level) while nested named aggregates are still outlined into their own + /// helpers. `None` everywhere else (so all eligible named types outline). + lift_outline_root: Option, + /// Like `lift_outline_root`, but for the lower side: when generating the + /// body of an outlined lower helper for a named type, this holds that + /// type's id so its own top-level lower is inlined (one level) while + /// nested named aggregates are still outlined. `None` everywhere else. + lower_outline_root: Option, } const MAX_FLAT_PARAMS: usize = 16; @@ -1016,6 +1137,8 @@ impl<'a, B: Bindgen> Generator<'a, B> { stack: Vec::new(), return_pointer: None, realloc: None, + lift_outline_root: None, + lower_outline_root: None, } } @@ -1969,7 +2092,21 @@ impl<'a, B: Bindgen> Generator<'a, B> { Type::String => self.write_list_to_memory(ty, addr, offset), Type::ErrorContext => self.lower_and_emit(ty, addr, &I32Store { offset }), - Type::Id(id) => match &self.resolve.types[id].kind { + Type::Id(id) => { + // Outline the lower of large named aggregate types into shared + // helper functions instead of inlining the full recursive + // lower here. `lower_outline_root` is `Some(id)` only while + // generating that type's own helper body, in which case its + // top level is inlined (one level) and nested types still + // outline. + let is_outline_root = self.lower_outline_root == Some(id); + self.lower_outline_root = None; + if !is_outline_root && self.bindgen.lower_helper_name(self.resolve, id).is_some() { + self.stack.push(addr); + self.emit(&Instruction::LowerNamedToMemory { ty: id, offset }); + return; + } + match &self.resolve.types[id].kind { TypeDefKind::Type(t) => self.write_to_memory(t, addr, offset), TypeDefKind::List(_) => self.write_list_to_memory(ty, addr, offset), // Maps have the same linear memory layout as list>. @@ -2083,6 +2220,7 @@ impl<'a, B: Bindgen> Generator<'a, B> { id, }); } + } }, } } @@ -2177,7 +2315,20 @@ impl<'a, B: Bindgen> Generator<'a, B> { Type::String => self.read_list_from_memory(ty, addr, offset), Type::ErrorContext => self.emit_and_lift(ty, addr, &I32Load { offset }), - Type::Id(id) => match &self.resolve.types[id].kind { + Type::Id(id) => { + // Outline the lift of large named aggregate types into shared + // helper functions instead of inlining the full recursive lift + // here. `lift_outline_root` is `Some(id)` only while generating + // that type's own helper body, in which case its top level is + // inlined (one level) and nested types still outline. + let is_outline_root = self.lift_outline_root == Some(id); + self.lift_outline_root = None; + if !is_outline_root && self.bindgen.lift_helper_name(self.resolve, id).is_some() { + self.stack.push(addr); + self.emit(&Instruction::LiftNamedFromMemory { ty: id, offset }); + return; + } + match &self.resolve.types[id].kind { TypeDefKind::Type(t) => self.read_from_memory(t, addr, offset), TypeDefKind::List(_) => self.read_list_from_memory(ty, addr, offset), @@ -2284,7 +2435,8 @@ impl<'a, B: Bindgen> Generator<'a, B> { id, }); } - }, + } + } } } diff --git a/crates/cpp/src/lib.rs b/crates/cpp/src/lib.rs index 468aa2b17..19ee8a50f 100644 --- a/crates/cpp/src/lib.rs +++ b/crates/cpp/src/lib.rs @@ -3520,6 +3520,14 @@ impl<'a, 'b> Bindgen for FunctionBindgen<'a, 'b> { abi::Instruction::LengthLoad { offset } => { self.load("size_t", *offset, operands, results) } + abi::Instruction::LiftNamedFromMemory { .. } => unreachable!( + "LiftNamedFromMemory is only emitted by generators that implement \ + Bindgen::lift_helper_name, which this generator does not" + ), + abi::Instruction::LowerNamedToMemory { .. } => unreachable!( + "LowerNamedToMemory is only emitted by generators that implement \ + Bindgen::lower_helper_name, which this generator does not" + ), abi::Instruction::PointerStore { offset } => { let ptr_type = self.r#gen.r#gen.opts.ptr_type(); self.store(ptr_type, *offset, operands) diff --git a/crates/csharp/src/function.rs b/crates/csharp/src/function.rs index 9de5852cf..866f111e3 100644 --- a/crates/csharp/src/function.rs +++ b/crates/csharp/src/function.rs @@ -464,6 +464,14 @@ impl Bindgen for FunctionBindgen<'_, '_> { offset = offset.size_wasm32() )) } + Instruction::LiftNamedFromMemory { .. } => unreachable!( + "LiftNamedFromMemory is only emitted by generators that implement \ + Bindgen::lift_helper_name, which this generator does not" + ), + Instruction::LowerNamedToMemory { .. } => unreachable!( + "LowerNamedToMemory is only emitted by generators that implement \ + Bindgen::lower_helper_name, which this generator does not" + ), Instruction::PointerLoad { offset } => results.push(format!( "new global::System.Span((void*)((byte*){} + {offset}), 1)[0]", operands[0], diff --git a/crates/go/src/lib.rs b/crates/go/src/lib.rs index 4caf53a04..26b751b77 100644 --- a/crates/go/src/lib.rs +++ b/crates/go/src/lib.rs @@ -1992,6 +1992,14 @@ return {results}" Instruction::LengthLoad { offset } => { load(self, results, &operands[0], offset, "uint32", &|v| v) } + Instruction::LiftNamedFromMemory { .. } => unreachable!( + "LiftNamedFromMemory is only emitted by generators that implement \ + Bindgen::lift_helper_name, which this generator does not" + ), + Instruction::LowerNamedToMemory { .. } => unreachable!( + "LowerNamedToMemory is only emitted by generators that implement \ + Bindgen::lower_helper_name, which this generator does not" + ), Instruction::PointerLoad { offset } => { load(self, results, &operands[0], offset, "uint32", &|v| { format!("uintptr({v})") diff --git a/crates/moonbit/src/lib.rs b/crates/moonbit/src/lib.rs index 4aa29852f..83c576c8d 100644 --- a/crates/moonbit/src/lib.rs +++ b/crates/moonbit/src/lib.rs @@ -2,7 +2,7 @@ use anyhow::Result; use core::panic; use heck::{ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase}; use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashMap, HashSet}, fmt::Write, mem, ops::Deref, @@ -15,8 +15,8 @@ use wit_bindgen_core::{ wit_parser::{ Alignment, ArchitectureSize, Docs, Enum, Flags, FlagsRepr, Function, Int, InterfaceId, LiftLowerAbi, ManglingAndAbi, Param, Record, Resolve, ResourceIntrinsic, Result_, - SizeAlign, Tuple, Type, TypeId, Variant, WasmExport, WasmExportKind, WasmImport, WorldId, - WorldKey, + SizeAlign, Tuple, Type, TypeDefKind, TypeId, Variant, WasmExport, WasmExportKind, + WasmImport, WorldId, WorldItem, WorldKey, }, }; @@ -120,6 +120,34 @@ enum PayloadFor { Stream, } +/// Sanitizes a WIT export identity into a stable MoonBit-identifier suffix used +/// to disambiguate `wasmExport*` names that collide across the world's exports. +/// `golem:agent/guest` -> `GolemAgentGuest`, world `agent-guest` -> `AgentGuest`. +/// Any `@version` segment is stripped so the suffix does not change when a +/// dependency is bumped. +fn export_disambiguator(resolve: &Resolve, key: Option<&WorldKey>, world: WorldId) -> String { + let raw: String = match key { + Some(k) => resolve.name_world_key(k), + None => resolve.worlds[world].name.clone(), + }; + let raw = match raw.rfind('@') { + Some(i) => &raw[..i], + None => raw.as_str(), + }; + let mut out = String::new(); + for part in raw.split([':', '/', '-', '_']) { + if part.is_empty() { + continue; + } + let mut chars = part.chars(); + if let Some(c) = chars.next() { + out.push(c.to_ascii_uppercase()); + out.push_str(chars.as_str()); + } + } + out +} + #[derive(Default)] pub struct MoonBit { opts: Opts, @@ -137,6 +165,16 @@ pub struct MoonBit { export_ns: Ns, + /// Stable per-export disambiguators for `wasmExport*` names that collide + /// across the world's exports (e.g. `invoke` exported by both + /// `golem:agent/guest` and `golem:tool/guest`). Computed once in + /// `preprocess` from the exporting interface's identity, so the generated + /// names no longer depend on `world.exports` iteration order. Keyed by + /// `(interface key, function name)`; the value is an empty string for + /// non-colliding exports (clean `wasmExport{Camel}` name preserved) and an + /// interface-qualified suffix otherwise. + disambiguators: HashMap<(Option, String), String>, + async_support: AsyncSupport, } @@ -159,6 +197,7 @@ impl MoonBit { ffi_imports: HashSet::new(), derive_opts, interface, + lower_helpers: BTreeMap::new(), } } @@ -256,6 +295,47 @@ impl WorldGenerator for MoonBit { })) .unwrap_or("generated".into()); self.sizes.fill(resolve); + + // Pre-compute stable disambiguators for `wasmExport*` names that collide + // across the world's exports. Without this, colliding names (e.g. + // `invoke` exported by both `golem:agent/guest` and `golem:tool/guest`) + // get numeric suffixes assigned in `world.exports` iteration order, + // which can flip when the WIT declaration order changes and silently + // swap the component export mapping. Colliding exports get a stable + // interface-qualified suffix; unique exports keep their clean + // `wasmExport{Camel}` name (empty disambiguator). + let mut groups: HashMap, String)>> = HashMap::new(); + for (key, item) in &resolve.worlds[world].exports { + match item { + WorldItem::Interface { id, .. } => { + for (fname, _) in &resolve.interfaces[*id].functions { + let base = format!("wasmExport{}", fname.to_upper_camel_case()); + groups + .entry(base) + .or_default() + .push((Some(key.clone()), fname.clone())); + } + } + WorldItem::Function(f) => { + let base = format!("wasmExport{}", f.name.to_upper_camel_case()); + groups + .entry(base) + .or_default() + .push((None, f.name.clone())); + } + WorldItem::Type { .. } => {} + } + } + let mut disambiguators = HashMap::new(); + for entries in groups.values() { + if entries.len() > 1 { + for (key, fname) in entries { + let disambig = export_disambiguator(resolve, key.as_ref(), world); + disambiguators.insert((key.clone(), fname.clone()), disambig); + } + } + } + self.disambiguators = disambiguators; } fn import_interface( @@ -416,6 +496,8 @@ impl WorldGenerator for MoonBit { r#gen.export(func); } + r#gen.generate_lower_helper_bodies(); + let fragment = r#gen.finish(); // Write files @@ -480,6 +562,8 @@ impl WorldGenerator for MoonBit { r#gen.export(func); } + r#gen.generate_lower_helper_bodies(); + let fragment = r#gen.finish(); // Write files @@ -577,6 +661,14 @@ struct InterfaceGenerator<'a> { // Options for deriving traits derive_opts: DeriveOpts, + + // Shared lower-to-memory helper functions for named aggregate types + // (records/variants) reached from memory-lowered guest-export results. The + // helper for a given `TypeId` is emitted once and called from every + // `wasmExport*` glue function that lowers a value of that type, instead of + // inlining the full recursive lower at each call site. This keeps the + // generated `wasmExport*` functions small enough for `moonc` to compile. + lower_helpers: BTreeMap, } impl InterfaceGenerator<'_> { @@ -588,6 +680,131 @@ impl InterfaceGenerator<'_> { } } + /// Recursively registers a shared lower-to-memory helper for `ty` and + /// every named aggregate type (record/variant) reachable from it. Only + /// records names here; bodies are generated later by + /// [`Self::generate_lower_helper_bodies`]. + /// + /// Mirrors the Rust backend's `register_lift_helpers`, but for the lower + /// side. Outlining the recursive lower into one helper per aggregate type + /// keeps the generated `wasmExport*` glue functions small enough for + /// `moonc` to compile (a single inlined lower of a deeply nested variant + /// can balloon the per-function IR past `moonc`'s limits). + fn register_lower_helpers(&mut self, ty: &Type) { + let Type::Id(id) = ty else { return }; + let id = *id; + match &self.resolve.types[id].kind { + TypeDefKind::Type(t) => { + let t = *t; + self.register_lower_helpers(&t); + } + TypeDefKind::Record(_) | TypeDefKind::Variant(_) => { + if self.lower_helpers.contains_key(&id) { + return; + } + let name = format!("__wit_bindgen_lower_t{}", id.index()); + self.lower_helpers.insert(id, name); + // Collect child types first to avoid borrow conflicts. + let children: Vec = match &self.resolve.types[id].kind { + TypeDefKind::Record(r) => r.fields.iter().map(|f| f.ty).collect(), + TypeDefKind::Variant(v) => v.cases.iter().filter_map(|c| c.ty).collect(), + _ => Vec::new(), + }; + for child in children { + self.register_lower_helpers(&child); + } + } + TypeDefKind::List(t) | TypeDefKind::Option(t) | TypeDefKind::FixedLengthList(t, _) => { + let t = *t; + self.register_lower_helpers(&t); + } + TypeDefKind::Tuple(tuple) => { + let tys: Vec = tuple.types.clone(); + for t in tys { + self.register_lower_helpers(&t); + } + } + TypeDefKind::Result(r) => { + if let Some(t) = r.ok { + self.register_lower_helpers(&t); + } + if let Some(t) = r.err { + self.register_lower_helpers(&t); + } + } + TypeDefKind::Map(k, v) => { + let k = *k; + let v = *v; + self.register_lower_helpers(&k); + self.register_lower_helpers(&v); + } + _ => {} + } + } + + /// Generates the body of every registered lower helper into `self.ffi`. + /// + /// Each helper has the signature `fn name(ptr : Int, value : T) -> Unit` and + /// writes `value` to linear memory at `ptr` using the canonical ABI. The + /// root type is lowered inline (one level deep) via + /// [`abi::lower_to_memory_root`]; nested named aggregates are outlined + /// into calls to their own helpers. + fn generate_lower_helper_bodies(&mut self) { + let ids: Vec = self.lower_helpers.keys().copied().collect(); + let resolve = self.resolve; + for id in ids { + let name = self.lower_helpers[&id].clone(); + let ty = Type::Id(id); + let value_ty = self.world_gen.pkg_resolver.type_name(self.name, &ty); + + // The helper body is generated in a borrow scope so that + // `FunctionBindgen`'s borrow of `self` ends before we write the + // finished body into `self.ffi` below. + let body = { + // Reserve the helper's parameter names (`ptr`, `value`) in the + // local namespace so that lowering temps such as the string / + // canonical-list pointer (`locals.tmp("ptr")`) become `ptr0`, + // `ptr1`, ... instead of `ptr`, which would shadow the `ptr` + // parameter and make subsequent `(ptr) + offset` stores write + // to the string's own buffer instead of the result base. + let mut f = FunctionBindgen::new( + self, + Box::new(["ptr".to_string(), "value".to_string()]), + ); + abi::lower_to_memory_root( + resolve, + &mut f, + "ptr".to_string(), + "value".to_string(), + &ty, + id, + ); + let body = mem::take(&mut f.src); + + // A pure lower-to-memory of a record/variant allocates the + // result buffers via `cabi_realloc` (handed off to the caller) + // and must not require any of the side state that `export` + // would otherwise flush into the surrounding function. Assert + // the assumption holds rather than silently emitting broken + // code if a future lower path starts depending on them. + assert!( + !f.needs_cleanup_list, + "outlined lower helper unexpectedly requires a cleanup list" + ); + assert!( + f.cleanup.is_empty(), + "outlined lower helper unexpectedly produced cleanup entries" + ); + body + }; + + uwriteln!( + self.ffi, + "\n#doc(hidden)\nfn {name}(ptr : Int, value : {value_ty}) -> Unit {{\n{body}\n}}\n" + ); + } + } + fn import(&mut self, func: &Function) { // Determine if the function is async let async_ = self @@ -728,6 +945,18 @@ impl InterfaceGenerator<'_> { let sig = self.resolve.wasm_signature(variant, func); + // Register shared lower-to-memory helpers for the named aggregate + // types reached from this export's result so that the recursive lower + // is outlined into per-type helpers instead of being inlined into the + // `wasmExport*` glue. Only memory-lowered results (return-pointer + // results) go through `write_to_memory`, so helpers are only needed + // there. + if sig.retptr { + if let Some(result) = &func.result { + self.register_lower_helpers(result); + } + } + let mut bindgen = FunctionBindgen::new( self, (0..sig.params.len()).map(|i| format!("p{i}")).collect(), @@ -758,10 +987,17 @@ impl InterfaceGenerator<'_> { let camel_name = func.name.to_upper_camel_case(); + let disambig = self + .world_gen + .disambiguators + .get(&(self.interface.cloned(), func.name.to_string())) + .cloned() + .unwrap_or_default(); + let func_name = self .world_gen .export_ns - .tmp(&format!("wasmExport{camel_name}")); + .tmp(&format!("wasmExport{camel_name}{disambig}")); let params = sig .params @@ -830,7 +1066,7 @@ impl InterfaceGenerator<'_> { let export_func_name = self .world_gen .export_ns - .tmp(&format!("wasmExportAsync{camel_name}")); + .tmp(&format!("wasmExportAsync{camel_name}{disambig}")); let DeferredTaskReturn::Emitted { body: task_return_body, params: task_return_params, @@ -946,7 +1182,7 @@ impl InterfaceGenerator<'_> { let func_name = self .world_gen .export_ns - .tmp(&format!("wasmExport{camel_name}PostReturn")); + .tmp(&format!("wasmExport{camel_name}{disambig}PostReturn")); uwrite!( self.ffi, @@ -2513,6 +2749,24 @@ impl Bindgen for FunctionBindgen<'_, '_> { )) } + Instruction::LiftNamedFromMemory { .. } => unreachable!( + "LiftNamedFromMemory is only emitted by generators that implement \ + Bindgen::lift_helper_name, which this generator does not" + ), + + Instruction::LowerNamedToMemory { ty, offset } => { + let name = self + .lower_helper_name(self.interface_gen.resolve, *ty) + .expect("lower helper must be registered before it is emitted"); + uwriteln!( + self.src, + "{name}(({}) + {offset}, {})", + operands[1], + operands[0], + offset = offset.size_wasm32() + ) + } + Instruction::I32Load8U { offset } => { self.use_ffi(ffi::LOAD8_U); results.push(format!( @@ -3061,6 +3315,10 @@ impl Bindgen for FunctionBindgen<'_, '_> { Type::U8 | Type::U32 | Type::U64 | Type::S32 | Type::S64 | Type::F32 | Type::F64 ) } + + fn lower_helper_name(&self, _resolve: &Resolve, id: TypeId) -> Option { + self.interface_gen.lower_helpers.get(&id).cloned() + } } fn perform_cast(op: &str, cast: &Bitcast) -> String { diff --git a/crates/rust/src/bindgen.rs b/crates/rust/src/bindgen.rs index bee880167..cd3ce9173 100644 --- a/crates/rust/src/bindgen.rs +++ b/crates/rust/src/bindgen.rs @@ -22,6 +22,16 @@ pub(super) struct FunctionBindgen<'a, 'b> { pub handle_decls: Vec, always_owned: bool, return_self: bool, + /// Whether outlined lift helpers may be used for this snippet. + /// + /// Lift helpers are emitted as free functions in the interface module, and + /// outlined calls to them are unqualified, so they only resolve for code + /// emitted directly in that same module (import wrappers and the helper + /// bodies themselves). Code emitted into a nested submodule — notably the + /// `future`/`stream` payload vtables, which live in a separate module tree — + /// must not outline; it lifts inline instead. Such payload `lift` functions + /// are already isolated single-value lifts, so this loses no benefit. + pub(super) outline_lifts: bool, } pub const POINTER_SIZE_EXPRESSION: &str = "::core::mem::size_of::<*const u8>()"; @@ -48,6 +58,7 @@ impl<'a, 'b> FunctionBindgen<'a, 'b> { handle_decls: Vec::new(), always_owned, return_self, + outline_lifts: true, } } @@ -267,6 +278,13 @@ impl Bindgen for FunctionBindgen<'_, '_> { self.r#gen.is_list_canonical(ty) } + fn lift_helper_name(&self, _resolve: &Resolve, id: TypeId) -> Option { + if !self.outline_lifts { + return None; + } + self.r#gen.lift_helpers.get(&id).cloned() + } + fn emit( &mut self, resolve: &Resolve, @@ -1174,6 +1192,25 @@ impl Bindgen for FunctionBindgen<'_, '_> { results.push(format!("l{tmp}")); } + Instruction::LiftNamedFromMemory { ty, offset } => { + let name = self + .lift_helper_name(resolve, *ty) + .expect("lift helper must be registered before it is emitted"); + let tmp = self.tmp(); + uwriteln!( + self.src, + "let result{tmp} = {name}({base}.add({offset}));", + base = operands[0], + offset = offset.format_term(POINTER_SIZE_EXPRESSION, true), + ); + results.push(format!("result{tmp}")); + } + + Instruction::LowerNamedToMemory { .. } => unreachable!( + "LowerNamedToMemory is only emitted by generators that implement \ + Bindgen::lower_helper_name, which this generator does not" + ), + Instruction::I32Store { offset } => { self.push_str(&format!( "*{}.add({}).cast::() = {};\n", diff --git a/crates/rust/src/interface.rs b/crates/rust/src/interface.rs index c81c3271e..18ac241c2 100644 --- a/crates/rust/src/interface.rs +++ b/crates/rust/src/interface.rs @@ -26,6 +26,14 @@ pub struct InterfaceGenerator<'a> { pub return_pointer_area_align: Alignment, pub(super) needs_runtime_module: bool, pub(super) needs_wit_map: bool, + /// Map of named aggregate type -> name of a shared, outlined lift helper + /// function generated for that type. Populated for guest imports so the + /// canonical-ABI lift of large types is shared across all wrappers in this + /// interface instead of being inlined into each one. + pub(super) lift_helpers: BTreeMap, + /// Generated source for the bodies of the helpers in `lift_helpers`, + /// flushed into `src` once at the end of import generation. + pub(super) lift_helper_bodies: Source, } /// A description of the "mode" in which a type is printed. @@ -356,11 +364,150 @@ macro_rules! {macro_name} {{ funcs: impl Iterator, interface: Option<&WorldKey>, ) { + let funcs: Vec<&Function> = funcs.collect(); + + // Register shared lift helpers for the result types of all imported + // functions (transitively over all named aggregate types), then emit + // their bodies once. Wrappers below then lift via calls to these shared + // helpers instead of inlining the full recursive lift each time. + // + // Only register a helper when the import actually lifts its result from + // memory; otherwise the generated helper would be unused: + // * skipped functions emit no wrapper at all, and + // * a synchronous result that fits in flat returns is lifted directly + // (`abi::call` uses `lift`, not `read_from_memory`). + // Async imports always deliver their result via memory. Under-registering + // is always safe: `read_from_memory` simply inlines the lift when no + // helper exists for a type. + for func in &funcs { + if self.r#gen.skip.contains(&func.name) { + continue; + } + let Some(result) = func.result.as_ref() else { + continue; + }; + let async_ = self.r#gen.is_async(self.resolve, interface, func, true); + let memory_lifted = async_ + || self + .resolve + .wasm_signature(AbiVariant::GuestImport, func) + .retptr; + if memory_lifted { + self.register_lift_helpers(result); + } + } + self.generate_lift_helper_bodies(); + let bodies = mem::take(&mut self.lift_helper_bodies); + self.src.push_str(&String::from(bodies)); + for func in funcs { self.generate_guest_import(func, interface); } } + /// Recursively registers a shared lift helper for `ty` and every named + /// aggregate type (record/variant) reachable from it. Only registers names + /// here; bodies are generated later by `generate_lift_helper_bodies`. + fn register_lift_helpers(&mut self, ty: &Type) { + let Type::Id(id) = ty else { return }; + let id = *id; + match &self.resolve.types[id].kind { + TypeDefKind::Type(t) => { + let t = *t; + self.register_lift_helpers(&t); + } + TypeDefKind::Record(_) | TypeDefKind::Variant(_) => { + if self.lift_helpers.contains_key(&id) { + return; + } + let name = format!("__wit_bindgen_lift_t{}", id.index()); + self.lift_helpers.insert(id, name); + // Collect child types first to avoid borrow conflicts. + let children: Vec = match &self.resolve.types[id].kind { + TypeDefKind::Record(r) => r.fields.iter().map(|f| f.ty).collect(), + TypeDefKind::Variant(v) => v.cases.iter().filter_map(|c| c.ty).collect(), + _ => Vec::new(), + }; + for child in children { + self.register_lift_helpers(&child); + } + } + TypeDefKind::List(t) | TypeDefKind::Option(t) | TypeDefKind::FixedLengthList(t, _) => { + let t = *t; + self.register_lift_helpers(&t); + } + TypeDefKind::Tuple(tuple) => { + let tys: Vec = tuple.types.clone(); + for t in tys { + self.register_lift_helpers(&t); + } + } + TypeDefKind::Result(r) => { + let ok = r.ok; + let err = r.err; + if let Some(t) = ok { + self.register_lift_helpers(&t); + } + if let Some(t) = err { + self.register_lift_helpers(&t); + } + } + TypeDefKind::Map(k, v) => { + let k = *k; + let v = *v; + self.register_lift_helpers(&k); + self.register_lift_helpers(&v); + } + _ => {} + } + } + + /// Generates the body of every registered lift helper into + /// `lift_helper_bodies`. + fn generate_lift_helper_bodies(&mut self) { + let ids: Vec = self.lift_helpers.keys().copied().collect(); + let resolve = self.resolve; + let module = self.wasm_import_module; + for id in ids { + let name = self.lift_helpers[&id].clone(); + let ret_ty = self.type_path(id, true); + let ty = Type::Id(id); + + let mut f = FunctionBindgen::new(self, Vec::new(), module, true, false); + let expr = abi::lift_from_memory_root(resolve, &mut f, "ptr".to_string(), &ty, id); + let body = String::from(mem::take(&mut f.src)); + + // A pure memory-lift of a record/variant must not require any of the + // side state that `generate_guest_import_body_sync` would otherwise + // flush into the surrounding function. These are dropped here, so + // assert the assumption holds rather than silently emitting broken + // code if a future lift path starts depending on them. + assert!( + !f.needs_cleanup_list, + "outlined lift helper unexpectedly requires a cleanup list" + ); + assert!( + f.import_return_pointer_area_size.is_empty(), + "outlined lift helper unexpectedly requires a return pointer area" + ); + assert!( + f.handle_decls.is_empty(), + "outlined lift helper unexpectedly produced handle declarations" + ); + + uwriteln!( + self.lift_helper_bodies, + "#[allow(dead_code, unused_unsafe, clippy::all)]\n\ + unsafe fn {name}(ptr: *mut u8) -> {ret_ty} {{\n\ + unsafe {{\n\ + {body}\n\ + {expr}\n\ + }}\n\ + }}" + ); + } + } + pub fn align_area(&mut self, alignment: Alignment) { match alignment { Alignment::Pointer => uwriteln!( @@ -602,7 +749,12 @@ macro_rules! {macro_name} {{ let lower; let dealloc_lists; if let Some(payload_type) = payload_type { - lift = self.lift_from_memory("ptr", &payload_type, &module); + // This `lift` snippet is emitted inside a nested `pub mod vtable{N}` + // that lives in a separate module tree from the interface's lift + // helpers, so outlining (which emits unqualified helper calls) would + // not resolve. Lift inline instead; this payload `lift` is already an + // isolated single-value function. + lift = self.lift_from_memory_no_outline("ptr", &payload_type, &module); dealloc_lists = self.deallocate_lists( std::slice::from_ref(payload_type), &["ptr".to_string()], @@ -798,7 +950,26 @@ pub mod vtable{ordinal} {{ } fn lift_from_memory(&mut self, address: &str, ty: &Type, module: &str) -> String { + self.lift_from_memory_inner(address, ty, module, true) + } + + /// Like [`Self::lift_from_memory`], but never outlines into shared lift + /// helpers. Used for snippets emitted into a nested submodule (e.g. a + /// `future`/`stream` payload vtable), where the unqualified helper calls + /// would not resolve because the helpers live in a different module. + fn lift_from_memory_no_outline(&mut self, address: &str, ty: &Type, module: &str) -> String { + self.lift_from_memory_inner(address, ty, module, false) + } + + fn lift_from_memory_inner( + &mut self, + address: &str, + ty: &Type, + module: &str, + outline_lifts: bool, + ) -> String { let mut f = FunctionBindgen::new(self, Vec::new(), module, true, false); + f.outline_lifts = outline_lifts; let result = abi::lift_from_memory(f.r#gen.resolve, &mut f, address.into(), ty); format!("unsafe {{ {}\n{result} }}", String::from(f.src)) } diff --git a/crates/rust/src/lib.rs b/crates/rust/src/lib.rs index 7241f2d09..4a1bd8cbc 100644 --- a/crates/rust/src/lib.rs +++ b/crates/rust/src/lib.rs @@ -380,6 +380,8 @@ impl RustWasm { return_pointer_area_align: Default::default(), needs_runtime_module: false, needs_wit_map: false, + lift_helpers: Default::default(), + lift_helper_bodies: Default::default(), } }