Skip to content

Commit a5eb511

Browse files
committed
Adjust allocation strategy for TinyGo-derived modules so they don't immediately crash.
You can run the TinyGo empty project in Viceroy now (once Viceroy is updated to depend on the new wit-component herein); it immediately crashed before. It serves a request and returns a 0 exit code, somewhat obscured by a backtrace because an empty project doesn't implement a Reactor. The crux of this update is that the GC phase of adapter application now takes a `ReallocScheme` arg to its `encode()` method. This represents slightly richer advice on how to find or construct a `realloc()` function for the adapter to use to allocate its state. Before, it took only an `Option`: `None` meant "use `memory.grow`", and `Some(such_and_such)` meant "use a realloc function called `such_and_such`, provided in the module being adapted". Now we can also say "construct a realloc routine using the given malloc routine found in the module being adapted". This lets us communicate to TinyGo's GC that we have reserved some RAM, so it doesn't stomp on us later.
1 parent 6ed1788 commit a5eb511

4 files changed

Lines changed: 164 additions & 48 deletions

File tree

crates/wasmparser/src/readers/core/custom.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ impl<'a> CustomSectionReader<'a> {
103103
/// Return value of [`CustomSectionReader::as_known`].
104104
///
105105
/// Note that this is `#[non_exhaustive]` because depending on crate features
106-
/// this enumeration will different entries.
106+
/// this enumeration will contain different entries.
107107
#[allow(missing_docs)]
108108
#[non_exhaustive]
109109
pub enum KnownCustom<'a> {

crates/wit-component/src/encoding.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ use std::collections::HashMap;
8181
use std::hash::Hash;
8282
use std::mem;
8383
use wasm_encoder::*;
84+
use wasmparser::KnownCustom::Producers;
85+
use wasmparser::Payload::CustomSection;
8486
use wasmparser::{Validator, WasmFeatures};
8587
use wit_parser::{
8688
Function, FunctionKind, InterfaceId, LiveTypes, Resolve, Stability, Type, TypeDefKind, TypeId,
@@ -2670,11 +2672,34 @@ pub(super) struct Adapter {
26702672
library_info: Option<LibraryInfo>,
26712673
}
26722674

2675+
/// Returns whether the module appears to have been generated by TinyGo. Absent
2676+
/// any evidence, returns false.
2677+
fn is_produced_by_tiny_go(module: &[u8]) -> Result<bool> {
2678+
for section in wasmparser::Parser::new(0).parse_all(module) {
2679+
if let CustomSection(custom_reader) = section? {
2680+
if let Producers(producers_reader) = custom_reader.as_known() {
2681+
for field in producers_reader {
2682+
let field = field?;
2683+
if field.name == "processed-by" {
2684+
for value in field.values.into_iter() {
2685+
if value?.name == "TinyGo" {
2686+
return Ok(true);
2687+
}
2688+
}
2689+
}
2690+
}
2691+
return Ok(false);
2692+
}
2693+
}
2694+
}
2695+
return Ok(false);
2696+
}
26732697
/// An encoder of components based on `wit` interface definitions.
26742698
#[derive(Default)]
26752699
pub struct ComponentEncoder {
26762700
module: Vec<u8>,
26772701
module_import_map: Option<ModuleImportMap>,
2702+
module_is_produced_by_tiny_go: bool,
26782703
pub(super) metadata: Bindgen,
26792704
validate: bool,
26802705
pub(super) main_module_exports: IndexSet<WorldKey>,
@@ -2693,6 +2718,7 @@ impl ComponentEncoder {
26932718
/// core module.
26942719
pub fn module(mut self, module: &[u8]) -> Result<Self> {
26952720
let (wasm, metadata) = self.decode(module.as_ref())?;
2721+
self.module_is_produced_by_tiny_go = is_produced_by_tiny_go(&module)?;
26962722
let (wasm, module_import_map) = ModuleImportMap::new(wasm)?;
26972723
let exports = self
26982724
.merge_metadata(metadata)

crates/wit-component/src/encoding/world.rs

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::{Adapter, ComponentEncoder, LibraryInfo, RequiredOptions};
2+
use crate::gc::ReallocScheme;
23
use crate::validation::{
34
Import, ImportMap, ValidatedModule, validate_adapter_module, validate_module,
45
};
@@ -24,7 +25,7 @@ pub struct WorldAdapter<'a> {
2425
/// `EncodingState` as this information doesn't change throughout the encoding
2526
/// process.
2627
pub struct ComponentWorld<'a> {
27-
/// Encoder configuration with modules, the document ,etc.
28+
/// Encoder configuration with modules, the document, etc.
2829
pub encoder: &'a ComponentEncoder,
2930
/// Validation information of the input module, or `None` in `--types-only`
3031
/// mode.
@@ -83,6 +84,23 @@ impl<'a> ComponentWorld<'a> {
8384
Ok(ret)
8485
}
8586

87+
/// Given that there is no realloc function exported from the main module,
88+
/// returns the realloc scheme we should use.
89+
fn fallback_realloc_scheme(&self) -> ReallocScheme {
90+
if self.encoder.module_is_produced_by_tiny_go {
91+
// TODO: Also check for cabi_realloc and bail if it's there.
92+
// If it appears the module was emitted by TinyGo, we delegate to
93+
// its `malloc()` function. (TinyGo assumes its GC has rein over the
94+
// whole memory and quickly overwrites the adapter's
95+
// `memory.grow`-allocated State struct unless we inform TinyGo's GC
96+
// of the memory we use.)
97+
ReallocScheme::Malloc("malloc")
98+
} else {
99+
// If it's not TinyGo, use `memory.grow` instead.
100+
ReallocScheme::MemoryGrow
101+
}
102+
}
103+
86104
/// Process adapters which are required here. Iterate over all
87105
/// adapters and figure out what functions are required from the
88106
/// adapter itself, either because the functions are imported by the
@@ -121,8 +139,8 @@ impl<'a> ComponentWorld<'a> {
121139
} else {
122140
// Without `library_info` this means that this is an adapter.
123141
// The goal of the adapter is to provide a suite of symbols that
124-
// can be imported, but not all symbols may be imported. Here
125-
// the module is trimmed down to only what's needed by the
142+
// can be imported, but perhaps not all symbols are imported.
143+
// Here the module is trimmed down to only what's needed by the
126144
// original main module.
127145
//
128146
// The main module requires `required_by_import` above, but
@@ -163,17 +181,17 @@ impl<'a> ComponentWorld<'a> {
163181
required.insert(name.to_string());
164182
}
165183

184+
let realloc = if self.encoder.realloc_via_memory_grow {
185+
self.fallback_realloc_scheme()
186+
} else {
187+
match self.info.exports.realloc_to_import_into_adapter() {
188+
Some(name) => ReallocScheme::Realloc(name),
189+
None => self.fallback_realloc_scheme(),
190+
}
191+
};
166192
Cow::Owned(
167-
crate::gc::run(
168-
wasm,
169-
&required,
170-
if self.encoder.realloc_via_memory_grow {
171-
None
172-
} else {
173-
self.info.exports.realloc_to_import_into_adapter()
174-
},
175-
)
176-
.context("failed to reduce input adapter module to its minimal size")?,
193+
crate::gc::run(wasm, &required, realloc)
194+
.context("failed to reduce input adapter module to its minimal size")?,
177195
)
178196
};
179197
let info = validate_adapter_module(

crates/wit-component/src/gc.rs

Lines changed: 106 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,30 @@ use wasmparser::*;
1313

1414
const PAGE_SIZE: i32 = 64 * 1024;
1515

16-
/// This function will reduce the input core `wasm` module to only the set of
16+
/// The primitive allocation method we use to implement a realloc function,
17+
/// based on what the main module provides
18+
pub enum ReallocScheme<'a> {
19+
/// The main module exports a ready-made realloc function with the given
20+
/// name.
21+
Realloc(&'a str),
22+
/// The main module exports a `malloc(size: u32) -> u32` function with the
23+
/// given name. We can wrap this in a thin shell to make realloc.
24+
Malloc(&'a str),
25+
/// The main module exports no realloc function or anything to make one out
26+
/// of, so we should implement realloc in terms of `memory.grow`.
27+
MemoryGrow,
28+
}
29+
use ReallocScheme::*;
30+
31+
/// This function will reduce the input core `wasm` module (the adapter, I believe) to only the set of
1732
/// exports `required`.
1833
///
1934
/// This internally performs a "gc" pass after removing exports to ensure that
2035
/// the resulting module imports the minimal set of functions necessary.
2136
pub fn run(
2237
wasm: &[u8],
2338
required: &IndexSet<String>,
24-
main_module_realloc: Option<&str>,
39+
realloc_scheme: ReallocScheme,
2540
) -> Result<Vec<u8>> {
2641
assert!(!required.is_empty());
2742

@@ -46,7 +61,7 @@ pub fn run(
4661
}
4762
assert!(!module.exports.is_empty());
4863
module.liveness()?;
49-
module.encode(main_module_realloc)
64+
module.encode(realloc_scheme)
5065
}
5166

5267
/// This function generates a Wasm function body which implements `cabi_realloc` in terms of `memory.grow`. It
@@ -500,9 +515,21 @@ impl<'a> Module<'a> {
500515
live_iter(&self.live_tables, self.tables.iter())
501516
}
502517

518+
/// Returns a new Wasm function body which implements `cabi_realloc` to call
519+
/// a `malloc` that's exported from the main module. Pass in the index of
520+
/// the `malloc` function within the adapter.
521+
fn realloc_via_malloc(&self, malloc_index: u32) -> Result<wasm_encoder::Function> {
522+
let mut func = wasm_encoder::Function::new([]);
523+
func.instructions()
524+
.local_get(3) // desired new size
525+
.call(malloc_index)
526+
.end();
527+
Ok(func)
528+
}
529+
503530
/// Encodes this `Module` to a new wasm module which is gc'd and only
504531
/// contains the items that are live as calculated by the `liveness` pass.
505-
fn encode(&mut self, main_module_realloc: Option<&str>) -> Result<Vec<u8>> {
532+
fn encode(&mut self, realloc_scheme: ReallocScheme) -> Result<Vec<u8>> {
506533
// Data structure used to track the mapping of old index to new index
507534
// for all live items.
508535
let mut map = Encoder::default();
@@ -584,11 +611,38 @@ impl<'a> Module<'a> {
584611
let is_realloc =
585612
|m, n| m == "__main_module__" && matches!(n, "canonical_abi_realloc" | "cabi_realloc");
586613

614+
let add_malloc_type = |types: &mut wasm_encoder::TypeSection| {
615+
let type_index = types.len();
616+
types
617+
.ty()
618+
.function([wasm_encoder::ValType::I32], [wasm_encoder::ValType::I32]);
619+
type_index
620+
};
621+
622+
let mut malloc_index = None;
623+
let mut func_names = Vec::new();
624+
625+
// Import malloc before we start keeping track of the realloc_index so
626+
// we don't have to add num_func_imports every time we read it.
627+
if let Malloc(malloc_name) = realloc_scheme {
628+
imports.import(
629+
"__main_module__",
630+
malloc_name,
631+
EntityType::Function(add_malloc_type(&mut types)),
632+
);
633+
malloc_index = Some(num_func_imports);
634+
map.funcs.reserve();
635+
func_names.push((num_func_imports, malloc_name));
636+
num_func_imports += 1;
637+
}
638+
587639
let (imported, local) =
588640
self.live_funcs()
589641
.partition::<Vec<_>, _>(|(_, func)| match &func.def {
590642
Definition::Import(m, n) => {
591-
!is_realloc(*m, *n) || main_module_realloc.is_some()
643+
// Keep realloc function around iff we're going to use
644+
// it. Always keep other functions around.
645+
!is_realloc(*m, *n) || matches!(realloc_scheme, Realloc(_))
592646
}
593647
Definition::Local(_) => false,
594648
});
@@ -603,7 +657,10 @@ impl<'a> Module<'a> {
603657
// exports that function, but possibly using a different name
604658
// (e.g. `canonical_abi_realloc`). Update the name to match if necessary.
605659
realloc_index = Some(num_func_imports);
606-
main_module_realloc.unwrap_or(n)
660+
match realloc_scheme {
661+
Realloc(name) => name,
662+
_ => n,
663+
}
607664
} else {
608665
n
609666
};
@@ -637,22 +694,17 @@ impl<'a> Module<'a> {
637694
let sp = self.find_mut_i32_global("__stack_pointer")?;
638695
let allocation_state = self.find_mut_i32_global("allocation_state")?;
639696

640-
let mut func_names = Vec::new();
641-
642-
if let (Some(realloc), Some(_), None) = (main_module_realloc, sp, realloc_index) {
697+
if let (Realloc(realloc_name), Some(_), None) = (&realloc_scheme, sp, realloc_index) {
643698
// The main module exports a realloc function, and although the adapter doesn't import it, we're going
644699
// to add a function which calls it to allocate some stack space, so let's add an import now.
645-
646-
// Tell the function remapper we're reserving a slot for our extra import:
647-
map.funcs.next += 1;
648-
700+
map.funcs.reserve();
649701
realloc_index = Some(num_func_imports);
650702
imports.import(
651703
"__main_module__",
652-
realloc,
704+
realloc_name,
653705
EntityType::Function(add_realloc_type(&mut types)),
654706
);
655-
func_names.push((num_func_imports, realloc));
707+
func_names.push((num_func_imports, realloc_name));
656708
num_func_imports += 1;
657709
}
658710

@@ -665,30 +717,35 @@ impl<'a> Module<'a> {
665717
// exporting it. In this case, we need to define a local function it can call instead.
666718
realloc_index = Some(num_func_imports + funcs.len());
667719
funcs.function(ty);
668-
code.function(&realloc_via_memory_grow());
720+
let realloc_func = match realloc_scheme {
721+
Malloc(_) => self.realloc_via_malloc(
722+
malloc_index.expect("this was set above when the enum was Malloc"),
723+
)?,
724+
MemoryGrow => realloc_via_memory_grow(),
725+
Realloc(_) => bail!(
726+
"shouldn't get here, as we already know the main module doesn't export a realloc function"
727+
),
728+
};
729+
code.function(&realloc_func);
669730
}
670731
Definition::Local(_) => {
671732
funcs.function(ty);
672733
}
673734
}
674735
}
675736

676-
let lazy_stack_init_index =
677-
if sp.is_some() && allocation_state.is_some() && main_module_realloc.is_some() {
678-
// We have a stack pointer, a `cabi_realloc` function from the main module, and a global variable for
737+
let lazy_stack_init_index = match (&realloc_scheme, sp, allocation_state) {
738+
(Realloc(_), Some(_), Some(_)) => {
739+
// We have a `cabi_realloc` function from the main module, a stack pointer, and a global variable for
679740
// keeping track of (and short-circuiting) reentrance. That means we can (and should) do lazy stack
680741
// allocation.
681742
let index = num_func_imports + funcs.len();
682-
683-
// Tell the function remapper we're reserving a slot for our extra function:
684-
map.funcs.next += 1;
685-
743+
map.funcs.reserve();
686744
funcs.function(add_empty_type(&mut types));
687-
688745
Some(index)
689-
} else {
690-
None
691-
};
746+
}
747+
_ => None,
748+
};
692749

693750
let exported_funcs = self
694751
.exports
@@ -733,10 +790,16 @@ impl<'a> Module<'a> {
733790

734791
if sp.is_some() && (realloc_index.is_none() || allocation_state.is_none()) {
735792
// Either the main module does _not_ export a realloc function, or it is not safe to use for stack
736-
// allocation because we have no way to short-circuit reentrance, so we'll use `memory.grow` instead.
793+
// allocation because we have no way to short-circuit reentrance, so we'll provide our own realloc
794+
// function instead.
737795
realloc_index = Some(num_func_imports + funcs.len());
738796
funcs.function(add_realloc_type(&mut types));
739-
code.function(&realloc_via_memory_grow());
797+
let realloc_func = if let Some(index) = malloc_index {
798+
self.realloc_via_malloc(index)?
799+
} else {
800+
realloc_via_memory_grow()
801+
};
802+
code.function(&realloc_func);
740803
}
741804

742805
// Inject a start function to initialize the stack pointer which will be local to this module. This only
@@ -879,11 +942,12 @@ impl<'a> Module<'a> {
879942
section.push(code);
880943
subsection.encode(&mut section);
881944
};
882-
if let (Some(realloc_index), true) = (
883-
realloc_index,
884-
main_module_realloc.is_none() || allocation_state.is_none(),
885-
) {
886-
func_names.push((realloc_index, "realloc_via_memory_grow"));
945+
if let Some(realloc_index) = realloc_index {
946+
match realloc_scheme {
947+
Malloc(_) => func_names.push((realloc_index, "realloc_via_malloc")),
948+
MemoryGrow => func_names.push((realloc_index, "realloc_via_memory_grow")),
949+
Realloc(_) => (), // The realloc routine is in another module.
950+
}
887951
}
888952
if let Some(lazy_stack_init_index) = lazy_stack_init_index {
889953
func_names.push((lazy_stack_init_index, "allocate_stack"));
@@ -1118,6 +1182,14 @@ impl Remap {
11181182
self.next += 1;
11191183
}
11201184

1185+
/// Reserves the next "new index" for an item you are adding.
1186+
///
1187+
/// For example, call this when you add a new function to a module and need
1188+
/// to avoid other functions getting remapped to its index.
1189+
fn reserve(&mut self) {
1190+
self.next += 1;
1191+
}
1192+
11211193
/// Returns the new index corresponding to an old index.
11221194
///
11231195
/// Panics if the `old` index was not added via `push` above.

0 commit comments

Comments
 (0)