Skip to content

Commit 8a25c4d

Browse files
committed
Adjust allocation strategy for TinyGo-derived modules so they don't immediately crash.
You can run the TinyGo empty project in Viceroy now; it immediately crashed before. It serves a request and returns a 0 exit code, somewhat obscured by a backtrace because an empty project doesn't implement a Reactor.
1 parent 6ed1788 commit 8a25c4d

4 files changed

Lines changed: 164 additions & 48 deletions

File tree

crates/wasmparser/src/readers/core/custom.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ impl<'a> CustomSectionReader<'a> {
103103
/// Return value of [`CustomSectionReader::as_known`].
104104
///
105105
/// Note that this is `#[non_exhaustive]` because depending on crate features
106-
/// this enumeration will different entries.
106+
/// this enumeration will contain different entries.
107107
#[allow(missing_docs)]
108108
#[non_exhaustive]
109109
pub enum KnownCustom<'a> {

crates/wit-component/src/encoding.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ use std::collections::HashMap;
8181
use std::hash::Hash;
8282
use std::mem;
8383
use wasm_encoder::*;
84+
use wasmparser::KnownCustom::Producers;
85+
use wasmparser::Payload::CustomSection;
8486
use wasmparser::{Validator, WasmFeatures};
8587
use wit_parser::{
8688
Function, FunctionKind, InterfaceId, LiveTypes, Resolve, Stability, Type, TypeDefKind, TypeId,
@@ -2670,11 +2672,34 @@ pub(super) struct Adapter {
26702672
library_info: Option<LibraryInfo>,
26712673
}
26722674

2675+
/// Returns whether the module appears to have been generated by TinyGo. Absent
2676+
/// any evidence, returns false.
2677+
fn is_produced_by_tiny_go(module: &[u8]) -> Result<bool> {
2678+
for section in wasmparser::Parser::new(0).parse_all(module) {
2679+
if let CustomSection(custom_reader) = section? {
2680+
if let Producers(producers_reader) = custom_reader.as_known() {
2681+
for field in producers_reader {
2682+
let field = field?;
2683+
if field.name == "processed-by" {
2684+
for value in field.values.into_iter() {
2685+
if value?.name == "TinyGo" {
2686+
return Ok(true);
2687+
}
2688+
}
2689+
}
2690+
}
2691+
return Ok(false);
2692+
}
2693+
}
2694+
}
2695+
return Ok(false);
2696+
}
26732697
/// An encoder of components based on `wit` interface definitions.
26742698
#[derive(Default)]
26752699
pub struct ComponentEncoder {
26762700
module: Vec<u8>,
26772701
module_import_map: Option<ModuleImportMap>,
2702+
module_is_produced_by_tiny_go: bool,
26782703
pub(super) metadata: Bindgen,
26792704
validate: bool,
26802705
pub(super) main_module_exports: IndexSet<WorldKey>,
@@ -2693,6 +2718,7 @@ impl ComponentEncoder {
26932718
/// core module.
26942719
pub fn module(mut self, module: &[u8]) -> Result<Self> {
26952720
let (wasm, metadata) = self.decode(module.as_ref())?;
2721+
self.module_is_produced_by_tiny_go = is_produced_by_tiny_go(&module)?;
26962722
let (wasm, module_import_map) = ModuleImportMap::new(wasm)?;
26972723
let exports = self
26982724
.merge_metadata(metadata)

crates/wit-component/src/encoding/world.rs

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::{Adapter, ComponentEncoder, LibraryInfo, RequiredOptions};
2+
use crate::gc::ReallocScheme;
23
use crate::validation::{
34
Import, ImportMap, ValidatedModule, validate_adapter_module, validate_module,
45
};
@@ -24,7 +25,7 @@ pub struct WorldAdapter<'a> {
2425
/// `EncodingState` as this information doesn't change throughout the encoding
2526
/// process.
2627
pub struct ComponentWorld<'a> {
27-
/// Encoder configuration with modules, the document ,etc.
28+
/// Encoder configuration with modules, the document, etc.
2829
pub encoder: &'a ComponentEncoder,
2930
/// Validation information of the input module, or `None` in `--types-only`
3031
/// mode.
@@ -83,6 +84,23 @@ impl<'a> ComponentWorld<'a> {
8384
Ok(ret)
8485
}
8586

87+
/// Given that there is no realloc function exported from the main module,
88+
/// returns the realloc scheme we should use.
89+
fn fallback_realloc_scheme(&self) -> ReallocScheme {
90+
if self.encoder.module_is_produced_by_tiny_go {
91+
// TODO: Also check for cabi_realloc and bail if it's there.
92+
// If it appears the module was emitted by TinyGo, we delegate to
93+
// its `malloc()` function. (TinyGo assumes its GC has rein over the
94+
// whole memory and quickly overwrites the adapter's
95+
// `memory.grow`-allocated State struct unless we inform TinyGo's GC
96+
// of the memory we use.)
97+
ReallocScheme::Malloc("malloc")
98+
} else {
99+
// If it's not TinyGo, use `memory.grow` instead.
100+
ReallocScheme::MemoryGrow
101+
}
102+
}
103+
86104
/// Process adapters which are required here. Iterate over all
87105
/// adapters and figure out what functions are required from the
88106
/// adapter itself, either because the functions are imported by the
@@ -121,8 +139,8 @@ impl<'a> ComponentWorld<'a> {
121139
} else {
122140
// Without `library_info` this means that this is an adapter.
123141
// The goal of the adapter is to provide a suite of symbols that
124-
// can be imported, but not all symbols may be imported. Here
125-
// the module is trimmed down to only what's needed by the
142+
// can be imported, but perhaps not all symbols are imported.
143+
// Here the module is trimmed down to only what's needed by the
126144
// original main module.
127145
//
128146
// The main module requires `required_by_import` above, but
@@ -163,17 +181,17 @@ impl<'a> ComponentWorld<'a> {
163181
required.insert(name.to_string());
164182
}
165183

184+
let realloc = if self.encoder.realloc_via_memory_grow {
185+
self.fallback_realloc_scheme()
186+
} else {
187+
match self.info.exports.realloc_to_import_into_adapter() {
188+
Some(name) => ReallocScheme::Realloc(name),
189+
None => self.fallback_realloc_scheme(),
190+
}
191+
};
166192
Cow::Owned(
167-
crate::gc::run(
168-
wasm,
169-
&required,
170-
if self.encoder.realloc_via_memory_grow {
171-
None
172-
} else {
173-
self.info.exports.realloc_to_import_into_adapter()
174-
},
175-
)
176-
.context("failed to reduce input adapter module to its minimal size")?,
193+
crate::gc::run(wasm, &required, realloc)
194+
.context("failed to reduce input adapter module to its minimal size")?,
177195
)
178196
};
179197
let info = validate_adapter_module(

crates/wit-component/src/gc.rs

Lines changed: 106 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,30 @@ use wasmparser::*;
1313

1414
const PAGE_SIZE: i32 = 64 * 1024;
1515

16-
/// This function will reduce the input core `wasm` module to only the set of
16+
/// The primitive allocation method we use to implement a realloc function,
17+
/// based on what the main module provides
18+
pub enum ReallocScheme<'a> {
19+
/// The main module exports a ready-made realloc function with the given
20+
/// name.
21+
Realloc(&'a str),
22+
/// The main module exports a `malloc(size: u32) -> u32` function with the
23+
/// given name. We can wrap this in a thin shell to make realloc.
24+
Malloc(&'a str),
25+
/// The main module exports no realloc function or anything to make one out
26+
/// of, so we should implement realloc in terms of `memory.grow`.
27+
MemoryGrow,
28+
}
29+
use ReallocScheme::*;
30+
31+
/// This function will reduce the input core `wasm` module (the adapter, I believe) to only the set of
1732
/// exports `required`.
1833
///
1934
/// This internally performs a "gc" pass after removing exports to ensure that
2035
/// the resulting module imports the minimal set of functions necessary.
2136
pub fn run(
2237
wasm: &[u8],
2338
required: &IndexSet<String>,
24-
main_module_realloc: Option<&str>,
39+
realloc_scheme: ReallocScheme,
2540
) -> Result<Vec<u8>> {
2641
assert!(!required.is_empty());
2742

@@ -46,7 +61,7 @@ pub fn run(
4661
}
4762
assert!(!module.exports.is_empty());
4863
module.liveness()?;
49-
module.encode(main_module_realloc)
64+
module.encode(realloc_scheme)
5065
}
5166

5267
/// This function generates a Wasm function body which implements `cabi_realloc` in terms of `memory.grow`. It
@@ -500,9 +515,21 @@ impl<'a> Module<'a> {
500515
live_iter(&self.live_tables, self.tables.iter())
501516
}
502517

518+
/// Returns a new Wasm function body which implements `cabi_realloc` to call
519+
/// a `malloc` that's exported from the main module. Pass in the index of
520+
/// the `malloc` function within the adapter.
521+
fn realloc_via_malloc(&self, malloc_index: u32) -> Result<wasm_encoder::Function> {
522+
let mut func = wasm_encoder::Function::new([]);
523+
func.instructions()
524+
.local_get(3) // desired new size
525+
.call(malloc_index)
526+
.end();
527+
Ok(func)
528+
}
529+
503530
/// Encodes this `Module` to a new wasm module which is gc'd and only
504531
/// contains the items that are live as calculated by the `liveness` pass.
505-
fn encode(&mut self, main_module_realloc: Option<&str>) -> Result<Vec<u8>> {
532+
fn encode(&mut self, realloc_scheme: ReallocScheme) -> Result<Vec<u8>> {
506533
// Data structure used to track the mapping of old index to new index
507534
// for all live items.
508535
let mut map = Encoder::default();
@@ -584,11 +611,38 @@ impl<'a> Module<'a> {
584611
let is_realloc =
585612
|m, n| m == "__main_module__" && matches!(n, "canonical_abi_realloc" | "cabi_realloc");
586613

614+
let add_malloc_type = |types: &mut wasm_encoder::TypeSection| {
615+
let type_index = types.len();
616+
types
617+
.ty()
618+
.function([wasm_encoder::ValType::I32], [wasm_encoder::ValType::I32]);
619+
type_index
620+
};
621+
622+
let mut malloc_index = None;
623+
let mut func_names = Vec::new();
624+
625+
// Import malloc before we start keeping track of the realloc_index so
626+
// we don't have to add num_func_imports every time we read it.
627+
if let Malloc(malloc_name) = realloc_scheme {
628+
imports.import(
629+
"__main_module__",
630+
malloc_name,
631+
EntityType::Function(add_malloc_type(&mut types)),
632+
);
633+
malloc_index = Some(num_func_imports);
634+
map.funcs.reserve();
635+
func_names.push((num_func_imports, malloc_name));
636+
num_func_imports += 1;
637+
}
638+
587639
let (imported, local) =
588640
self.live_funcs()
589641
.partition::<Vec<_>, _>(|(_, func)| match &func.def {
590642
Definition::Import(m, n) => {
591-
!is_realloc(*m, *n) || main_module_realloc.is_some()
643+
// Keep realloc function around iff we're going to use
644+
// it. Always keep other functions around.
645+
!is_realloc(*m, *n) || matches!(realloc_scheme, Realloc(_))
592646
}
593647
Definition::Local(_) => false,
594648
});
@@ -603,7 +657,10 @@ impl<'a> Module<'a> {
603657
// exports that function, but possibly using a different name
604658
// (e.g. `canonical_abi_realloc`). Update the name to match if necessary.
605659
realloc_index = Some(num_func_imports);
606-
main_module_realloc.unwrap_or(n)
660+
match realloc_scheme {
661+
Realloc(name) => name,
662+
_ => n,
663+
}
607664
} else {
608665
n
609666
};
@@ -637,22 +694,17 @@ impl<'a> Module<'a> {
637694
let sp = self.find_mut_i32_global("__stack_pointer")?;
638695
let allocation_state = self.find_mut_i32_global("allocation_state")?;
639696

640-
let mut func_names = Vec::new();
641-
642-
if let (Some(realloc), Some(_), None) = (main_module_realloc, sp, realloc_index) {
697+
if let (Realloc(realloc_name), Some(_), None) = (&realloc_scheme, sp, realloc_index) {
643698
// The main module exports a realloc function, and although the adapter doesn't import it, we're going
644699
// to add a function which calls it to allocate some stack space, so let's add an import now.
645-
646-
// Tell the function remapper we're reserving a slot for our extra import:
647-
map.funcs.next += 1;
648-
700+
map.funcs.reserve();
649701
realloc_index = Some(num_func_imports);
650702
imports.import(
651703
"__main_module__",
652-
realloc,
704+
realloc_name,
653705
EntityType::Function(add_realloc_type(&mut types)),
654706
);
655-
func_names.push((num_func_imports, realloc));
707+
func_names.push((num_func_imports, realloc_name));
656708
num_func_imports += 1;
657709
}
658710

@@ -665,30 +717,35 @@ impl<'a> Module<'a> {
665717
// exporting it. In this case, we need to define a local function it can call instead.
666718
realloc_index = Some(num_func_imports + funcs.len());
667719
funcs.function(ty);
668-
code.function(&realloc_via_memory_grow());
720+
let realloc_func = match realloc_scheme {
721+
Malloc(_) => self.realloc_via_malloc(
722+
malloc_index.expect("this was set above when the enum was Malloc"),
723+
)?,
724+
MemoryGrow => realloc_via_memory_grow(),
725+
Realloc(_) => bail!(
726+
"shouldn't get here, as we already know the main module doesn't export a realloc function"
727+
),
728+
};
729+
code.function(&realloc_func);
669730
}
670731
Definition::Local(_) => {
671732
funcs.function(ty);
672733
}
673734
}
674735
}
675736

676-
let lazy_stack_init_index =
677-
if sp.is_some() && allocation_state.is_some() && main_module_realloc.is_some() {
678-
// We have a stack pointer, a `cabi_realloc` function from the main module, and a global variable for
737+
let lazy_stack_init_index = match (&realloc_scheme, sp, allocation_state) {
738+
(Realloc(_), Some(_), Some(_)) => {
739+
// We have a `cabi_realloc` function from the main module, a stack pointer, and a global variable for
679740
// keeping track of (and short-circuiting) reentrance. That means we can (and should) do lazy stack
680741
// allocation.
681742
let index = num_func_imports + funcs.len();
682-
683-
// Tell the function remapper we're reserving a slot for our extra function:
684-
map.funcs.next += 1;
685-
743+
map.funcs.reserve();
686744
funcs.function(add_empty_type(&mut types));
687-
688745
Some(index)
689-
} else {
690-
None
691-
};
746+
}
747+
_ => None,
748+
};
692749

693750
let exported_funcs = self
694751
.exports
@@ -733,10 +790,16 @@ impl<'a> Module<'a> {
733790

734791
if sp.is_some() && (realloc_index.is_none() || allocation_state.is_none()) {
735792
// Either the main module does _not_ export a realloc function, or it is not safe to use for stack
736-
// allocation because we have no way to short-circuit reentrance, so we'll use `memory.grow` instead.
793+
// allocation because we have no way to short-circuit reentrance, so we'll provide our own realloc
794+
// function instead.
737795
realloc_index = Some(num_func_imports + funcs.len());
738796
funcs.function(add_realloc_type(&mut types));
739-
code.function(&realloc_via_memory_grow());
797+
let realloc_func = if let Some(index) = malloc_index {
798+
self.realloc_via_malloc(index)?
799+
} else {
800+
realloc_via_memory_grow()
801+
};
802+
code.function(&realloc_func);
740803
}
741804

742805
// Inject a start function to initialize the stack pointer which will be local to this module. This only
@@ -879,11 +942,12 @@ impl<'a> Module<'a> {
879942
section.push(code);
880943
subsection.encode(&mut section);
881944
};
882-
if let (Some(realloc_index), true) = (
883-
realloc_index,
884-
main_module_realloc.is_none() || allocation_state.is_none(),
885-
) {
886-
func_names.push((realloc_index, "realloc_via_memory_grow"));
945+
if let Some(realloc_index) = realloc_index {
946+
match realloc_scheme {
947+
Malloc(_) => func_names.push((realloc_index, "realloc_via_malloc")),
948+
MemoryGrow => func_names.push((realloc_index, "realloc_via_memory_grow")),
949+
Realloc(_) => (), // The realloc routine is in another module.
950+
}
887951
}
888952
if let Some(lazy_stack_init_index) = lazy_stack_init_index {
889953
func_names.push((lazy_stack_init_index, "allocate_stack"));
@@ -1118,6 +1182,14 @@ impl Remap {
11181182
self.next += 1;
11191183
}
11201184

1185+
/// Reserves the next "new index" for an item you are adding.
1186+
///
1187+
/// For example, call this when you add a new function to a module and need
1188+
/// to avoid other functions getting remapped to its index.
1189+
fn reserve(&mut self) {
1190+
self.next += 1;
1191+
}
1192+
11211193
/// Returns the new index corresponding to an old index.
11221194
///
11231195
/// Panics if the `old` index was not added via `push` above.

0 commit comments

Comments
 (0)