Skip to content

Commit 40bd20b

Browse files
feat: multithreaded wasm parser
Signed-off-by: Henry <mail@henrygressmann.de>
1 parent cee820e commit 40bd20b

12 files changed

Lines changed: 595 additions & 106 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/parser/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ log={workspace=true, optional=true}
1616
tinywasm-types={version="0.9.0-alpha.0", path="../types", default-features=false}
1717

1818
[features]
19-
default=["std", "log"]
19+
default=["std", "log", "parallel"]
2020
log=["dep:log"]
2121
std=["tinywasm-types/std", "wasmparser/std"]
22+
parallel=["std"]
23+
24+
[dev-dependencies]
25+
wat.workspace=true

crates/parser/build.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
use std::env;
2+
3+
fn main() {
4+
println!("cargo::rustc-check-cfg=cfg(parallel_parser)");
5+
6+
if env::var("CARGO_FEATURE_PARALLEL").is_err() {
7+
return;
8+
}
9+
10+
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
11+
let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
12+
13+
if !arch.starts_with("wasm") && os != "unknown" && os != "none" {
14+
println!("cargo:rustc-cfg=parallel_parser");
15+
}
16+
}

crates/parser/src/conversion.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use alloc::sync::Arc;
22

3-
use crate::{Result, module::Code, visit::process_operators_and_validate};
3+
use crate::{Result, module::FunctionCode, visit::process_operators_and_validate};
44
use alloc::{boxed::Box, format, string::ToString, vec::Vec};
55
use tinywasm_types::*;
66
use wasmparser::{FuncValidator, FuncValidatorAllocations, OperatorsReader, ValidatorResources};
@@ -163,7 +163,7 @@ pub(crate) fn convert_module_export(export: wasmparser::Export<'_>) -> Result<Ex
163163
pub(crate) fn convert_module_code(
164164
func: wasmparser::FunctionBody<'_>,
165165
mut validator: FuncValidator<ValidatorResources>,
166-
) -> Result<(Code, FuncValidatorAllocations)> {
166+
) -> Result<(FunctionCode, FuncValidatorAllocations)> {
167167
let locals_reader = func.get_locals_reader()?;
168168
let count = locals_reader.get_count();
169169
let pos = locals_reader.original_position();
@@ -200,7 +200,7 @@ pub(crate) fn convert_module_code(
200200
}
201201

202202
let (body, data, allocations) = process_operators_and_validate(validator, func, local_addr_map)?;
203-
Ok(((body, data, local_counts), allocations))
203+
Ok((FunctionCode { instructions: body, data, locals: local_counts, uses_local_memory: false }, allocations))
204204
}
205205

206206
pub(crate) fn convert_module_type(ty: wasmparser::RecGroup) -> Result<Arc<FuncType>> {

crates/parser/src/lib.rs

Lines changed: 109 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ mod macros;
3535
mod module;
3636
mod optimize;
3737
mod visit;
38+
39+
#[cfg(parallel_parser)]
40+
mod parallel;
41+
3842
pub use error::*;
3943
use module::ModuleReader;
4044
use wasmparser::{Validator, WasmFeatures};
@@ -51,11 +55,27 @@ pub struct ParserOptions {
5155
pub optimize_rewrite: bool,
5256
/// Whether to remove `Nop` and `MergeBarrier` instructions after rewriting.
5357
pub optimize_remove_nop: bool,
58+
59+
#[cfg(parallel_parser)]
60+
/// Number of threads to use for parallel parsing.
61+
///
62+
/// Requires the `parallel` feature. Ignored when the feature is disabled.
63+
///
64+
/// - `None`: auto-detect based on available parallelism
65+
/// - `Some(1)`: force single-threaded
66+
/// - `Some(n)`: use up to `n` workers
67+
pub parser_threads: Option<usize>,
5468
}
5569

5670
impl Default for ParserOptions {
5771
fn default() -> Self {
58-
Self { optimize_local_memory_allocation: true, optimize_rewrite: true, optimize_remove_nop: true }
72+
Self {
73+
optimize_local_memory_allocation: true,
74+
optimize_rewrite: true,
75+
optimize_remove_nop: true,
76+
#[cfg(parallel_parser)]
77+
parser_threads: None,
78+
}
5979
}
6080
}
6181

@@ -92,6 +112,21 @@ impl ParserOptions {
92112
pub const fn optimize_remove_nop(&self) -> bool {
93113
self.optimize_remove_nop
94114
}
115+
116+
#[cfg(parallel_parser)]
117+
/// Set the number of threads for parallel parsing.
118+
///
119+
/// Requires the `parallel` feature to have any effect.
120+
pub const fn with_parser_threads(mut self, threads: usize) -> Self {
121+
self.parser_threads = Some(threads);
122+
self
123+
}
124+
125+
#[cfg(parallel_parser)]
126+
/// Returns the configured parser thread count, or `None` for auto-detect.
127+
pub const fn parser_threads(&self) -> Option<usize> {
128+
self.parser_threads
129+
}
95130
}
96131

97132
/// A WebAssembly parser
@@ -139,20 +174,40 @@ impl Parser {
139174
Validator::new_with_features(features)
140175
}
141176

177+
#[cfg(feature = "std")]
178+
fn read_more(stream: &mut impl std::io::Read, buffer: &mut alloc::vec::Vec<u8>, hint: usize) -> Result<usize> {
179+
let len = buffer.len();
180+
buffer.extend((0..hint).map(|_| 0u8));
181+
let read_bytes = stream
182+
.read(&mut buffer[len..])
183+
.map_err(|e| ParseError::Other(alloc::format!("Error reading from stream: {e}")))?;
184+
buffer.truncate(len + read_bytes);
185+
Ok(read_bytes)
186+
}
187+
142188
/// Parse a [`Module`] from bytes
143189
pub fn parse_module_bytes(&self, wasm: impl AsRef<[u8]>) -> Result<Module> {
144190
let wasm = wasm.as_ref();
145191
let mut validator = Self::create_validator(self.options.clone());
146192
let mut reader = ModuleReader::default();
147193

148194
for payload in wasmparser::Parser::new(0).parse_all(wasm) {
149-
reader.process_payload(payload?, &mut validator)?;
195+
match payload? {
196+
wasmparser::Payload::CodeSectionStart { count, range, size } => {
197+
reader.begin_code_section(count, range, size, &mut validator, &self.options)?;
198+
}
199+
wasmparser::Payload::CodeSectionEntry(function) => {
200+
reader.process_borrowed_code_section_entry(function, &mut validator, &self.options)?;
201+
}
202+
payload => reader.process_payload(payload, &mut validator)?,
203+
}
150204
}
151205

152206
if !reader.end_reached {
153207
return Err(ParseError::EndNotReached);
154208
}
155209

210+
reader.process_pending_functions(&self.options)?;
156211
reader.into_module(&self.options)
157212
}
158213

@@ -176,18 +231,60 @@ impl Parser {
176231
loop {
177232
match parser.parse(&buffer, eof)? {
178233
wasmparser::Chunk::NeedMoreData(hint) => {
179-
let len = buffer.len();
180-
buffer.extend((0..hint).map(|_| 0u8));
181-
let read_bytes = stream
182-
.read(&mut buffer[len..])
183-
.map_err(|e| ParseError::Other(alloc::format!("Error reading from stream: {e}")))?;
184-
buffer.truncate(len + read_bytes);
234+
let read_bytes = Self::read_more(&mut stream, &mut buffer, hint as usize)?;
185235
eof = read_bytes == 0;
186236
}
187237
wasmparser::Chunk::Parsed { consumed, payload } => {
188-
reader.process_payload(payload, &mut validator)?;
189-
buffer.drain(..consumed);
238+
#[cfg(parallel_parser)]
239+
let mut deferred_code_section = None;
240+
241+
match payload {
242+
wasmparser::Payload::CodeSectionStart { count, range, size } => {
243+
let defer =
244+
reader.begin_code_section(count, range.clone(), size, &mut validator, &self.options)?;
245+
246+
#[cfg(parallel_parser)]
247+
if defer {
248+
deferred_code_section = Some((count, range.end - size as usize, size as usize));
249+
}
250+
251+
#[cfg(not(parallel_parser))]
252+
let _ = defer;
253+
254+
buffer.drain(..consumed);
255+
}
256+
wasmparser::Payload::CodeSectionEntry(function) => {
257+
reader.process_inline_code_section_entry(function, &mut validator, &self.options)?;
258+
buffer.drain(..consumed);
259+
}
260+
payload => {
261+
reader.process_payload(payload, &mut validator)?;
262+
buffer.drain(..consumed);
263+
}
264+
}
265+
266+
#[cfg(parallel_parser)]
267+
if let Some((count, body_offset, section_size)) = deferred_code_section {
268+
while buffer.len() < section_size {
269+
let remaining = section_size - buffer.len();
270+
let read_bytes = Self::read_more(&mut stream, &mut buffer, remaining)?;
271+
if read_bytes == 0 {
272+
return Err(ParseError::ParseError {
273+
message: "unexpected end-of-file".into(),
274+
offset: body_offset + buffer.len(),
275+
});
276+
}
277+
}
278+
279+
let section_bytes = alloc::sync::Arc::<[u8]>::from(buffer[..section_size].to_vec());
280+
reader.queue_owned_code_section(count, body_offset, section_bytes, &mut validator)?;
281+
parser.skip_section();
282+
buffer.drain(..section_size);
283+
continue;
284+
}
285+
190286
if eof || reader.end_reached {
287+
reader.process_pending_functions(&self.options)?;
191288
return reader.into_module(&self.options);
192289
}
193290
}
@@ -196,10 +293,10 @@ impl Parser {
196293
}
197294
}
198295

199-
impl TryFrom<ModuleReader> for Module {
296+
impl TryFrom<ModuleReader<'_>> for Module {
200297
type Error = ParseError;
201298

202-
fn try_from(reader: ModuleReader) -> Result<Self> {
299+
fn try_from(reader: ModuleReader<'_>) -> Result<Self> {
203300
reader.into_module(&ParserOptions::default())
204301
}
205302
}

0 commit comments

Comments
 (0)