Skip to content

Commit 92410ee

Browse files
use peek for stack access, reorganization and simplifications
1 parent 2f3238c commit 92410ee

21 files changed

Lines changed: 1915 additions & 1331 deletions

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ Based on the [original paper by Ben L. Titzer](https://www.cs.tufts.edu/comp/150
1313
- `wasm_memory.rs` - Linear memory management
1414
- `signature.rs` - Function signature handling
1515
- `leb128.rs` - LEB128 encoding/decoding utilities
16-
- `byte_iter.rs` - Byte stream iteration helpers
1716
- `error.rs` - Error types and handling
17+
- `opcodes.rs` - Opcode constants
1818
- `lib.rs` - Library entry point
1919
- `/bin` - Example usage demonstrations
2020
- `/tests` - Tests

bench.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@ This file documents the coremark bench results to keep track of performance impr
99
- repr(C) for the SideTableEntry struct caused mysterious improvements; not sure if it is a fluke
1010
- cc02503: avg = 855.71814, n = 20 (remove defensive malformed check in main loop)
1111
- since the module is already validated at run time, there is no reason for the check to exist; it was a remnant of an early development phase that lacked proper handling for some malformed modules
12-
- current: no significant difference
13-
14-
On nightly, the performance is slightly better (sometimes reaching 900)
12+
- current: avg = 948.285031, n = 20 (use peek for stack access, reorganization and simplifications)
1513

1614
Next step: use direct threading to improve branch prediction
1715

@@ -28,6 +26,6 @@ wasmi: ~1700
2826
tinywasm: ~630
2927

3028
Goal:
31-
We expect/hope to reach ~1200 after threaded dispatch implementation. It seems like Ben Titzer only reached performance comparable to production-ready, optimizing interpreters through manually crafted assembly code for hot paths.
29+
I expect/hope to reach ~1200 after threaded dispatch implementation. It seems like Ben Titzer only reached performance comparable to production-ready, optimizing interpreters through manually crafted assembly code for hot paths.
3230

33-
Higher performance may not be pursued after the point and instead I might focus on adding more instructions to achieve Wasm 2.0 spec parity (should be easy with AI).
31+
Higher performance may not be pursued after that point; instead, I might focus on adding more instructions to achieve Wasm 2.0 spec parity.

benches/coremark.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput};
77
use wagmi::{ExportValue, Imports, Instance, Module, RuntimeFunction, ValType, WasmValue};
88

99
fn clock_ms_i64() -> i64 {
10-
SystemTime::now()
11-
.duration_since(UNIX_EPOCH)
12-
.expect("Clock may have gone backwards")
13-
.as_millis() as i64
10+
SystemTime::now().duration_since(UNIX_EPOCH).expect("Clock may have gone backwards").as_millis()
11+
as i64
1412
}
1513

1614
fn setup_instance() -> (Instance, wagmi::RuntimeFunction) {
@@ -41,12 +39,16 @@ fn bench_coremark(c: &mut Criterion) {
4139
let results_once = instance.invoke(&run_fn, &[]).expect("invoke run once");
4240
let elapsed_once = t0.elapsed();
4341
let score_once = results_once[0].as_f32();
44-
println!("coremark single-run: elapsed={:.6}s score={}", elapsed_once.as_secs_f64(), score_once);
42+
println!(
43+
"coremark single-run: elapsed={:.6}s score={}",
44+
elapsed_once.as_secs_f64(),
45+
score_once
46+
);
4547

4648
let mut group = c.benchmark_group("coremark_minimal");
4749
let mut scores: Vec<f32> = Vec::new();
4850
group.sample_size(10);
49-
group.measurement_time(Duration::from_secs(600));
51+
group.measurement_time(Duration::from_secs(150));
5052
group.warm_up_time(Duration::from_secs(1));
5153
group.throughput(Throughput::Elements(1));
5254
group.bench_function("run", |b| {
@@ -73,5 +75,3 @@ fn bench_coremark(c: &mut Criterion) {
7375

7476
criterion_group!(benches, bench_coremark);
7577
criterion_main!(benches);
76-
77-

rustfmt.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
edition = "2021"
2+
use_small_heuristics = "Max"
3+
struct_variant_width = 40

src/bin/utils/mod.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
1-
use std::process::Command;
2-
use std::path::{Path, PathBuf};
3-
use std::fs;
41
use std::env;
2+
use std::fs;
3+
use std::path::{Path, PathBuf};
4+
use std::process::Command;
55

66
/// Compiles a WAT file to WASM using wat2wasm
77
pub fn compile_wat(wat_path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
88
let temp_dir = env::temp_dir();
9-
let stem = wat_path.file_stem()
10-
.ok_or("Invalid WAT file path")?
11-
.to_string_lossy();
9+
let stem = wat_path.file_stem().ok_or("Invalid WAT file path")?.to_string_lossy();
1210
let wasm_path = temp_dir.join(format!("{}.wasm", stem));
13-
11+
1412
// Determine the correct wat2wasm binary based on OS
1513
let wat2wasm = if cfg!(target_os = "macos") {
1614
"tools/osx/wat2wasm"
@@ -19,25 +17,26 @@ pub fn compile_wat(wat_path: &Path) -> Result<Vec<u8>, Box<dyn std::error::Error
1917
} else {
2018
return Err("Unsupported OS for wat2wasm".into());
2119
};
22-
20+
2321
// Run wat2wasm
2422
let output = Command::new(wat2wasm)
2523
.arg(wat_path)
2624
.arg("-o")
2725
.arg(&wasm_path)
2826
.output()
2927
.map_err(|e| format!("Failed to run wat2wasm: {}", e))?;
30-
28+
3129
if !output.status.success() {
3230
return Err(format!(
3331
"wat2wasm compilation failed: {}",
3432
String::from_utf8_lossy(&output.stderr)
35-
).into());
33+
)
34+
.into());
3635
}
37-
36+
3837
let wasm_bytes = fs::read(&wasm_path)?;
3938
let _ = fs::remove_file(&wasm_path);
40-
39+
4140
Ok(wasm_bytes)
4241
}
4342

@@ -48,4 +47,4 @@ pub fn load_resource_module(name: &str) -> Result<Vec<u8>, Box<dyn std::error::E
4847
.join("src/bin/resources")
4948
.join(format!("{}.wat", name));
5049
compile_wat(&path)
51-
}
50+
}

src/bin/wagmi_example_basic.rs

Lines changed: 44 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::rc::Rc;
2-
use wagmi::{Module, Instance, Imports, WasmValue};
2+
use wagmi::{Imports, Instance, Module, WasmValue};
33

44
mod utils;
55
use utils::load_resource_module;
@@ -10,103 +10,109 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
1010
let arithmetic_module = Rc::new(arithmetic_module);
1111
let imports = Imports::new();
1212
let arithmetic_instance = Instance::instantiate(arithmetic_module, &imports)?;
13-
14-
13+
1514
if let Some(wagmi::ExportValue::Function(add)) = arithmetic_instance.exports.get("add") {
16-
let result = arithmetic_instance.invoke(add, &[WasmValue::from_i32(10), WasmValue::from_i32(32)])?;
15+
let result =
16+
arithmetic_instance.invoke(add, &[WasmValue::from_i32(10), WasmValue::from_i32(32)])?;
1717
println!("add(10, 32) = {}", result[0].as_i32());
1818
}
19-
19+
2020
if let Some(wagmi::ExportValue::Function(sub)) = arithmetic_instance.exports.get("subtract") {
21-
let result = arithmetic_instance.invoke(sub, &[WasmValue::from_i32(100), WasmValue::from_i32(58)])?;
21+
let result = arithmetic_instance
22+
.invoke(sub, &[WasmValue::from_i32(100), WasmValue::from_i32(58)])?;
2223
println!("subtract(100, 58) = {}", result[0].as_i32());
2324
}
24-
25+
2526
if let Some(wagmi::ExportValue::Function(mul)) = arithmetic_instance.exports.get("multiply") {
26-
let result = arithmetic_instance.invoke(mul, &[WasmValue::from_i32(6), WasmValue::from_i32(7)])?;
27+
let result =
28+
arithmetic_instance.invoke(mul, &[WasmValue::from_i32(6), WasmValue::from_i32(7)])?;
2729
println!("multiply(6, 7) = {}", result[0].as_i32());
2830
}
29-
31+
3032
if let Some(wagmi::ExportValue::Function(div)) = arithmetic_instance.exports.get("divide") {
31-
let result = arithmetic_instance.invoke(div, &[WasmValue::from_i32(84), WasmValue::from_i32(2)])?;
33+
let result =
34+
arithmetic_instance.invoke(div, &[WasmValue::from_i32(84), WasmValue::from_i32(2)])?;
3235
println!("divide(84, 2) = {}", result[0].as_i32());
3336
}
34-
37+
3538
if let Some(wagmi::ExportValue::Function(modulo)) = arithmetic_instance.exports.get("modulo") {
36-
let result = arithmetic_instance.invoke(modulo, &[WasmValue::from_i32(10), WasmValue::from_i32(3)])?;
39+
let result = arithmetic_instance
40+
.invoke(modulo, &[WasmValue::from_i32(10), WasmValue::from_i32(3)])?;
3741
println!("modulo(10, 3) = {}", result[0].as_i32());
3842
}
39-
40-
43+
4144
let factorial_bytes = load_resource_module("factorial")?;
4245
let factorial_module = Module::compile(factorial_bytes)?;
4346
let factorial_module = Rc::new(factorial_module);
4447
let factorial_instance = Instance::instantiate(factorial_module, &imports)?;
45-
46-
if let Some(wagmi::ExportValue::Function(factorial)) = factorial_instance.exports.get("factorial") {
48+
49+
if let Some(wagmi::ExportValue::Function(factorial)) =
50+
factorial_instance.exports.get("factorial")
51+
{
4752
for n in [0, 1, 5, 10] {
4853
let result = factorial_instance.invoke(factorial, &[WasmValue::from_i32(n)])?;
4954
println!("factorial({}) = {}", n, result[0].as_i32());
5055
}
5156
}
52-
53-
57+
5458
let control_bytes = load_resource_module("control_flow")?;
5559
let control_module = Module::compile(control_bytes)?;
5660
let control_module = Rc::new(control_module);
5761
let control_instance = Instance::instantiate(control_module, &imports)?;
58-
59-
62+
6063
if let Some(wagmi::ExportValue::Function(fib)) = control_instance.exports.get("fibonacci") {
6164
for n in [0, 1, 2, 5, 10] {
6265
let result = control_instance.invoke(fib, &[WasmValue::from_i32(n)])?;
6366
println!("fibonacci({}) = {}", n, result[0].as_i32());
6467
}
6568
}
66-
69+
6770
if let Some(wagmi::ExportValue::Function(max)) = control_instance.exports.get("max") {
68-
let result = control_instance.invoke(max, &[WasmValue::from_i32(42), WasmValue::from_i32(17)])?;
71+
let result =
72+
control_instance.invoke(max, &[WasmValue::from_i32(42), WasmValue::from_i32(17)])?;
6973
println!("max(42, 17) = {}", result[0].as_i32());
7074
}
71-
75+
7276
if let Some(wagmi::ExportValue::Function(min)) = control_instance.exports.get("min") {
73-
let result = control_instance.invoke(min, &[WasmValue::from_i32(42), WasmValue::from_i32(17)])?;
77+
let result =
78+
control_instance.invoke(min, &[WasmValue::from_i32(42), WasmValue::from_i32(17)])?;
7479
println!("min(42, 17) = {}", result[0].as_i32());
7580
}
76-
81+
7782
if let Some(wagmi::ExportValue::Function(abs)) = control_instance.exports.get("abs") {
7883
let result = control_instance.invoke(abs, &[WasmValue::from_i32(-42)])?;
7984
println!("abs(-42) = {}", result[0].as_i32());
8085
}
81-
86+
8287
if let Some(wagmi::ExportValue::Function(sign)) = control_instance.exports.get("sign") {
8388
for n in [-42, 0, 42] {
8489
let result = control_instance.invoke(sign, &[WasmValue::from_i32(n)])?;
8590
println!("sign({}) = {}", n, result[0].as_i32());
8691
}
8792
}
88-
89-
93+
9094
let memory_bytes = load_resource_module("memory_ops")?;
9195
let memory_module = Module::compile(memory_bytes)?;
9296
let memory_module = Rc::new(memory_module);
9397
let memory_instance = Instance::instantiate(memory_module, &imports)?;
94-
95-
96-
if let (Some(wagmi::ExportValue::Function(store)), Some(wagmi::ExportValue::Function(load))) =
97-
(memory_instance.exports.get("store_i32"), memory_instance.exports.get("load_i32")) {
98-
98+
99+
if let (Some(wagmi::ExportValue::Function(store)), Some(wagmi::ExportValue::Function(load))) =
100+
(memory_instance.exports.get("store_i32"), memory_instance.exports.get("load_i32"))
101+
{
99102
memory_instance.invoke(store, &[WasmValue::from_i32(0), WasmValue::from_i32(42)])?;
100103
println!("Stored 42 at offset 0");
101-
104+
102105
let result = memory_instance.invoke(load, &[WasmValue::from_i32(0)])?;
103106
println!("Loaded from offset 0: {}", result[0].as_i32());
104107
}
105-
108+
106109
if let Some(wagmi::ExportValue::Function(memset)) = memory_instance.exports.get("memset") {
107-
memory_instance.invoke(memset, &[WasmValue::from_i32(100), WasmValue::from_i32(0xFF), WasmValue::from_i32(10)])?;
110+
memory_instance.invoke(
111+
memset,
112+
&[WasmValue::from_i32(100), WasmValue::from_i32(0xFF), WasmValue::from_i32(10)],
113+
)?;
108114
println!("Filled 10 bytes at offset 100 with 0xFF");
109115
}
110-
116+
111117
Ok(())
112-
}
118+
}

0 commit comments

Comments
 (0)