Skip to content

Commit 1f741ba

Browse files
feat: add relaxed simd
Signed-off-by: Henry <mail@henrygressmann.de>
1 parent a15acab commit 1f741ba

File tree

12 files changed

+234
-43
lines changed

12 files changed

+234
-43
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Untrusted WebAssembly code should not be able to crash the runtime or access mem
5454
| [**Fixed-Width SIMD**](https://github.com/WebAssembly/simd/blob/main/proposals/simd/Overview.md) | 🟢 | `next` |
5555
| [**Memory64**](https://github.com/WebAssembly/memory64/blob/master/proposals/memory64/Overview.md) | 🟢 | `next` |
5656
| [**Tail Call**](https://github.com/WebAssembly/tail-call/blob/main/proposals/tail-call/Overview.md) | 🟢 | `next` |
57-
| [**Relaxed SIMD**](https://github.com/WebAssembly/relaxed-simd/blob/main/proposals/relaxed-simd/Overview.md) | 🚧 | - |
57+
| [**Relaxed SIMD**](https://github.com/WebAssembly/relaxed-simd/blob/main/proposals/relaxed-simd/Overview.md) | 🟢 | `next` |
5858
| [**Wide Arithmetic**](https://github.com/WebAssembly/wide-arithmetic/blob/main/proposals/wide-arithmetic/Overview.md) | 🚧 | - |
5959
| [**Custom Descriptors**](https://github.com/WebAssembly/custom-descriptors/blob/main/proposals/custom-descriptors/Overview.md) | 🌑 | - |
6060
| [**Exception Handling**](https://github.com/WebAssembly/exception-handling/blob/main/proposals/exception-handling/Exceptions.md) | 🌑 | - |

crates/parser/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ impl Parser {
7979
exceptions: false,
8080
gc: false,
8181
memory_control: false,
82-
relaxed_simd: false,
82+
relaxed_simd: true,
8383
threads: false,
8484
shared_everything_threads: false,
8585
legacy_exceptions: false,

crates/parser/src/visit.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ macro_rules! impl_visit_operator {
363363
(@@saturating_float_to_int $($rest:tt)* ) => {};
364364
(@@bulk_memory $($rest:tt)* ) => {};
365365
(@@simd $($rest:tt)* ) => {};
366+
(@@relaxed_simd $($rest:tt)* ) => {};
366367
(@@tail_call $($rest:tt)* ) => {};
367368

368369
(@@$proposal:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident ($($ann:tt)*)) => {
@@ -830,6 +831,7 @@ macro_rules! impl_visit_simd_operator {
830831
};
831832

832833
(@@simd $($rest:tt)* ) => {};
834+
(@@relaxed_simd $($rest:tt)* ) => {};
833835
(@@$proposal:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident ($($ann:tt)*)) => {
834836
fn $visit(&mut self $($(,$arg: $argty)*)?) {
835837
self.unsupported(stringify!($visit))
@@ -879,6 +881,19 @@ impl<R: WasmModuleResources> wasmparser::VisitSimdOperator<'_> for FunctionBuild
879881
visit_f64x2_convert_low_i32x4_s(F64x2ConvertLowI32x4S), visit_f64x2_convert_low_i32x4_u(F64x2ConvertLowI32x4U),
880882
visit_f32x4_demote_f64x2_zero(F32x4DemoteF64x2Zero), visit_f64x2_promote_low_f32x4(F64x2PromoteLowF32x4),
881883

884+
visit_i8x16_relaxed_swizzle(I8x16RelaxedSwizzle),
885+
visit_i32x4_relaxed_trunc_f32x4_s(I32x4RelaxedTruncF32x4S), visit_i32x4_relaxed_trunc_f32x4_u(I32x4RelaxedTruncF32x4U),
886+
visit_i32x4_relaxed_trunc_f64x2_s_zero(I32x4RelaxedTruncF64x2SZero), visit_i32x4_relaxed_trunc_f64x2_u_zero(I32x4RelaxedTruncF64x2UZero),
887+
visit_f32x4_relaxed_madd(F32x4RelaxedMadd), visit_f32x4_relaxed_nmadd(F32x4RelaxedNmadd),
888+
visit_f64x2_relaxed_madd(F64x2RelaxedMadd), visit_f64x2_relaxed_nmadd(F64x2RelaxedNmadd),
889+
visit_i8x16_relaxed_laneselect(I8x16RelaxedLaneselect), visit_i16x8_relaxed_laneselect(I16x8RelaxedLaneselect),
890+
visit_i32x4_relaxed_laneselect(I32x4RelaxedLaneselect), visit_i64x2_relaxed_laneselect(I64x2RelaxedLaneselect),
891+
visit_f32x4_relaxed_min(F32x4RelaxedMin), visit_f32x4_relaxed_max(F32x4RelaxedMax),
892+
visit_f64x2_relaxed_min(F64x2RelaxedMin), visit_f64x2_relaxed_max(F64x2RelaxedMax),
893+
visit_i16x8_relaxed_q15mulr_s(I16x8RelaxedQ15mulrS),
894+
visit_i16x8_relaxed_dot_i8x16_i7x16_s(I16x8RelaxedDotI8x16I7x16S),
895+
visit_i32x4_relaxed_dot_i8x16_i7x16_add_s(I32x4RelaxedDotI8x16I7x16AddS),
896+
882897
visit_i8x16_extract_lane_s(I8x16ExtractLaneS, u8), visit_i8x16_extract_lane_u(I8x16ExtractLaneU, u8), visit_i8x16_replace_lane(I8x16ReplaceLane, u8),
883898
visit_i16x8_extract_lane_s(I16x8ExtractLaneS, u8), visit_i16x8_extract_lane_u(I16x8ExtractLaneU, u8), visit_i16x8_replace_lane(I16x8ReplaceLane, u8),
884899
visit_i32x4_extract_lane(I32x4ExtractLane, u8), visit_i32x4_replace_lane(I32x4ReplaceLane, u8),

crates/tinywasm/src/interpreter/executor.rs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,8 @@ impl<'store, const BUDGETED: bool> Executor<'store, BUDGETED> {
152152
LocalCopy64(from, to) => self.store.stack.values.local_set(&self.cf, *to, self.store.stack.values.local_get::<Value64>(&self.cf, *from)),
153153
LocalCopy128(from, to) => self.store.stack.values.local_set(&self.cf, *to, self.store.stack.values.local_get::<Value128>(&self.cf, *from)),
154154
LocalCopyRef(from, to) => self.store.stack.values.local_set(&self.cf, *to, self.store.stack.values.local_get::<ValueRef>(&self.cf, *from)),
155-
I32AddLocals(a, b) => self.store.stack.values.push(
156-
self.store.stack.values.local_get::<i32>(&self.cf, *a).wrapping_add(self.store.stack.values.local_get::<i32>(&self.cf, *b)),
157-
)?,
158-
I64AddLocals(a, b) => self.store.stack.values.push(
159-
self.store.stack.values.local_get::<i64>(&self.cf, *a).wrapping_add(self.store.stack.values.local_get::<i64>(&self.cf, *b)),
160-
)?,
155+
I32AddLocals(a, b) => self.store.stack.values.push(self.store.stack.values.local_get::<i32>(&self.cf, *a).wrapping_add(self.store.stack.values.local_get::<i32>(&self.cf, *b)))?,
156+
I64AddLocals(a, b) => self.store.stack.values.push(self.store.stack.values.local_get::<i64>(&self.cf, *a).wrapping_add(self.store.stack.values.local_get::<i64>(&self.cf, *b)))?,
161157
I32AddConst(c) => stack_op!(unary i32, |v| v.wrapping_add(*c)),
162158
I64AddConst(c) => stack_op!(unary i64, |v| v.wrapping_add(*c)),
163159
I32StoreLocalLocal(m, addr_local, value_local) => {
@@ -394,6 +390,7 @@ impl<'store, const BUDGETED: bool> Executor<'store, BUDGETED> {
394390
V128Bitselect => stack_op!(ternary Value128, |v1, v2, c| Value128::v128_bitselect(v1, v2, c)),
395391
V128AnyTrue => stack_op!(unary Value128 => i32, |v| v.v128_any_true() as i32),
396392
I8x16Swizzle => stack_op!(binary Value128, |a, s| a.i8x16_swizzle(s)),
393+
I8x16RelaxedSwizzle => stack_op!(binary Value128, |a, s| a.i8x16_relaxed_swizzle(s)),
397394
V128Load(arg) => self.exec_mem_load::<Value128, 16, _>(arg.mem_addr(), arg.offset(), |v| v)?,
398395
V128Load8x8S(arg) => self.exec_mem_load::<u64, 8, Value128>(arg.mem_addr(), arg.offset(), |v| Value128::v128_load8x8_s(v.to_le_bytes()))?,
399396
V128Load8x8U(arg) => self.exec_mem_load::<u64, 8, Value128>(arg.mem_addr(), arg.offset(), |v| Value128::v128_load8x8_u(v.to_le_bytes()))?,
@@ -582,6 +579,13 @@ impl<'store, const BUDGETED: bool> Executor<'store, BUDGETED> {
582579
I8x16Shuffle(idx) => { let idx = self.func.data.v128_constants[*idx as usize].to_le_bytes(); stack_op!(binary Value128, |a, b| Value128::i8x16_shuffle(a, b, idx)) }
583580
I16x8Q15MulrSatS => stack_op!(binary Value128, |a, b| a.i16x8_q15mulr_sat_s(b)),
584581
I32x4DotI16x8S => stack_op!(binary Value128, |a, b| a.i32x4_dot_i16x8_s(b)),
582+
I8x16RelaxedLaneselect => stack_op!(ternary Value128, |v1, v2, c| Value128::i8x16_relaxed_laneselect(v1, v2, c)),
583+
I16x8RelaxedLaneselect => stack_op!(ternary Value128, |v1, v2, c| Value128::i16x8_relaxed_laneselect(v1, v2, c)),
584+
I32x4RelaxedLaneselect => stack_op!(ternary Value128, |v1, v2, c| Value128::i32x4_relaxed_laneselect(v1, v2, c)),
585+
I64x2RelaxedLaneselect => stack_op!(ternary Value128, |v1, v2, c| Value128::i64x2_relaxed_laneselect(v1, v2, c)),
586+
I16x8RelaxedQ15mulrS => stack_op!(binary Value128, |a, b| a.i16x8_relaxed_q15mulr_s(b)),
587+
I16x8RelaxedDotI8x16I7x16S => stack_op!(binary Value128, |a, b| a.i16x8_relaxed_dot_i8x16_i7x16_s(b)),
588+
I32x4RelaxedDotI8x16I7x16AddS => stack_op!(ternary Value128, |a, b, c| a.i32x4_relaxed_dot_i8x16_i7x16_add_s(b, c)),
585589
F32x4Ceil => stack_op!(simd_unary f32x4_ceil),
586590
F64x2Ceil => stack_op!(simd_unary f64x2_ceil),
587591
F32x4Floor => stack_op!(simd_unary f32x4_floor),
@@ -612,6 +616,14 @@ impl<'store, const BUDGETED: bool> Executor<'store, BUDGETED> {
612616
F32x4PMax => stack_op!(simd_binary f32x4_pmax),
613617
F64x2PMin => stack_op!(simd_binary f64x2_pmin),
614618
F64x2PMax => stack_op!(simd_binary f64x2_pmax),
619+
F32x4RelaxedMadd => stack_op!(ternary Value128, |a, b, c| a.f32x4_relaxed_madd(b, c)),
620+
F32x4RelaxedNmadd => stack_op!(ternary Value128, |a, b, c| a.f32x4_relaxed_nmadd(b, c)),
621+
F64x2RelaxedMadd => stack_op!(ternary Value128, |a, b, c| a.f64x2_relaxed_madd(b, c)),
622+
F64x2RelaxedNmadd => stack_op!(ternary Value128, |a, b, c| a.f64x2_relaxed_nmadd(b, c)),
623+
F32x4RelaxedMin => stack_op!(binary Value128, |a, b| a.f32x4_relaxed_min(b)),
624+
F32x4RelaxedMax => stack_op!(binary Value128, |a, b| a.f32x4_relaxed_max(b)),
625+
F64x2RelaxedMin => stack_op!(binary Value128, |a, b| a.f64x2_relaxed_min(b)),
626+
F64x2RelaxedMax => stack_op!(binary Value128, |a, b| a.f64x2_relaxed_max(b)),
615627
I32x4TruncSatF32x4S => stack_op!(unary Value128, |v| v.i32x4_trunc_sat_f32x4_s()),
616628
I32x4TruncSatF32x4U => stack_op!(unary Value128, |v| v.i32x4_trunc_sat_f32x4_u()),
617629
F32x4ConvertI32x4S => stack_op!(unary Value128, |v| v.f32x4_convert_i32x4_s()),
@@ -622,6 +634,10 @@ impl<'store, const BUDGETED: bool> Executor<'store, BUDGETED> {
622634
F64x2PromoteLowF32x4 => stack_op!(unary Value128, |v| v.f64x2_promote_low_f32x4()),
623635
I32x4TruncSatF64x2SZero => stack_op!(unary Value128, |v| v.i32x4_trunc_sat_f64x2_s_zero()),
624636
I32x4TruncSatF64x2UZero => stack_op!(unary Value128, |v| v.i32x4_trunc_sat_f64x2_u_zero()),
637+
I32x4RelaxedTruncF32x4S => stack_op!(unary Value128, |v| v.i32x4_relaxed_trunc_f32x4_s()),
638+
I32x4RelaxedTruncF32x4U => stack_op!(unary Value128, |v| v.i32x4_relaxed_trunc_f32x4_u()),
639+
I32x4RelaxedTruncF64x2SZero => stack_op!(unary Value128, |v| v.i32x4_relaxed_trunc_f64x2_s_zero()),
640+
I32x4RelaxedTruncF64x2UZero => stack_op!(unary Value128, |v| v.i32x4_relaxed_trunc_f64x2_u_zero()),
625641
};
626642

627643
self.cf.incr_instr_ptr();

crates/tinywasm/src/interpreter/value128.rs

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,11 @@ impl Value128 {
609609
Self::from_le_bytes(out)
610610
}
611611

612+
/// Implements `i8x16.relaxed_swizzle` by forwarding to `i8x16_swizzle` with the
/// same index vector `s`, so both instructions return the same value here.
#[doc(alias = "i8x16.relaxed_swizzle")]
613+
pub fn i8x16_relaxed_swizzle(self, s: Self) -> Self {
614+
self.i8x16_swizzle(s)
615+
}
616+
612617
#[doc(alias = "i8x16.shuffle")]
613618
pub fn i8x16_shuffle(a: Self, b: Self, idx: [u8; 16]) -> Self {
614619
let mut src = [0u8; 32];
@@ -938,6 +943,65 @@ impl Value128 {
938943
Self::from_i32x4(out)
939944
}
940945

946+
/// Implements `i8x16.relaxed_laneselect` by forwarding to `v128_bitselect` with
/// the operands in the same order (`v1`, `v2`, mask `c`).
#[doc(alias = "i8x16.relaxed_laneselect")]
947+
pub fn i8x16_relaxed_laneselect(v1: Self, v2: Self, c: Self) -> Self {
948+
Self::v128_bitselect(v1, v2, c)
949+
}
950+
951+
/// Implements `i16x8.relaxed_laneselect` by forwarding to `v128_bitselect` with
/// the operands in the same order (`v1`, `v2`, mask `c`).
#[doc(alias = "i16x8.relaxed_laneselect")]
952+
pub fn i16x8_relaxed_laneselect(v1: Self, v2: Self, c: Self) -> Self {
953+
Self::v128_bitselect(v1, v2, c)
954+
}
955+
956+
/// Implements `i32x4.relaxed_laneselect` by forwarding to `v128_bitselect` with
/// the operands in the same order (`v1`, `v2`, mask `c`).
#[doc(alias = "i32x4.relaxed_laneselect")]
957+
pub fn i32x4_relaxed_laneselect(v1: Self, v2: Self, c: Self) -> Self {
958+
Self::v128_bitselect(v1, v2, c)
959+
}
960+
961+
/// Implements `i64x2.relaxed_laneselect` by forwarding to `v128_bitselect` with
/// the operands in the same order (`v1`, `v2`, mask `c`).
#[doc(alias = "i64x2.relaxed_laneselect")]
962+
pub fn i64x2_relaxed_laneselect(v1: Self, v2: Self, c: Self) -> Self {
963+
Self::v128_bitselect(v1, v2, c)
964+
}
965+
966+
/// Implements `i16x8.relaxed_q15mulr_s` by forwarding to `i16x8_q15mulr_sat_s`,
/// so both instructions return the same value here.
#[doc(alias = "i16x8.relaxed_q15mulr_s")]
967+
pub fn i16x8_relaxed_q15mulr_s(self, rhs: Self) -> Self {
968+
self.i16x8_q15mulr_sat_s(rhs)
969+
}
970+
971+
#[doc(alias = "i16x8.relaxed_dot_i8x16_i7x16_s")]
972+
pub fn i16x8_relaxed_dot_i8x16_i7x16_s(self, rhs: Self) -> Self {
973+
let a = self.as_i8x16();
974+
let b = rhs.as_i8x16();
975+
let mut out = [0i16; 8];
976+
977+
for (dst, (a_pair, b_pair)) in out.iter_mut().zip(a.chunks_exact(2).zip(b.chunks_exact(2))) {
978+
let prod0 = (a_pair[0] as i16) * (b_pair[0] as i16);
979+
let prod1 = (a_pair[1] as i16) * (b_pair[1] as i16);
980+
*dst = prod0.wrapping_add(prod1);
981+
}
982+
983+
Self::from_i16x8(out)
984+
}
985+
986+
#[doc(alias = "i32x4.relaxed_dot_i8x16_i7x16_add_s")]
987+
pub fn i32x4_relaxed_dot_i8x16_i7x16_add_s(self, rhs: Self, acc: Self) -> Self {
988+
let a = self.as_i8x16();
989+
let b = rhs.as_i8x16();
990+
let c = acc.as_i32x4();
991+
let mut out = [0i32; 4];
992+
993+
for (i, dst) in out.iter_mut().enumerate() {
994+
let base = i * 4;
995+
let mut sum = 0i32;
996+
for j in 0..4 {
997+
sum = sum.wrapping_add((a[base + j] as i32).wrapping_mul(b[base + j] as i32));
998+
}
999+
*dst = sum.wrapping_add(c[i]);
1000+
}
1001+
1002+
Self::from_i32x4(out)
1003+
}
1004+
9411005
simd_cmp_mask!(i8x16_eq, "i8x16.eq", i8x16_eq, i8, 16, as_i8x16, from_i8x16, ==);
9421006
simd_cmp_mask!(i16x8_eq, "i16x8.eq", i16x8_eq, i16, 8, as_i16x8, from_i16x8, ==);
9431007
simd_cmp_mask!(i32x4_eq, "i32x4.eq", i32x4_eq, i32, 4, as_i32x4, from_i32x4, ==);
@@ -1058,6 +1122,70 @@ impl Value128 {
10581122
simd_float_binary!(f32x4_pmax, "f32x4.pmax", zip_f32x4, |a, b| if b > a { b } else { a });
10591123
simd_float_binary!(f64x2_pmax, "f64x2.pmax", zip_f64x2, |a, b| if b > a { b } else { a });
10601124

1125+
#[doc(alias = "f32x4.relaxed_madd")]
1126+
pub fn f32x4_relaxed_madd(self, b: Self, c: Self) -> Self {
1127+
self.zip_f32x4(b, |x, y| canonicalize_simd_f32_nan(x * y))
1128+
.zip_f32x4(c, |xy, z| canonicalize_simd_f32_nan(xy + z))
1129+
}
1130+
1131+
#[doc(alias = "f32x4.relaxed_nmadd")]
1132+
pub fn f32x4_relaxed_nmadd(self, b: Self, c: Self) -> Self {
1133+
self.zip_f32x4(b, |x, y| canonicalize_simd_f32_nan(-(x * y)))
1134+
.zip_f32x4(c, |neg_xy, z| canonicalize_simd_f32_nan(neg_xy + z))
1135+
}
1136+
1137+
#[doc(alias = "f64x2.relaxed_madd")]
1138+
pub fn f64x2_relaxed_madd(self, b: Self, c: Self) -> Self {
1139+
self.zip_f64x2(b, |x, y| canonicalize_simd_f64_nan(x * y))
1140+
.zip_f64x2(c, |xy, z| canonicalize_simd_f64_nan(xy + z))
1141+
}
1142+
1143+
#[doc(alias = "f64x2.relaxed_nmadd")]
1144+
pub fn f64x2_relaxed_nmadd(self, b: Self, c: Self) -> Self {
1145+
self.zip_f64x2(b, |x, y| canonicalize_simd_f64_nan(-(x * y)))
1146+
.zip_f64x2(c, |neg_xy, z| canonicalize_simd_f64_nan(neg_xy + z))
1147+
}
1148+
1149+
/// Implements `f32x4.relaxed_min` by forwarding to `f32x4_min`, so both
/// instructions return the same value here.
#[doc(alias = "f32x4.relaxed_min")]
1150+
pub fn f32x4_relaxed_min(self, rhs: Self) -> Self {
1151+
self.f32x4_min(rhs)
1152+
}
1153+
1154+
/// Implements `f64x2.relaxed_min` by forwarding to `f64x2_min`, so both
/// instructions return the same value here.
#[doc(alias = "f64x2.relaxed_min")]
1155+
pub fn f64x2_relaxed_min(self, rhs: Self) -> Self {
1156+
self.f64x2_min(rhs)
1157+
}
1158+
1159+
/// Implements `f32x4.relaxed_max` by forwarding to `f32x4_max`, so both
/// instructions return the same value here.
#[doc(alias = "f32x4.relaxed_max")]
1160+
pub fn f32x4_relaxed_max(self, rhs: Self) -> Self {
1161+
self.f32x4_max(rhs)
1162+
}
1163+
1164+
/// Implements `f64x2.relaxed_max` by forwarding to `f64x2_max`, so both
/// instructions return the same value here.
#[doc(alias = "f64x2.relaxed_max")]
1165+
pub fn f64x2_relaxed_max(self, rhs: Self) -> Self {
1166+
self.f64x2_max(rhs)
1167+
}
1168+
1169+
/// Implements `i32x4.relaxed_trunc_f32x4_s` by forwarding to
/// `i32x4_trunc_sat_f32x4_s`, so both instructions return the same value here.
#[doc(alias = "i32x4.relaxed_trunc_f32x4_s")]
1170+
pub fn i32x4_relaxed_trunc_f32x4_s(self) -> Self {
1171+
self.i32x4_trunc_sat_f32x4_s()
1172+
}
1173+
1174+
/// Implements `i32x4.relaxed_trunc_f32x4_u` by forwarding to
/// `i32x4_trunc_sat_f32x4_u`, so both instructions return the same value here.
#[doc(alias = "i32x4.relaxed_trunc_f32x4_u")]
1175+
pub fn i32x4_relaxed_trunc_f32x4_u(self) -> Self {
1176+
self.i32x4_trunc_sat_f32x4_u()
1177+
}
1178+
1179+
/// Implements `i32x4.relaxed_trunc_f64x2_s_zero` by forwarding to
/// `i32x4_trunc_sat_f64x2_s_zero`, so both instructions return the same value here.
#[doc(alias = "i32x4.relaxed_trunc_f64x2_s_zero")]
1180+
pub fn i32x4_relaxed_trunc_f64x2_s_zero(self) -> Self {
1181+
self.i32x4_trunc_sat_f64x2_s_zero()
1182+
}
1183+
1184+
/// Implements `i32x4.relaxed_trunc_f64x2_u_zero` by forwarding to
/// `i32x4_trunc_sat_f64x2_u_zero`, so both instructions return the same value here.
#[doc(alias = "i32x4.relaxed_trunc_f64x2_u_zero")]
1185+
pub fn i32x4_relaxed_trunc_f64x2_u_zero(self) -> Self {
1186+
self.i32x4_trunc_sat_f64x2_u_zero()
1187+
}
1188+
10611189
#[doc(alias = "i32x4.trunc_sat_f32x4_s")]
10621190
pub fn i32x4_trunc_sat_f32x4_s(self) -> Self {
10631191
let v = self.as_f32x4();
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.9.0-alpha.0,0,93,[{"name":"i16x8_relaxed_q15mulr_s.wast","passed":0,"failed":3},{"name":"i32x4_relaxed_trunc.wast","passed":0,"failed":17},{"name":"i8x16_relaxed_swizzle.wast","passed":0,"failed":6},{"name":"relaxed_dot_product.wast","passed":0,"failed":11},{"name":"relaxed_laneselect.wast","passed":0,"failed":12},{"name":"relaxed_madd_nmadd.wast","passed":0,"failed":19},{"name":"relaxed_min_max.wast","passed":0,"failed":25}]
1+
0.9.0-alpha.0,93,0,[{"name":"i16x8_relaxed_q15mulr_s.wast","passed":3,"failed":0},{"name":"i32x4_relaxed_trunc.wast","passed":17,"failed":0},{"name":"i8x16_relaxed_swizzle.wast","passed":6,"failed":0},{"name":"relaxed_dot_product.wast","passed":11,"failed":0},{"name":"relaxed_laneselect.wast","passed":12,"failed":0},{"name":"relaxed_madd_nmadd.wast","passed":19,"failed":0},{"name":"relaxed_min_max.wast","passed":25,"failed":0}]

crates/tinywasm/tests/test-wast.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::path::PathBuf;
22

3-
use eyre::{Result, bail, eyre};
4-
use owo_colors::OwoColorize;
3+
use eyre::{Result, bail};
54
use testsuite::TestSuite;
65

76
mod testsuite;

0 commit comments

Comments
 (0)