From 326aea298cedddd4a16bffb8520d784f8582f8bd Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Sat, 28 Sep 2024 00:00:49 +0100 Subject: [PATCH 01/32] Implement the first few SIMD opcodes for fast interpreter (v128.const, v128.any_true) (#3818) Tested on the following code: ``` (module (import "wasi_snapshot_preview1" "proc_exit" (func $proc_exit (param i32))) (memory (export "memory") 1) ;; WASI entry point (func $main (export "_start") v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 v128.any_true if unreachable end v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 v128.any_true i32.const 0 i32.eq if unreachable end i32.const 0 call $proc_exit ) ) ``` --- core/iwasm/common/wasm_runtime_common.h | 25 +++++++++ core/iwasm/interpreter/wasm_interp_fast.c | 32 ++++++++++++ core/iwasm/interpreter/wasm_loader.c | 63 +++++++++++++++++------ core/iwasm/interpreter/wasm_opcode.h | 3 +- 4 files changed, 107 insertions(+), 16 deletions(-) diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index fb2c79408d..0b89edf5e8 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -37,6 +37,10 @@ extern "C" { do { \ *(int64 *)(addr) = (int64)(value); \ } while (0) +#define PUT_V128_TO_ADDR(addr, value) \ + do { \ + *(V128 *)(addr) = (value); \ + } while (0) #define PUT_F64_TO_ADDR(addr, value) \ do { \ *(float64 *)(addr) = (float64)(value); \ @@ -49,6 +53,7 @@ extern "C" { #define GET_I64_FROM_ADDR(addr) (*(int64 *)(addr)) #define GET_F64_FROM_ADDR(addr) (*(float64 *)(addr)) #define GET_REF_FROM_ADDR(addr) (*(void **)(addr)) +#define GET_V128_FROM_ADDR(addr) (*(V128 *)(addr)) /* For STORE opcodes */ #define STORE_I64 PUT_I64_TO_ADDR @@ -83,6 +88,15 @@ STORE_U8(void *addr, uint8_t value) #else /* WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 */ +#define PUT_V128_TO_ADDR(addr, value) \ + do { \ + uint32 *addr_u32 = (uint32 *)(addr); \ + addr_u32[0] = (value).i32x4[0]; \ + addr_u32[1] = 
(value).i32x4[1]; \ + addr_u32[2] = (value).i32x4[2]; \ + addr_u32[3] = (value).i32x4[3]; \ + } while (0) + #define PUT_I64_TO_ADDR(addr, value) \ do { \ uint32 *addr_u32 = (uint32 *)(addr); \ @@ -124,6 +138,17 @@ STORE_U8(void *addr, uint8_t value) } while (0) #endif +static inline V128 +GET_V128_FROM_ADDR(uint32 *addr) +{ + V128 ret; + ret.i32x4[0] = addr[0]; + ret.i32x4[1] = addr[1]; + ret.i32x4[2] = addr[2]; + ret.i32x4[3] = addr[3]; + return ret; +} + static inline int64 GET_I64_FROM_ADDR(uint32 *addr) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 1d7ca8f908..fdd05e5446 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -433,6 +433,8 @@ wasm_interp_get_frame_ref(WASMInterpFrame *frame) (type) GET_I64_FROM_ADDR(frame_lp + *(int16 *)(frame_ip + off)) #define GET_OPERAND_F64(type, off) \ (type) GET_F64_FROM_ADDR(frame_lp + *(int16 *)(frame_ip + off)) +#define GET_OPERAND_V128(off) \ + GET_V128_FROM_ADDR(frame_lp + *(int16 *)(frame_ip + off)) #define GET_OPERAND_REF(type, off) \ (type) GET_REF_FROM_ADDR(frame_lp + *(int16 *)(frame_ip + off)) @@ -5642,7 +5644,37 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } +#if WASM_ENABLE_SIMD != 0 + HANDLE_OP(WASM_OP_SIMD_PREFIX) + { + GET_OPCODE(); + + switch (opcode) { + case SIMD_v128_const: + { + uint8 *orig_ip = frame_ip; + + frame_ip += sizeof(V128); + addr_ret = GET_OFFSET(); + PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip); + break; + } + case SIMD_v128_any_true: + { + V128 value = GET_OPERAND_V128(0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = + value.i64x2[0] != 0 || value.i64x2[1] != 0; + break; + } + default: + wasm_set_exception(module, "unsupported SIMD opcode"); + } + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_CALL) { #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/interpreter/wasm_loader.c 
b/core/iwasm/interpreter/wasm_loader.c index ff3501e3d0..5005fc63bf 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -307,7 +307,8 @@ is_byte_a_type(uint8 type) } #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) static V128 read_i8x16(uint8 *p_buf, char *error_buf, uint32 error_buf_size) { @@ -320,7 +321,8 @@ read_i8x16(uint8 *p_buf, char *error_buf, uint32 error_buf_size) return result; } -#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) || \ + (WASM_ENABLE_FAST_INTERP != 0) */ #endif /* end of WASM_ENABLE_SIMD */ static void * @@ -707,7 +709,8 @@ load_init_expr(WASMModule *module, const uint8 **p_buf, const uint8 *buf_end, goto fail; break; #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) /* v128.const */ case INIT_EXPR_TYPE_V128_CONST: { @@ -736,7 +739,8 @@ load_init_expr(WASMModule *module, const uint8 **p_buf, const uint8 *buf_end, #endif break; } -#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) || \ + (WASM_ENABLE_FAST_INTERP != 0) */ #endif /* end of WASM_ENABLE_SIMD */ #if WASM_ENABLE_REF_TYPES != 0 || WASM_ENABLE_GC != 0 @@ -4105,7 +4109,8 @@ load_export_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module, return false; } #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) /* TODO: check func type, if it has v128 param or result, report error */ #endif @@ -7566,7 
+7571,8 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, } #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) case WASM_OP_SIMD_PREFIX: { uint32 opcode1; @@ -7659,7 +7665,8 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, } break; } -#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) || \ + (WASM_ENABLE_FAST_INTERP != 0) */ #endif /* end of WASM_ENABLE_SIMD */ #if WASM_ENABLE_SHARED_MEMORY != 0 @@ -9903,7 +9910,8 @@ check_memory_access_align(uint8 opcode, uint32 align, char *error_buf, } #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) static bool check_simd_memory_access_align(uint8 opcode, uint32 align, char *error_buf, uint32 error_buf_size) @@ -12120,10 +12128,20 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #endif } #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) else if (*(loader_ctx->frame_ref - 1) == VALUE_TYPE_V128) { loader_ctx->frame_ref -= 4; loader_ctx->stack_cell_num -= 4; +#if WASM_ENABLE_FAST_INTERP != 0 + skip_label(); + loader_ctx->frame_offset -= 4; + if ((*(loader_ctx->frame_offset) + > loader_ctx->start_dynamic_offset) + && (*(loader_ctx->frame_offset) + < loader_ctx->max_dynamic_offset)) + loader_ctx->dynamic_offset -= 4; +#endif } #endif #endif @@ -12210,10 +12228,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #endif /* end of WASM_ENABLE_FAST_INTERP */ break; #if WASM_ENABLE_SIMD != 0 -#if 
(WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) case VALUE_TYPE_V128: break; -#endif /* (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) || \ + (WASM_ENABLE_FAST_INTERP != 0) */ #endif /* WASM_ENABLE_SIMD != 0 */ default: { @@ -12308,8 +12328,9 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, uint8 opcode_tmp = WASM_OP_SELECT; if (type == VALUE_TYPE_V128) { -#if (WASM_ENABLE_SIMD == 0) \ - || ((WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0)) +#if (WASM_ENABLE_SIMD == 0) \ + || ((WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \ + && (WASM_ENABLE_FAST_INTERP == 0)) set_error_buf(error_buf, error_buf_size, "SIMD v128 type isn't supported"); goto fail; @@ -14870,7 +14891,8 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } #if WASM_ENABLE_SIMD != 0 -#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) \ + || (WASM_ENABLE_FAST_INTERP != 0) case WASM_OP_SIMD_PREFIX: { uint32 opcode1; @@ -14882,6 +14904,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_uint32(p, p_end, opcode1); +#if WASM_ENABLE_FAST_INTERP != 0 + emit_byte(loader_ctx, opcode1); +#endif + /* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */ switch (opcode1) { @@ -14938,7 +14964,13 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, /* basic operation */ case SIMD_v128_const: { + uint64 high, low; CHECK_BUF1(p, p_end, 16); +#if WASM_ENABLE_FAST_INTERP != 0 + wasm_runtime_read_v128(p, &high, &low); + emit_uint64(loader_ctx, high); + emit_uint64(loader_ctx, low); +#endif p += 16; PUSH_V128(); break; @@ -15524,7 +15556,8 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } break; } -#endif 
/* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) || \ + (WASM_ENABLE_FAST_INTERP != 0) */ #endif /* end of WASM_ENABLE_SIMD */ #if WASM_ENABLE_SHARED_MEMORY != 0 diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 76647454be..1424840e79 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -782,7 +782,8 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if WASM_ENABLE_JIT != 0 && WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ + && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), #else From 860379bc58a0736d271cd1071168327799ebe7ff Mon Sep 17 00:00:00 2001 From: Marcin Kolny Date: Tue, 1 Oct 2024 11:10:22 +0100 Subject: [PATCH 02/32] implement POP_V128() This is to simplify the simd implementation for fast interpreter --- core/iwasm/common/wasm_loader_common.c | 3 ++- core/iwasm/interpreter/wasm_interp_fast.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/iwasm/common/wasm_loader_common.c b/core/iwasm/common/wasm_loader_common.c index 6dd31be2c3..4a9d8a57b2 100644 --- a/core/iwasm/common/wasm_loader_common.c +++ b/core/iwasm/common/wasm_loader_common.c @@ -89,7 +89,8 @@ is_valid_value_type(uint8 type) bool is_valid_value_type_for_interpreter(uint8 value_type) { -#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) +#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \ + && (WASM_ENABLE_FAST_INTERP == 0) /* * Note: regardless of WASM_ENABLE_SIMD, our interpreters don't have * SIMD implemented. 
It's safer to reject v128, especially for the diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index fdd05e5446..0fda789651 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -485,6 +485,8 @@ wasm_interp_get_frame_ref(WASMInterpFrame *frame) #define POP_I64() (GET_I64_FROM_ADDR(frame_lp + GET_OFFSET())) +#define POP_V128() (GET_V128_FROM_ADDR(frame_lp + GET_OFFSET())) + #define POP_F64() (GET_F64_FROM_ADDR(frame_lp + GET_OFFSET())) #define POP_REF() \ @@ -5662,8 +5664,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_v128_any_true: { - V128 value = GET_OPERAND_V128(0); - frame_ip += 2; + V128 value = POP_V128(); addr_ret = GET_OFFSET(); frame_lp[addr_ret] = value.i64x2[0] != 0 || value.i64x2[1] != 0; From 5b2ab612437e289d3372c65b596dc96cf9bc973b Mon Sep 17 00:00:00 2001 From: James Marsh Date: Fri, 27 Sep 2024 16:47:24 +0100 Subject: [PATCH 03/32] Add all SIMD operations into wasm_interp_fast switch --- core/iwasm/interpreter/wasm_interp_fast.c | 348 ++++++++++++++++++++++ 1 file changed, 348 insertions(+) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 0fda789651..fa32535f3e 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5652,6 +5652,25 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_OPCODE(); switch (opcode) { + /* Memory */ + case SIMD_v128_load: + case SIMD_v128_load8x8_s: + case SIMD_v128_load8x8_u: + case SIMD_v128_load16x4_s: + case SIMD_v128_load16x4_u: + case SIMD_v128_load32x2_s: + case SIMD_v128_load32x2_u: + case SIMD_v128_load8_splat: + case SIMD_v128_load16_splat: + case SIMD_v128_load32_splat: + case SIMD_v128_load64_splat: + case SIMD_v128_store: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* Basic */ case SIMD_v128_const: { uint8 *orig_ip = frame_ip; @@ 
-5662,6 +5681,128 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip); break; } + case SIMD_v8x16_shuffle: + case SIMD_v8x16_swizzle: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* Splat */ + case SIMD_i8x16_splat: + case SIMD_i16x8_splat: + case SIMD_i32x4_splat: + case SIMD_i64x2_splat: + case SIMD_f32x4_splat: + case SIMD_f64x2_splat: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* Lane */ + case SIMD_i8x16_extract_lane_s: + case SIMD_i8x16_extract_lane_u: + case SIMD_i8x16_replace_lane: + case SIMD_i16x8_extract_lane_s: + case SIMD_i16x8_extract_lane_u: + case SIMD_i16x8_replace_lane: + case SIMD_i32x4_extract_lane: + case SIMD_i32x4_replace_lane: + case SIMD_i64x2_extract_lane: + case SIMD_i64x2_replace_lane: + case SIMD_f32x4_extract_lane: + case SIMD_f32x4_replace_lane: + case SIMD_f64x2_extract_lane: + case SIMD_f64x2_replace_lane: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i8x16 comparison operations */ + case SIMD_i8x16_eq: + case SIMD_i8x16_ne: + case SIMD_i8x16_lt_s: + case SIMD_i8x16_lt_u: + case SIMD_i8x16_gt_s: + case SIMD_i8x16_gt_u: + case SIMD_i8x16_le_s: + case SIMD_i8x16_le_u: + case SIMD_i8x16_ge_s: + case SIMD_i8x16_ge_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i16x8 comparison operations */ + case SIMD_i16x8_eq: + case SIMD_i16x8_ne: + case SIMD_i16x8_lt_s: + case SIMD_i16x8_lt_u: + case SIMD_i16x8_gt_s: + case SIMD_i16x8_gt_u: + case SIMD_i16x8_le_s: + case SIMD_i16x8_le_u: + case SIMD_i16x8_ge_s: + case SIMD_i16x8_ge_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i32x4 comparison operations */ + case SIMD_i32x4_eq: + case SIMD_i32x4_ne: + case SIMD_i32x4_lt_s: + case SIMD_i32x4_lt_u: + case SIMD_i32x4_gt_s: + case SIMD_i32x4_gt_u: + case SIMD_i32x4_le_s: + case SIMD_i32x4_le_u: + case 
SIMD_i32x4_ge_s: + case SIMD_i32x4_ge_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* f32x4 comparison operations */ + case SIMD_f32x4_eq: + case SIMD_f32x4_ne: + case SIMD_f32x4_lt: + case SIMD_f32x4_gt: + case SIMD_f32x4_le: + case SIMD_f32x4_ge: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* f64x2 comparison operations */ + case SIMD_f64x2_eq: + case SIMD_f64x2_ne: + case SIMD_f64x2_lt: + case SIMD_f64x2_gt: + case SIMD_f64x2_le: + case SIMD_f64x2_ge: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* v128 comparison operations */ + case SIMD_v128_not: + case SIMD_v128_and: + case SIMD_v128_andnot: + case SIMD_v128_or: + case SIMD_v128_xor: + case SIMD_v128_bitselect: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_v128_any_true: { V128 value = POP_V128(); @@ -5670,6 +5811,213 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, value.i64x2[0] != 0 || value.i64x2[1] != 0; break; } + + /* load lane operations */ + case SIMD_v128_load8_lane: + case SIMD_v128_load16_lane: + case SIMD_v128_load32_lane: + case SIMD_v128_load64_lane: + case SIMD_v128_store8_lane: + case SIMD_v128_store16_lane: + case SIMD_v128_store32_lane: + case SIMD_v128_store64_lane: + case SIMD_v128_load32_zero: + case SIMD_v128_load64_zero: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* Float conversion */ + case SIMD_f32x4_demote_f64x2_zero: + case SIMD_f64x2_promote_low_f32x4_zero: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i8x16 operations */ + case SIMD_i8x16_abs: + case SIMD_i8x16_neg: + case SIMD_i8x16_popcnt: + case SIMD_i8x16_all_true: + case SIMD_i8x16_bitmask: + case SIMD_i8x16_narrow_i16x8_s: + case SIMD_i8x16_narrow_i16x8_u: + case SIMD_f32x4_ceil: + case SIMD_f32x4_floor: + case SIMD_f32x4_trunc: + case SIMD_f32x4_nearest: + case SIMD_i8x16_shl: + case SIMD_i8x16_shr_s: 
+ case SIMD_i8x16_shr_u: + case SIMD_i8x16_add: + case SIMD_i8x16_add_sat_s: + case SIMD_i8x16_add_sat_u: + case SIMD_i8x16_sub: + case SIMD_i8x16_sub_sat_s: + case SIMD_i8x16_sub_sat_u: + case SIMD_f64x2_ceil: + case SIMD_f64x2_floor: + case SIMD_i8x16_min_s: + case SIMD_i8x16_min_u: + case SIMD_i8x16_max_s: + case SIMD_i8x16_max_u: + case SIMD_f64x2_trunc: + case SIMD_i8x16_avgr_u: + case SIMD_i16x8_extadd_pairwise_i8x16_s: + case SIMD_i16x8_extadd_pairwise_i8x16_u: + case SIMD_i32x4_extadd_pairwise_i16x8_s: + case SIMD_i32x4_extadd_pairwise_i16x8_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i16x8 operations */ + case SIMD_i16x8_abs: + case SIMD_i16x8_neg: + case SIMD_i16x8_q15mulr_sat_s: + case SIMD_i16x8_all_true: + case SIMD_i16x8_bitmask: + case SIMD_i16x8_narrow_i32x4_s: + case SIMD_i16x8_narrow_i32x4_u: + case SIMD_i16x8_extend_low_i8x16_s: + case SIMD_i16x8_extend_high_i8x16_s: + case SIMD_i16x8_extend_low_i8x16_u: + case SIMD_i16x8_extend_high_i8x16_u: + case SIMD_i16x8_shl: + case SIMD_i16x8_shr_s: + case SIMD_i16x8_shr_u: + case SIMD_i16x8_add: + case SIMD_i16x8_add_sat_s: + case SIMD_i16x8_add_sat_u: + case SIMD_i16x8_sub: + case SIMD_i16x8_sub_sat_s: + case SIMD_i16x8_sub_sat_u: + case SIMD_f64x2_nearest: + case SIMD_i16x8_mul: + case SIMD_i16x8_min_s: + case SIMD_i16x8_min_u: + case SIMD_i16x8_max_s: + case SIMD_i16x8_max_u: + case SIMD_i16x8_avgr_u: + case SIMD_i16x8_extmul_low_i8x16_s: + case SIMD_i16x8_extmul_high_i8x16_s: + case SIMD_i16x8_extmul_low_i8x16_u: + case SIMD_i16x8_extmul_high_i8x16_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i32x4 operations */ + case SIMD_i32x4_abs: + case SIMD_i32x4_neg: + case SIMD_i32x4_all_true: + case SIMD_i32x4_bitmask: + case SIMD_i32x4_extend_low_i16x8_s: + case SIMD_i32x4_extend_high_i16x8_s: + case SIMD_i32x4_extend_low_i16x8_u: + case SIMD_i32x4_extend_high_i16x8_u: + case SIMD_i32x4_shl: + case SIMD_i32x4_shr_s: + case 
SIMD_i32x4_shr_u: + case SIMD_i32x4_add: + case SIMD_i32x4_sub: + case SIMD_i32x4_mul: + case SIMD_i32x4_min_s: + case SIMD_i32x4_min_u: + case SIMD_i32x4_max_s: + case SIMD_i32x4_max_u: + case SIMD_i32x4_dot_i16x8_s: + case SIMD_i32x4_extmul_low_i16x8_s: + case SIMD_i32x4_extmul_high_i16x8_s: + case SIMD_i32x4_extmul_low_i16x8_u: + case SIMD_i32x4_extmul_high_i16x8_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* i64x2 operations */ + case SIMD_i64x2_abs: + case SIMD_i64x2_neg: + case SIMD_i64x2_all_true: + case SIMD_i64x2_bitmask: + case SIMD_i64x2_extend_low_i32x4_s: + case SIMD_i64x2_extend_high_i32x4_s: + case SIMD_i64x2_extend_low_i32x4_u: + case SIMD_i64x2_extend_high_i32x4_u: + case SIMD_i64x2_shl: + case SIMD_i64x2_shr_s: + case SIMD_i64x2_shr_u: + case SIMD_i64x2_add: + case SIMD_i64x2_sub: + case SIMD_i64x2_mul: + case SIMD_i64x2_eq: + case SIMD_i64x2_ne: + case SIMD_i64x2_lt_s: + case SIMD_i64x2_gt_s: + case SIMD_i64x2_le_s: + case SIMD_i64x2_ge_s: + case SIMD_i64x2_extmul_low_i32x4_s: + case SIMD_i64x2_extmul_high_i32x4_s: + case SIMD_i64x2_extmul_low_i32x4_u: + case SIMD_i64x2_extmul_high_i32x4_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* f32x4 opertions */ + case SIMD_f32x4_abs: + case SIMD_f32x4_neg: + case SIMD_f32x4_sqrt: + case SIMD_f32x4_add: + case SIMD_f32x4_sub: + case SIMD_f32x4_mul: + case SIMD_f32x4_div: + case SIMD_f32x4_min: + case SIMD_f32x4_max: + case SIMD_f32x4_pmin: + case SIMD_f32x4_pmax: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* f64x2 operations */ + case SIMD_f64x2_abs: + case SIMD_f64x2_neg: + case SIMD_f64x2_sqrt: + case SIMD_f64x2_add: + case SIMD_f64x2_sub: + case SIMD_f64x2_mul: + case SIMD_f64x2_div: + case SIMD_f64x2_min: + case SIMD_f64x2_max: + case SIMD_f64x2_pmin: + case SIMD_f64x2_pmax: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + /* Conversion operations */ + case 
SIMD_i32x4_trunc_sat_f32x4_s: + case SIMD_i32x4_trunc_sat_f32x4_u: + case SIMD_f32x4_convert_i32x4_s: + case SIMD_f32x4_convert_i32x4_u: + case SIMD_i32x4_trunc_sat_f64x2_s_zero: + case SIMD_i32x4_trunc_sat_f64x2_u_zero: + case SIMD_f64x2_convert_low_i32x4_s: + case SIMD_f64x2_convert_low_i32x4_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + default: wasm_set_exception(module, "unsupported SIMD opcode"); } From c1bfe2abd6a98b2c5e0594b2a14f2d2665f22b33 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Tue, 1 Oct 2024 11:40:16 +0100 Subject: [PATCH 04/32] Add V128 comparison operations Tested using ``` (module (import "wasi_snapshot_preview1" "proc_exit" (func $proc_exit (param i32))) (memory (export "memory") 1) (func $assert_true (param v128) local.get 0 v128.any_true i32.eqz if unreachable end ) (func $main (export "_start") ;; Test v128.not v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 v128.not v128.const i8x16 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 i8x16.eq call $assert_true ;; Test v128.and v128.const i8x16 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 v128.const i8x16 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 v128.and v128.const i8x16 255 255 0 0 0 0 0 0 255 255 0 0 0 0 0 0 i8x16.eq call $assert_true ;; Test v128.andnot v128.const i8x16 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 v128.const i8x16 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 v128.andnot v128.const i8x16 0 0 255 255 0 0 0 0 0 0 255 255 0 0 0 0 i8x16.eq call $assert_true ;; Test v128.or v128.const i8x16 255 255 0 0 0 0 255 255 255 255 0 0 0 0 255 0 v128.const i8x16 0 0 255 255 255 255 0 0 0 0 255 255 255 255 0 0 v128.or v128.const i8x16 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 0 i8x16.eq call $assert_true ;; Test v128.xor v128.const i8x16 255 255 0 0 255 255 0 0 255 255 0 0 255 255 0 0 v128.const i8x16 255 255 255 255 0 0 0 0 255 255 255 255 0 0 0 0 v128.xor v128.const i8x16 0 0 255 255 255 255 0 0 0 0 255 
255 255 255 0 0 i8x16.eq call $assert_true i32.const 0 call $proc_exit ) ) ``` --- core/iwasm/interpreter/wasm_interp_fast.c | 76 ++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index fa32535f3e..9d3b743382 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5722,6 +5722,20 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: + { + V128 v1 = POP_V128(); + V128 v2 = POP_V128(); + int i; + addr_ret = GET_OFFSET(); + + V128 result; + for (i = 0; i < 16; i++) { + result.i8x16[i] = + v1.i8x16[i] == v2.i8x16[i] ? 0xff : 0; + } + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_i8x16_ne: case SIMD_i8x16_lt_s: case SIMD_i8x16_lt_u: @@ -5792,12 +5806,56 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } - /* v128 comparison operations */ + /* v128 bitwise operations */ +#define SIMD_V128_BITWISE_OP_COMMON(result_expr_0, result_expr_1) \ + do { \ + V128 result; \ + result.i64x2[0] = (result_expr_0); \ + result.i64x2[1] = (result_expr_1); \ + addr_ret = GET_OFFSET(); \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + case SIMD_v128_not: + { + V128 value = POP_V128(); + SIMD_V128_BITWISE_OP_COMMON(~value.i64x2[0], + ~value.i64x2[1]); + break; + } case SIMD_v128_and: + { + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + SIMD_V128_BITWISE_OP_COMMON(v1.i64x2[0] & v2.i64x2[0], + v1.i64x2[1] & v2.i64x2[1]); + break; + } case SIMD_v128_andnot: + { + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + SIMD_V128_BITWISE_OP_COMMON( + v1.i64x2[0] & (~v2.i64x2[0]), + v1.i64x2[1] & (~v2.i64x2[1])); + break; + } case SIMD_v128_or: + { + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + SIMD_V128_BITWISE_OP_COMMON(v1.i64x2[0] | v2.i64x2[0], + v1.i64x2[1] | v2.i64x2[1]); + break; + } case SIMD_v128_xor: 
+ { + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + SIMD_V128_BITWISE_OP_COMMON(v1.i64x2[0] ^ v2.i64x2[0], + v1.i64x2[1] ^ v2.i64x2[1]); + break; + } case SIMD_v128_bitselect: { wasm_set_exception(module, "unsupported SIMD opcode"); @@ -5841,6 +5899,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_i8x16_neg: case SIMD_i8x16_popcnt: case SIMD_i8x16_all_true: + { + V128 v = POP_V128(); + uint8_t *bytes = (uint8_t *)&v; + bool all_true = true; + + for (int i = 0; i < 16; i++) { + if (bytes[i] == 0) { + all_true = false; + break; + } + } + + PUSH_I32(all_true ? 1 : 0); + break; + } + case SIMD_i8x16_bitmask: case SIMD_i8x16_narrow_i16x8_s: case SIMD_i8x16_narrow_i16x8_u: From c930c4d199223543d5bc9f14dc391eaab5cf79fe Mon Sep 17 00:00:00 2001 From: jammar1 <108334558+jammar1@users.noreply.github.com> Date: Tue, 5 Nov 2024 23:58:15 +0000 Subject: [PATCH 05/32] Add first NEON SIMD opcode implementations to fast interpreter (#3859) Add some implementations of SIMD opcodes using NEON instructions. 
Tested using: ```wast (module (import "wasi_snapshot_preview1" "proc_exit" (func $proc_exit (param i32))) (memory (export "memory") 1) (func $assert_true (param v128) local.get 0 v128.any_true i32.eqz if unreachable end ) (func $main (export "_start") i32.const 0 i32.const 32 memory.grow drop i32.const 0 v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 v128.store i32.const 0 v128.load v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 i8x16.eq call $assert_true i32.const 16 v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 v128.store i32.const 16 v128.load v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 i8x16.eq call $assert_true i32.const 0 v128.load v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 i8x16.eq call $assert_true drop i32.const 0 i32.const 1 memory.grow drop i32.const 0 i64.const 0x7F80FF017E02FE80 i64.store i32.const 0 v128.load8x8_s v128.const i16x8 127 -128 -1 1 126 2 -2 -128 i16x8.eq call $assert_true i32.const 0 i64.const 0x80FE027E01FF807F i64.store i32.const 0 v128.load8x8_u v128.const i16x8 128 254 2 126 1 255 128 127 i16x8.eq call $assert_true i32.const 0 i64.const 0x8000FFFE7FFF0001 i64.store i32.const 0 v128.load16x4_s v128.const i32x4 -32768 -2 32767 1 i32x4.eq call $assert_true i32.const 0 i64.const 0x8000FFFE7FFF0001 i64.store i32.const 0 v128.load16x4_u v128.const i32x4 32768 65534 32767 1 i32x4.eq call $assert_true i32.const 0 i64.const 0x8000000000000001 i64.store i32.const 0 v128.load32x2_s v128.const i64x2 -2147483648 1 i64x2.eq call $assert_true i32.const 0 i64.const 0x8000000000000001 i64.store i32.const 0 v128.load32x2_u v128.const i64x2 2147483648 1 i64x2.eq call $assert_true call $proc_exit ) ) ``` --- build-scripts/config_common.cmake | 3 + build-scripts/runtime_lib.cmake | 4 + core/config.h | 6 + core/iwasm/common/wasm_runtime_common.h | 67 ++ core/iwasm/interpreter/wasm_interp_fast.c | 1050 ++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 18 +- 
core/iwasm/interpreter/wasm_opcode.h | 4 +- core/iwasm/libraries/simde/simde.cmake | 23 + 8 files changed, 1128 insertions(+), 47 deletions(-) create mode 100644 core/iwasm/libraries/simde/simde.cmake diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 48c5f7be4b..b6503d808d 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -290,6 +290,9 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() +if ((WAMR_BUILD_LIB_SIMDE EQUAL 1)) + message (" Lib simde enabled") +endif() if (WAMR_BUILD_MINI_LOADER EQUAL 1) add_definitions (-DWASM_ENABLE_MINI_LOADER=1) message (" WASM mini loader enabled") diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index c57cfc57af..29789d671c 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -155,6 +155,10 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () +if (WAMR_BUILD_LIB_SIMDE EQUAL 1) + include (${IWASM_DIR}/libraries/simde/simde.cmake) +endif () + if (WAMR_BUILD_WASM_CACHE EQUAL 1) include (${WAMR_ROOT_DIR}/build-scripts/involve_boringssl.cmake) endif () diff --git a/core/config.h b/core/config.h index 6bab4da908..7b07e9eac6 100644 --- a/core/config.h +++ b/core/config.h @@ -318,6 +318,12 @@ #define WASM_ENABLE_SIMD 0 #endif +/* Disable SIMDe (used in the fast interpreter for SIMD opcodes) +unless used elsewhere */ +#ifndef WASM_ENABLE_SIMDE +#define WASM_ENABLE_SIMDE 0 +#endif + /* GC performance profiling */ #ifndef WASM_ENABLE_GC_PERF_PROFILING #define WASM_ENABLE_GC_PERF_PROFILING 0 diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 8ec5ea3a50..3c4460b34b 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -73,6 +73,12 @@ STORE_U8(void *addr, uint8_t value) *(uint8 *)addr = value; } +static inline void 
+STORE_V128(void *addr, V128 value) +{ + *(V128 *)addr = value; +} + /* For LOAD opcodes */ #define LOAD_I64(addr) (*(int64 *)(addr)) #define LOAD_F64(addr) (*(float64 *)(addr)) @@ -80,6 +86,7 @@ STORE_U8(void *addr, uint8_t value) #define LOAD_U32(addr) (*(uint32 *)(addr)) #define LOAD_I16(addr) (*(int16 *)(addr)) #define LOAD_U16(addr) (*(uint16 *)(addr)) +#define LOAD_V128(addr) (*(V128 *)(addr)) #define STORE_PTR(addr, ptr) \ do { \ @@ -264,7 +271,67 @@ STORE_U16(void *addr, uint16_t value) ((uint8_t *)(addr))[0] = u.u8[0]; ((uint8_t *)(addr))[1] = u.u8[1]; } + +static inline void +STORE_V128(void *addr, V128 value) +{ + uintptr_t addr_ = (uintptr_t)(addr); + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + + if ((addr_ & (uintptr_t)15) == 0) { + *(V128 *)addr = value; + } + else { + u.val = value; + if ((addr_ & (uintptr_t)7) == 0) { + ((uint64 *)(addr))[0] = u.u64[0]; + ((uint64 *)(addr))[1] = u.u64[1]; + } + else { + bh_assert((addr_ & (uintptr_t)3) == 0); + ((uint32 *)addr)[0] = u.u32[0]; + ((uint32 *)addr)[1] = u.u32[1]; + ((uint32 *)addr)[2] = u.u32[2]; + ((uint32 *)addr)[3] = u.u32[3]; + } + } +} + /* For LOAD opcodes */ +static inline V128 +LOAD_V128(void *addr) +{ + uintptr_t addr1 = (uintptr_t)addr; + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + if ((addr1 & (uintptr_t)15) == 0) + return *(V128 *)addr; + + if ((addr1 & (uintptr_t)7) == 0) { + u.u64[0] = ((uint64 *)addr)[0]; + u.u64[1] = ((uint64 *)addr)[1]; + } + else { + bh_assert((addr1 & (uintptr_t)3) == 0); + u.u32[0] = ((uint32 *)addr)[0]; + u.u32[1] = ((uint32 *)addr)[1]; + u.u32[2] = ((uint32 *)addr)[2]; + u.u32[3] = ((uint32 *)addr)[3]; + } + return u.val; +} + static inline int64 LOAD_I64(void *addr) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 73e54fca7a..09823f08c8 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ 
b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,6 +21,10 @@ #include "../common/wasm_shared_memory.h" #endif +#if WASM_ENABLE_SIMDE != 0 +#include "simde/wasm/simd128.h" +#endif + typedef int32 CellType_I32; typedef int64 CellType_I64; typedef float32 CellType_F32; @@ -5738,7 +5742,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } -#if WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_SIMDE != 0 +#define SIMD_V128_TO_SIMDE_V128(v) \ + ({ \ + bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ + simde_v128_t result; \ + bh_memcpy_s(&result, sizeof(simde_v128_t), &(v), sizeof(V128)); \ + result; \ + }) + +#define SIMDE_V128_TO_SIMD_V128(sv, v) \ + do { \ + bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ + bh_memcpy_s(&(v), sizeof(V128), &(sv), sizeof(simde_v128_t)); \ + } while (0) + HANDLE_OP(WASM_OP_SIMD_PREFIX) { GET_OPCODE(); @@ -5746,19 +5764,129 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, switch (opcode) { /* Memory */ case SIMD_v128_load: + { + uint32 offset, addr; + offset = read_uint32( + frame_ip); // TODO: Check with an offset! 
+ addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); + break; + } +#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(16); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + V128 reversed_result; \ + for (int i = 0; i < num_elements; i++) { \ + reversed_result.i##element_size##x##num_elements[i] = \ + result.i##element_size##x##num_elements[num_elements - 1 - i]; \ + } \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); \ + \ + break; \ + } while (0) case SIMD_v128_load8x8_s: + { + SIMD_LOAD_OP(SIMD_v128_load8x8_s, + simde_wasm_i16x8_load8x8, 16, 8); + break; + } case SIMD_v128_load8x8_u: + { + SIMD_LOAD_OP(SIMD_v128_load8x8_u, + simde_wasm_u16x8_load8x8, 16, 8); + break; + } case SIMD_v128_load16x4_s: + { + SIMD_LOAD_OP(SIMD_v128_load16x4_s, + simde_wasm_i32x4_load16x4, 32, 4); + break; + } case SIMD_v128_load16x4_u: + { + SIMD_LOAD_OP(SIMD_v128_load16x4_u, + simde_wasm_u32x4_load16x4, 32, 4); + break; + } case SIMD_v128_load32x2_s: + { + SIMD_LOAD_OP(SIMD_v128_load32x2_s, + simde_wasm_i64x2_load32x2, 64, 2); + break; + } case SIMD_v128_load32x2_u: + { + SIMD_LOAD_OP(SIMD_v128_load32x2_u, + simde_wasm_u64x2_load32x2, 64, 2); + break; + } +#define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(16); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } 
while (0)
+
                     case SIMD_v128_load8_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat,
+                                           simde_wasm_v128_load8_splat);
+                        break;
+                    }
                     case SIMD_v128_load16_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat,
+                                           simde_wasm_v128_load16_splat);
+                        break;
+                    }
                     case SIMD_v128_load32_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat,
+                                           simde_wasm_v128_load32_splat);
+                        break;
+                    }
                     case SIMD_v128_load64_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat,
+                                           simde_wasm_v128_load64_splat);
+                        break;
+                    }
                     case SIMD_v128_store:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        uint32 offset, addr;
+                        offset = read_uint32(frame_ip);
+                        frame_ip += 2;
+                        addr = GET_OPERAND(uint32, I32, 0);
+
+                        V128 data;
+                        data = POP_V128();
+
+                        CHECK_MEMORY_OVERFLOW(16);
+                        STORE_V128(maddr, data);
                         break;
                     }
 
@@ -5773,25 +5901,102 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip);
                         break;
                     }
+                    // TODO: Add a faster SIMD implementation
                     case SIMD_v8x16_shuffle:
-                    case SIMD_v8x16_swizzle:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        V128 indices;
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+
+                        bh_memcpy_s(&indices, sizeof(V128), frame_ip,
+                                    sizeof(V128));
+                        frame_ip += sizeof(V128);
+
+                        V128 result;
+                        for (int i = 0; i < 16; i++) {
+                            uint8_t index = indices.i8x16[i];
+                            if (index < 16) {
+                                result.i8x16[i] = v1.i8x16[index];
+                            }
+                            else {
+                                result.i8x16[i] = v2.i8x16[index - 16];
+                            }
+                        }
+
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
                         break;
                     }
+                    case SIMD_v8x16_swizzle:
+                    {
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+                        simde_v128_t simde_result = simde_wasm_i8x16_swizzle(
+                            SIMD_V128_TO_SIMDE_V128(v1),
+                            SIMD_V128_TO_SIMDE_V128(v2));
+
+                        V128 result;
+                        SIMDE_V128_TO_SIMD_V128(simde_result, result);
+
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
+                        /* without this the case runs on into i8x16.splat */
+                        break;
+                    }
 
                     /* Splat */
+#define SIMD_SPLAT_OP(simde_func, pop_func, val_type) \
+    do { \
+        val_type val =
pop_func(); \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = simde_func(val); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
+#define SIMD_SPLAT_OP_I32(simde_func) SIMD_SPLAT_OP(simde_func, POP_I32, uint32)
+#define SIMD_SPLAT_OP_I64(simde_func) SIMD_SPLAT_OP(simde_func, POP_I64, uint64)
+#define SIMD_SPLAT_OP_F32(simde_func) \
+    SIMD_SPLAT_OP(simde_func, POP_F32, float32)
+#define SIMD_SPLAT_OP_F64(simde_func) \
+    SIMD_SPLAT_OP(simde_func, POP_F64, float64)
+
                     case SIMD_i8x16_splat:
+                    {
+                        SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat);
+                        break;
+                    }
                     case SIMD_i16x8_splat:
+                    {
+                        SIMD_SPLAT_OP_I32(simde_wasm_i16x8_splat);
+                        break;
+                    }
                     case SIMD_i32x4_splat:
+                    {
+                        SIMD_SPLAT_OP_I32(simde_wasm_i32x4_splat);
+                        break;
+                    }
                     case SIMD_i64x2_splat:
+                    {
+                        SIMD_SPLAT_OP_I64(simde_wasm_i64x2_splat);
+                        break;
+                    }
                     case SIMD_f32x4_splat:
+                    {
+                        SIMD_SPLAT_OP_F32(simde_wasm_f32x4_splat);
+                        break;
+                    }
                     case SIMD_f64x2_splat:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_SPLAT_OP_F64(simde_wasm_f64x2_splat);
                         break;
                     }
 
+                    // TODO:
                     /* Lane */
                     case SIMD_i8x16_extract_lane_s:
                     case SIMD_i8x16_extract_lane_u:
@@ -5812,89 +6015,240 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         break;
                     }
 
+/* Binary v128 op. Operands are popped top-of-stack first, so the first
+ * pop is the right-hand operand (same order as shuffle/swizzle). */
+#define SIMD_DOUBLE_OP(simde_func) \
+    do { \
+        V128 v2 = POP_V128(); \
+        V128 v1 = POP_V128(); \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \
+                                               SIMD_V128_TO_SIMDE_V128(v2)); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
                     /* i8x16 comparison operations */
                     case SIMD_i8x16_eq:
                     {
-                        V128 v1 = POP_V128();
-                        V128 v2 = POP_V128();
-                        int i;
-                        addr_ret = GET_OFFSET();
-
-                        V128 result;
-                        for (i = 0; i < 16; i++) {
-                            result.i8x16[i] =
-                                v1.i8x16[i] == v2.i8x16[i] ?
0xff : 0;
-                        }
-                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_eq);
                         break;
                     }
                     case SIMD_i8x16_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_ne);
+                        break;
+                    }
                     case SIMD_i8x16_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_lt);
+                        break;
+                    }
                     case SIMD_i8x16_lt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_lt);
+                        break;
+                    }
                     case SIMD_i8x16_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_gt);
+                        break;
+                    }
                     case SIMD_i8x16_gt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_gt);
+                        break;
+                    }
                     case SIMD_i8x16_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_le);
+                        break;
+                    }
                     case SIMD_i8x16_le_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_le);
+                        break;
+                    }
                     case SIMD_i8x16_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_ge);
+                        break;
+                    }
                     case SIMD_i8x16_ge_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_ge);
                         break;
                     }
 
                     /* i16x8 comparison operations */
                     case SIMD_i16x8_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_eq);
+                        break;
+                    }
                     case SIMD_i16x8_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_ne);
+                        break;
+                    }
                     case SIMD_i16x8_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_lt);
+                        break;
+                    }
                     case SIMD_i16x8_lt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_lt);
+                        break;
+                    }
                     case SIMD_i16x8_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_gt);
+                        break;
+                    }
                     case SIMD_i16x8_gt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_gt);
+                        break;
+                    }
                     case SIMD_i16x8_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_le);
+                        break;
+                    }
                     case SIMD_i16x8_le_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_le);
+                        break;
+                    }
                     case SIMD_i16x8_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_ge);
+                        break;
+                    }
                     case SIMD_i16x8_ge_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_ge);
                         break;
                     }
 
                     /* i32x4 comparison operations */
                     case SIMD_i32x4_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_eq);
+                        break;
+                    }
                     case SIMD_i32x4_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_ne);
+                        break;
+                    }
                     case SIMD_i32x4_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_lt);
+                        break;
+                    }
                     case
SIMD_i32x4_lt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_lt);
+                        break;
+                    }
                     case SIMD_i32x4_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_gt);
+                        break;
+                    }
                     case SIMD_i32x4_gt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_gt);
+                        break;
+                    }
                     case SIMD_i32x4_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_le);
+                        break;
+                    }
                     case SIMD_i32x4_le_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_le);
+                        break;
+                    }
                     case SIMD_i32x4_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_ge);
+                        break;
+                    }
                     case SIMD_i32x4_ge_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_ge);
                         break;
                     }
 
                     /* f32x4 comparison operations */
                     case SIMD_f32x4_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_eq);
+                        break;
+                    }
                     case SIMD_f32x4_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_ne);
+                        break;
+                    }
                     case SIMD_f32x4_lt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_lt);
+                        break;
+                    }
                     case SIMD_f32x4_gt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_gt);
+                        break;
+                    }
                     case SIMD_f32x4_le:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_le);
+                        break;
+                    }
                     case SIMD_f32x4_ge:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_ge);
                         break;
                     }
 
                     /* f64x2 comparison operations */
                     case SIMD_f64x2_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_eq);
+                        break;
+                    }
                     case SIMD_f64x2_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_ne);
+                        break;
+                    }
                     case SIMD_f64x2_lt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_lt);
+                        break;
+                    }
                     case SIMD_f64x2_gt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_gt);
+                        break;
+                    }
                     case SIMD_f64x2_le:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_le);
+                        break;
+                    }
                     case SIMD_f64x2_ge:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_ge);
                         break;
                     }
 
@@ -5948,10 +6300,25 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                          v1.i64x2[1] ^ v2.i64x2[1]);
                         break;
                     }
+                    // TODO: Test
                     case SIMD_v128_bitselect:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        /* popped top-of-stack first: mask, then v2, then v1 */
+                        V128 mask = POP_V128();
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+
+                        simde_v128_t simde_result = simde_wasm_v128_bitselect(
+                            SIMD_V128_TO_SIMDE_V128(v1),
+                            SIMD_V128_TO_SIMDE_V128(v2),
+                            SIMD_V128_TO_SIMDE_V128(mask));
+
+                        V128 result;
+                        SIMDE_V128_TO_SIMD_V128(simde_result, result);
+
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
                         break;
                     }
                     case SIMD_v128_any_true:
                     {
@@ -5962,6 +6327,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         break;
                     }
 
+                    // TODO:
                     /* load lane operations */
                     case SIMD_v128_load8_lane:
                     case SIMD_v128_load16_lane:
@@ -5978,209 +6344,808 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         break;
                     }
 
+#define SIMD_SINGLE_OP(simde_func) \
+    do { \
+        V128 v1 = POP_V128(); \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1)); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
                     /* Float conversion */
                     case SIMD_f32x4_demote_f64x2_zero:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_demote_f64x2_zero);
+                        break;
+                    }
                     case SIMD_f64x2_promote_low_f32x4_zero:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_SINGLE_OP(simde_wasm_f64x2_promote_low_f32x4);
                         break;
                     }
 
                     /* i8x16 operations */
                     case SIMD_i8x16_abs:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i8x16_abs);
+                        break;
+                    }
                     case SIMD_i8x16_neg:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i8x16_neg);
+                        break;
+                    }
                     case SIMD_i8x16_popcnt:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i8x16_popcnt);
+                        break;
+                    }
                     case SIMD_i8x16_all_true:
                     {
-                        V128 v = POP_V128();
-                        uint8_t *bytes = (uint8_t *)&v;
-                        bool all_true = true;
+                        V128 v1 = POP_V128();
 
-                        for (int i = 0; i < 16; i++) {
-                            if (bytes[i] == 0) {
-                                all_true = false;
-                                break;
-                            }
-                        }
+                        bool result = simde_wasm_i8x16_all_true(
+                            SIMD_V128_TO_SIMDE_V128(v1));
 
-                        PUSH_I32(all_true ?
1 : 0); + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; break; } case SIMD_i8x16_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i8x16_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i8x16_narrow_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8); + break; + } case SIMD_i8x16_narrow_i16x8_u: - case SIMD_f32x4_ceil: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8); + break; + } + case SIMD_f32x4_ceil: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_ceil); + break; + } case SIMD_f32x4_floor: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_floor); + break; + } case SIMD_f32x4_trunc: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_trunc); + break; + } case SIMD_f32x4_nearest: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_nearest); + break; + } +// TODO: Check count? +#define SIMD_LANE_SHIFT(simde_func) \ + do { \ + int32 count = POP_I32(); \ + V128 v1 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = \ + simde_func(SIMD_V128_TO_SIMDE_V128(v1), count); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) case SIMD_i8x16_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shl); + break; + } case SIMD_i8x16_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shr); + break; + } case SIMD_i8x16_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shr); + break; + } case SIMD_i8x16_add: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add); + break; + } case SIMD_i8x16_add_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat); + break; + } case SIMD_i8x16_add_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat); + break; + } case SIMD_i8x16_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub); + break; + } case SIMD_i8x16_sub_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat); + break; + } case SIMD_i8x16_sub_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat); + break; + } case SIMD_f64x2_ceil: + { + 
SIMD_SINGLE_OP(simde_wasm_f64x2_ceil); + break; + } case SIMD_f64x2_floor: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_floor); + break; + } case SIMD_i8x16_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_min); + break; + } case SIMD_i8x16_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_min); + break; + } case SIMD_i8x16_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_max); + break; + } case SIMD_i8x16_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_max); + break; + } case SIMD_f64x2_trunc: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_trunc); + break; + } case SIMD_i8x16_avgr_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_avgr); + break; + } case SIMD_i16x8_extadd_pairwise_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16); + break; + } case SIMD_i16x8_extadd_pairwise_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16); + break; + } case SIMD_i32x4_extadd_pairwise_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8); + break; + } case SIMD_i32x4_extadd_pairwise_i16x8_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8); break; } /* i16x8 operations */ case SIMD_i16x8_abs: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_abs); + break; + } case SIMD_i16x8_neg: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_neg); + break; + } case SIMD_i16x8_q15mulr_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_q15mulr_sat); + break; + } case SIMD_i16x8_all_true: + { + V128 v1 = POP_V128(); + + bool result = simde_wasm_i16x8_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i16x8_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i16x8_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i16x8_narrow_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4); + break; + } case SIMD_i16x8_narrow_i32x4_u: + { + 
SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4); + break; + } case SIMD_i16x8_extend_low_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16); + break; + } case SIMD_i16x8_extend_high_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); + break; + } case SIMD_i16x8_extend_low_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16); + break; + } case SIMD_i16x8_extend_high_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); + break; + } case SIMD_i16x8_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shl); + break; + } case SIMD_i16x8_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shr); + break; + } case SIMD_i16x8_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shr); + break; + } case SIMD_i16x8_add: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add); + break; + } case SIMD_i16x8_add_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat); + break; + } case SIMD_i16x8_add_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat); + break; + } case SIMD_i16x8_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub); + break; + } case SIMD_i16x8_sub_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat); + break; + } case SIMD_i16x8_sub_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat); + break; + } case SIMD_f64x2_nearest: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_nearest); + break; + } case SIMD_i16x8_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_mul); + break; + } case SIMD_i16x8_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_min); + break; + } case SIMD_i16x8_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_min); + break; + } case SIMD_i16x8_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_max); + break; + } case SIMD_i16x8_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_max); + break; + } case SIMD_i16x8_avgr_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_avgr); + break; + } case SIMD_i16x8_extmul_low_i8x16_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16); + break; + } case SIMD_i16x8_extmul_high_i8x16_s: + { + 
SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16); + break; + } case SIMD_i16x8_extmul_low_i8x16_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16); + break; + } case SIMD_i16x8_extmul_high_i8x16_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16); break; } /* i32x4 operations */ case SIMD_i32x4_abs: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_abs); + break; + } case SIMD_i32x4_neg: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_neg); + break; + } case SIMD_i32x4_all_true: + { + V128 v1 = POP_V128(); + + bool result = simde_wasm_i32x4_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i32x4_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i32x4_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i32x4_extend_low_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8); + break; + } case SIMD_i32x4_extend_high_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); + break; + } case SIMD_i32x4_extend_low_i16x8_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8); + break; + } case SIMD_i32x4_extend_high_i16x8_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); + break; + } case SIMD_i32x4_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shl); + break; + } case SIMD_i32x4_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shr); + break; + } case SIMD_i32x4_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shr); + break; + } case SIMD_i32x4_add: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_add); + break; + } case SIMD_i32x4_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_sub); + break; + } case SIMD_i32x4_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_mul); + break; + } case SIMD_i32x4_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_min); + break; + } case SIMD_i32x4_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_min); + break; + } case 
SIMD_i32x4_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_max); + break; + } case SIMD_i32x4_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_max); + break; + } case SIMD_i32x4_dot_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_dot_i16x8); + break; + } case SIMD_i32x4_extmul_low_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8); + break; + } case SIMD_i32x4_extmul_high_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8); + break; + } case SIMD_i32x4_extmul_low_i16x8_u: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8); + break; + } case SIMD_i32x4_extmul_high_i16x8_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8); break; } /* i64x2 operations */ case SIMD_i64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_abs); + break; + } case SIMD_i64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_neg); + break; + } case SIMD_i64x2_all_true: + { + V128 v1 = POP_V128(); + + bool result = simde_wasm_i64x2_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i64x2_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i64x2_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i64x2_extend_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4); + break; + } case SIMD_i64x2_extend_high_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); + break; + } case SIMD_i64x2_extend_low_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4); + break; + } case SIMD_i64x2_extend_high_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); + break; + } + + // TODO: Verify count works case SIMD_i64x2_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i64x2_shl); + break; + } case SIMD_i64x2_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i64x2_shr); + break; + } case SIMD_i64x2_shr_u: + { + 
SIMD_LANE_SHIFT(simde_wasm_i64x2_shr); + break; + } case SIMD_i64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_add); + break; + } case SIMD_i64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_sub); + break; + } case SIMD_i64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_mul); + break; + } case SIMD_i64x2_eq: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_eq); + break; + } case SIMD_i64x2_ne: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_ne); + break; + } case SIMD_i64x2_lt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_lt); + break; + } case SIMD_i64x2_gt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_gt); + break; + } case SIMD_i64x2_le_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_le); + break; + } case SIMD_i64x2_ge_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_ge); + break; + } case SIMD_i64x2_extmul_low_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4); + break; + } case SIMD_i64x2_extmul_high_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4); + break; + } case SIMD_i64x2_extmul_low_i32x4_u: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4); + break; + } case SIMD_i64x2_extmul_high_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4); break; } /* f32x4 opertions */ case SIMD_f32x4_abs: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_abs); + break; + } case SIMD_f32x4_neg: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_neg); + break; + } case SIMD_f32x4_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_sqrt); + break; + } case SIMD_f32x4_add: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_add); + break; + } case SIMD_f32x4_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_sub); + break; + } case SIMD_f32x4_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_mul); + break; + } case SIMD_f32x4_div: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_div); + break; + } case SIMD_f32x4_min: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_min); + break; + } case SIMD_f32x4_max: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_max); + break; + } case SIMD_f32x4_pmin: + { + 
SIMD_DOUBLE_OP(simde_wasm_f32x4_pmin); + break; + } case SIMD_f32x4_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f32x4_pmax); break; } /* f64x2 operations */ case SIMD_f64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_abs); + break; + } case SIMD_f64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_neg); + break; + } case SIMD_f64x2_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_sqrt); + break; + } case SIMD_f64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_add); + break; + } case SIMD_f64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_sub); + break; + } case SIMD_f64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_mul); + break; + } case SIMD_f64x2_div: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_div); + break; + } case SIMD_f64x2_min: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_min); + break; + } case SIMD_f64x2_max: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_max); + break; + } case SIMD_f64x2_pmin: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmin); + break; + } case SIMD_f64x2_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmax); break; } /* Conversion operations */ case SIMD_i32x4_trunc_sat_f32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_i32x4_trunc_sat_f32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_f32x4_convert_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_f32x4_convert_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_i32x4_trunc_sat_f64x2_s_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_i32x4_trunc_sat_f64x2_u_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_f64x2_convert_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); + break; + } case SIMD_f64x2_convert_low_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + 
SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); break; } @@ -6190,6 +7155,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } #endif + HANDLE_OP(WASM_OP_CALL) { #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 47995e03f6..ae6a92fa04 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15080,6 +15080,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_AND_PUSH(mem_offset_type, VALUE_TYPE_V128); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 func->has_memory_operations = true; @@ -15099,6 +15103,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_V128(); POP_MEM_OFFSET(); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 @@ -15128,12 +15136,17 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, CHECK_BUF1(p, p_end, 16); mask = read_i8x16(p, error_buf, error_buf_size); - p += 16; if (!check_simd_shuffle_mask(mask, error_buf, error_buf_size)) { goto fail; } - +#if WASM_ENABLE_FAST_INTERP != 0 + uint64 high, low; + wasm_runtime_read_v128(p, &high, &low); + emit_uint64(loader_ctx, high); + emit_uint64(loader_ctx, low); +#endif + p += 16; POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; } @@ -15204,7 +15217,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf_size)) { goto fail; } - if (replace[opcode1 - SIMD_i8x16_extract_lane_s]) { if (!(wasm_loader_pop_frame_ref( loader_ctx, diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 1424840e79..75d30c9b31 100644 --- 
a/core/iwasm/interpreter/wasm_opcode.h
+++ b/core/iwasm/interpreter/wasm_opcode.h
@@ -779,10 +779,10 @@ typedef enum WASMAtomicEXTOpcode {
 #else
 #define DEF_DEBUG_BREAK_HANDLE()
 #endif
-
 #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode)
 
-#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \
+#if (WASM_ENABLE_JIT != 0 \
+     || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \
     && WASM_ENABLE_SIMD != 0
 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \
     SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX),
diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake
new file mode 100644
index 0000000000..b36e356945
--- /dev/null
+++ b/core/iwasm/libraries/simde/simde.cmake
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Amazon Inc. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# simde is a header only library
+
+set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR})
+
+if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*")
+  add_definitions (-DWASM_ENABLE_SIMDE=1)
+endif ()
+
+include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde)
+
+include(FetchContent)
+
+FetchContent_Declare(
+    simde
+    GIT_REPOSITORY https://github.com/simd-everywhere/simde
+    GIT_TAG v0.8.2
+)
+
+message("-- Fetching simde ..")
+FetchContent_MakeAvailable(simde)
+include_directories("${simde_SOURCE_DIR}")
From fbbcd08b3c55d720a8a7e028e0db27be3485718d Mon Sep 17 00:00:00 2001
From: Maks Litskevich
Date: Wed, 13 Nov 2024 10:10:46 +0300
Subject: [PATCH 06/32] Emit imm for lane extract and replace (#3906)

---
 core/iwasm/interpreter/wasm_loader.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c
index ae6a92fa04..1c6c3f40ea 100644
--- a/core/iwasm/interpreter/wasm_loader.c
+++ b/core/iwasm/interpreter/wasm_loader.c
@@ -15225,7 +15225,9 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                             error_buf, error_buf_size)))
                         goto
fail; } - +#if WASM_ENABLE_FAST_INTERP != 0 + emit_byte(loader_ctx, lane); +#endif POP_AND_PUSH( VALUE_TYPE_V128, push_type[opcode1 - SIMD_i8x16_extract_lane_s]); From c3601cc4bd655f8edcf3a46433ffec5d26ddb0d5 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Fri, 22 Nov 2024 14:39:33 +0300 Subject: [PATCH 07/32] Fix replacement value not being correct (#3919) --- core/iwasm/interpreter/wasm_loader.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 1c6c3f40ea..c4d5dc0ac4 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15217,17 +15217,27 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf_size)) { goto fail; } +#if WASM_ENABLE_FAST_INTERP != 0 + emit_byte(loader_ctx, lane); +#endif if (replace[opcode1 - SIMD_i8x16_extract_lane_s]) { +#if WASM_ENABLE_FAST_INTERP != 0 + if (!(wasm_loader_pop_frame_ref_offset( + loader_ctx, + replace[opcode1 + - SIMD_i8x16_extract_lane_s], + error_buf, error_buf_size))) + goto fail; +#else if (!(wasm_loader_pop_frame_ref( loader_ctx, replace[opcode1 - SIMD_i8x16_extract_lane_s], error_buf, error_buf_size))) goto fail; +#endif /* end of WASM_ENABLE_FAST_INTERP != 0 */ } -#if WASM_ENABLE_FAST_INTERP != 0 - emit_byte(loader_ctx, lane); -#endif + POP_AND_PUSH( VALUE_TYPE_V128, push_type[opcode1 - SIMD_i8x16_extract_lane_s]); From cfcb946d283c4802b56810844a1e196b256b2071 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 25 Nov 2024 08:31:41 +0000 Subject: [PATCH 08/32] Replace/extract opcodes for fast interp --- core/iwasm/interpreter/wasm_interp_fast.c | 72 +++++++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 09823f08c8..faf3537dce 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ 
b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5993,25 +5993,89 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SPLAT_OP_F64(simde_wasm_f64x2_splat); break; } - - // TODO: - /* Lane */ +#define SIMD_EXTRACT_LANE_OP(register, return_type, push_elem) \ + do { \ + uint8 lane = *frame_ip++; \ + V128 v = POP_V128(); \ + push_elem((return_type)(v.register[lane])); \ + } while (0) +#define SIMD_REPLACE_LANE_OP(register, return_type, pop_elem) \ + do { \ + uint8 lane = *frame_ip++; \ + return_type replacement = pop_elem(); \ + V128 v = POP_V128(); \ + v.register[lane] = replacement; \ + addr_ret = GET_OFFSET(); \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, v); \ + } while (0) case SIMD_i8x16_extract_lane_s: + { + SIMD_EXTRACT_LANE_OP(i8x16, int8, PUSH_I32); + break; + } case SIMD_i8x16_extract_lane_u: + { + SIMD_EXTRACT_LANE_OP(i8x16, uint8, PUSH_I32); + break; + } case SIMD_i8x16_replace_lane: + { + SIMD_REPLACE_LANE_OP(i8x16, int8, POP_I32); + break; + } case SIMD_i16x8_extract_lane_s: + { + SIMD_EXTRACT_LANE_OP(i16x8, int16, PUSH_I32); + break; + } case SIMD_i16x8_extract_lane_u: + { + SIMD_EXTRACT_LANE_OP(i16x8, uint16, PUSH_I32); + break; + } case SIMD_i16x8_replace_lane: + { + SIMD_REPLACE_LANE_OP(i16x8, int16, POP_I32); + break; + } case SIMD_i32x4_extract_lane: + { + SIMD_EXTRACT_LANE_OP(i32x4, int32, PUSH_I32); + break; + } case SIMD_i32x4_replace_lane: + { + SIMD_REPLACE_LANE_OP(i32x4, int32, POP_I32); + break; + } case SIMD_i64x2_extract_lane: + { + SIMD_EXTRACT_LANE_OP(i64x2, int64, PUSH_I64); + break; + } case SIMD_i64x2_replace_lane: + { + SIMD_REPLACE_LANE_OP(i64x2, int64, POP_I64); + break; + } case SIMD_f32x4_extract_lane: + { + SIMD_EXTRACT_LANE_OP(f32x4, float32, PUSH_F32); + break; + } case SIMD_f32x4_replace_lane: + { + SIMD_REPLACE_LANE_OP(f32x4, float32, POP_F32); + break; + } case SIMD_f64x2_extract_lane: + { + SIMD_EXTRACT_LANE_OP(f64x2, float64, PUSH_F64); + break; + } case SIMD_f64x2_replace_lane: { - wasm_set_exception(module, 
"unsupported SIMD opcode"); + SIMD_REPLACE_LANE_OP(f64x2, float64, POP_F64); break; } From 138faba9545a0889585ce2be9f6c8a13ca39fdc1 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 3 Dec 2024 17:23:27 +0300 Subject: [PATCH 09/32] Implement load lanes opcodes for wasm (#3942) --- core/iwasm/interpreter/wasm_interp_fast.c | 38 +++++++++++++++++++++-- core/iwasm/interpreter/wasm_loader.c | 7 ++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index faf3537dce..5744b7ab10 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6391,12 +6391,46 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } - // TODO: - /* load lane operations */ +#define SIMD_LOAD_LANE_OP(register, width) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + V128 vec = POP_V128(); \ + int32 base = POP_I32(); \ + offset += base; \ + int lane = *frame_ip++; \ + addr = GET_OPERAND(uint32, I32, 0); \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(width / 8); \ + if (width == 64) { \ + vec.register[lane] = GET_I64_FROM_ADDR(maddr); \ + } \ + else { \ + vec.register[lane] = *(uint##width *)(maddr); \ + } \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, vec); \ + } while (0) + case SIMD_v128_load8_lane: + { + SIMD_LOAD_LANE_OP(i8x16, 8); + break; + } case SIMD_v128_load16_lane: + { + SIMD_LOAD_LANE_OP(i16x8, 16); + break; + } case SIMD_v128_load32_lane: + { + SIMD_LOAD_LANE_OP(i32x4, 32); + break; + } case SIMD_v128_load64_lane: + { + SIMD_LOAD_LANE_OP(i64x2, 64); + break; + } case SIMD_v128_store8_lane: case SIMD_v128_store16_lane: case SIMD_v128_store32_lane: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 838ac55f8a..163f086141 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15368,9 +15368,14 @@ 
wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf_size)) { goto fail; } - +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif POP_V128(); POP_MEM_OFFSET(); +#if WASM_ENABLE_FAST_INTERP != 0 + emit_byte(loader_ctx, lane); +#endif if (opcode1 < SIMD_v128_store8_lane) { PUSH_V128(); } From 4cb9b1b196f5531b0b8e9f28f8b8c5bd8c6c85ff Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 23 Dec 2024 06:42:41 +0000 Subject: [PATCH 10/32] Add zero load opcodes --- core/iwasm/interpreter/wasm_interp_fast.c | 40 ++++++++++++++++++----- core/iwasm/interpreter/wasm_loader.c | 4 ++- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 5744b7ab10..4e6dc00228 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6391,14 +6391,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } -#define SIMD_LOAD_LANE_OP(register, width) \ +#define SIMD_LOAD_LANE_COMMON(vec, register, lane, width) \ do { \ - uint32 offset, addr; \ - offset = read_uint32(frame_ip); \ - V128 vec = POP_V128(); \ - int32 base = POP_I32(); \ - offset += base; \ - int lane = *frame_ip++; \ addr = GET_OPERAND(uint32, I32, 0); \ addr_ret = GET_OFFSET(); \ CHECK_MEMORY_OVERFLOW(width / 8); \ @@ -6411,6 +6405,17 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, vec); \ } while (0) +#define SIMD_LOAD_LANE_OP(register, width) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + V128 vec = POP_V128(); \ + int32 base = POP_I32(); \ + offset += base; \ + int lane = *frame_ip++; \ + SIMD_LOAD_LANE_COMMON(vec, register, lane, width); \ + } while (0) + case SIMD_v128_load8_lane: { SIMD_LOAD_LANE_OP(i8x16, 8); @@ -6435,10 +6440,29 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_store16_lane: case 
SIMD_v128_store32_lane: case SIMD_v128_store64_lane: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } +#define SIMD_LOAD_ZERO_OP(register, width) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + int32 base = POP_I32(); \ + offset += base; \ + int32 lane = 0; \ + V128 vec = { 0 }; \ + SIMD_LOAD_LANE_COMMON(vec, register, lane, width); \ + } while (0) + case SIMD_v128_load32_zero: + { + SIMD_LOAD_ZERO_OP(i32x4, 32); + break; + } case SIMD_v128_load64_zero: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_LOAD_ZERO_OP(i64x2, 64); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 163f086141..a39dff89dc 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15397,7 +15397,9 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } read_leb_mem_offset(p, p_end, mem_offset); /* offset */ - +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif POP_AND_PUSH(mem_offset_type, VALUE_TYPE_V128); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 func->has_memory_operations = true; From 441440ff8a13f547f298e492864da4979208fc0b Mon Sep 17 00:00:00 2001 From: jammar1 <108334558+jammar1@users.noreply.github.com> Date: Thu, 2 Jan 2025 23:30:30 +0000 Subject: [PATCH 11/32] Implement final SIMD opcodes: store lane (#4001) Co-authored-by: James Marsh --- core/iwasm/interpreter/wasm_interp_fast.c | 36 ++++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 4 +++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 4e6dc00228..506848fad5 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6436,12 +6436,46 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_LOAD_LANE_OP(i64x2, 64); break; } +#define 
SIMD_STORE_LANE_OP(register, width) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + V128 vec = POP_V128(); \ + int32 base = POP_I32(); \ + offset += base; \ + int lane = *frame_ip++; \ + addr = GET_OPERAND(uint32, I32, 0); \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(width / 8); \ + if (width == 64) { \ + STORE_I64(maddr, vec.register[lane]); \ + } \ + else { \ + *(uint##width *)(maddr) = vec.register[lane]; \ + } \ + } while (0) + case SIMD_v128_store8_lane: + { + SIMD_STORE_LANE_OP(i8x16, 8); + break; + } + case SIMD_v128_store16_lane: + { + SIMD_STORE_LANE_OP(i16x8, 16); + break; + } + case SIMD_v128_store32_lane: + { + SIMD_STORE_LANE_OP(i32x4, 32); + break; + } + case SIMD_v128_store64_lane: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_STORE_LANE_OP(i64x2, 64); break; } #define SIMD_LOAD_ZERO_OP(register, width) \ diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a39dff89dc..10a5573a78 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15362,6 +15362,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + CHECK_BUF(p, p_end, 1); lane = read_uint8(p); if (!check_simd_access_lane(opcode1, lane, error_buf, From 07fd987ad80762f8d468ca4b2af4c1de7c33a941 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 6 Jan 2025 14:24:51 +0000 Subject: [PATCH 12/32] Fix boolean tests --- core/iwasm/interpreter/wasm_interp_fast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 506848fad5..f66a4d6987 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6364,7 +6364,6 @@ 
wasm_interp_call_func_bytecode(WASMModuleInstance *module, v1.i64x2[1] ^ v2.i64x2[1]); break; } - // TODO: Test case SIMD_v128_bitselect: { V128 v1 = POP_V128(); @@ -6373,14 +6372,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr_ret = GET_OFFSET(); simde_v128_t simde_result = simde_wasm_v128_bitselect( - SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v3), SIMD_V128_TO_SIMDE_V128(v2), - SIMD_V128_TO_SIMDE_V128(v3)); + SIMD_V128_TO_SIMDE_V128(v1)); V128 result; SIMDE_V128_TO_SIMD_V128(simde_result, result); PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; } case SIMD_v128_any_true: { From 8ce74b75bb577b2a0f835734aebcd36d0760b259 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Sat, 1 Feb 2025 04:14:35 +0000 Subject: [PATCH 13/32] Fix load/store (#4054) Fix v128 load/store --- core/iwasm/interpreter/wasm_interp_fast.c | 22 +++++++--------------- core/iwasm/interpreter/wasm_loader.c | 4 ---- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index f66a4d6987..298d39668b 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5766,10 +5766,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_load: { uint32 offset, addr; - offset = read_uint32( - frame_ip); // TODO: Check with an offset! 
- addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; + offset = read_uint32(frame_ip); + addr = POP_I32(); addr_ret = GET_OFFSET(); CHECK_MEMORY_OVERFLOW(16); PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); @@ -5879,8 +5877,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { uint32 offset, addr; offset = read_uint32(frame_ip); - frame_ip += 2; - addr = GET_OPERAND(uint32, I32, 0); + V128 data = POP_V128(); + addr = POP_I32(); V128 data; data = POP_V128(); @@ -6393,7 +6391,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #define SIMD_LOAD_LANE_COMMON(vec, register, lane, width) \ do { \ - addr = GET_OPERAND(uint32, I32, 0); \ addr_ret = GET_OFFSET(); \ CHECK_MEMORY_OVERFLOW(width / 8); \ if (width == 64) { \ @@ -6410,8 +6407,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; \ offset = read_uint32(frame_ip); \ V128 vec = POP_V128(); \ - int32 base = POP_I32(); \ - offset += base; \ + addr = POP_I32(); \ int lane = *frame_ip++; \ SIMD_LOAD_LANE_COMMON(vec, register, lane, width); \ } while (0) @@ -6441,11 +6437,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; \ offset = read_uint32(frame_ip); \ V128 vec = POP_V128(); \ - int32 base = POP_I32(); \ - offset += base; \ + addr = POP_I32(); \ int lane = *frame_ip++; \ - addr = GET_OPERAND(uint32, I32, 0); \ - addr_ret = GET_OFFSET(); \ CHECK_MEMORY_OVERFLOW(width / 8); \ if (width == 64) { \ STORE_I64(maddr, vec.register[lane]); \ @@ -6482,8 +6475,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ - int32 base = POP_I32(); \ - offset += base; \ + addr = POP_I32(); \ int32 lane = 0; \ V128 vec = { 0 }; \ SIMD_LOAD_LANE_COMMON(vec, register, lane, width); \ diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 10a5573a78..a39dff89dc 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ 
b/core/iwasm/interpreter/wasm_loader.c @@ -15362,10 +15362,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ -#if WASM_ENABLE_FAST_INTERP != 0 - emit_uint32(loader_ctx, mem_offset); -#endif - CHECK_BUF(p, p_end, 1); lane = read_uint8(p); if (!check_simd_access_lane(opcode1, lane, error_buf, From 50faad05019aa59526ca2d1455cfb93ef6c6607c Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Sat, 1 Feb 2025 04:15:51 +0000 Subject: [PATCH 14/32] Correctly use unsigned functions (#4055) - Correctly select unsigned functions --------- Co-authored-by: James Marsh Co-authored-by: Ubuntu --- core/iwasm/interpreter/wasm_interp_fast.c | 104 +++++++++++----------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 298d39668b..cd9dd91eb8 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6110,7 +6110,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_lt_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_lt); + SIMD_DOUBLE_OP(simde_wasm_u8x16_lt); break; } case SIMD_i8x16_gt_s: @@ -6120,7 +6120,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_gt_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_gt); + SIMD_DOUBLE_OP(simde_wasm_u8x16_gt); break; } case SIMD_i8x16_le_s: @@ -6130,7 +6130,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_le_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_le); + SIMD_DOUBLE_OP(simde_wasm_u8x16_le); break; } case SIMD_i8x16_ge_s: @@ -6140,7 +6140,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_ge_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_ge); + SIMD_DOUBLE_OP(simde_wasm_u8x16_ge); break; } @@ -6162,7 +6162,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_lt_u: { - 
SIMD_DOUBLE_OP(simde_wasm_i16x8_lt); + SIMD_DOUBLE_OP(simde_wasm_u16x8_lt); break; } case SIMD_i16x8_gt_s: @@ -6172,7 +6172,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_gt_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_gt); + SIMD_DOUBLE_OP(simde_wasm_u16x8_gt); break; } case SIMD_i16x8_le_s: @@ -6182,7 +6182,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_le_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_le); + SIMD_DOUBLE_OP(simde_wasm_u16x8_le); break; } case SIMD_i16x8_ge_s: @@ -6192,7 +6192,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_ge_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_ge); + SIMD_DOUBLE_OP(simde_wasm_u16x8_ge); break; } @@ -6214,7 +6214,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_lt_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_lt); + SIMD_DOUBLE_OP(simde_wasm_u32x4_lt); break; } case SIMD_i32x4_gt_s: @@ -6224,7 +6224,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_gt_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_gt); + SIMD_DOUBLE_OP(simde_wasm_u32x4_gt); break; } case SIMD_i32x4_le_s: @@ -6234,7 +6234,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_le_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_le); + SIMD_DOUBLE_OP(simde_wasm_u32x4_le); break; } case SIMD_i32x4_ge_s: @@ -6244,7 +6244,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_ge_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_ge); + SIMD_DOUBLE_OP(simde_wasm_u32x4_ge); break; } @@ -6283,32 +6283,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* f64x2 comparison operations */ case SIMD_f64x2_eq: { - SIMD_DOUBLE_OP(simde_wasm_f32x4_eq); + SIMD_DOUBLE_OP(simde_wasm_f64x2_eq); break; } case SIMD_f64x2_ne: { - SIMD_DOUBLE_OP(simde_wasm_f32x4_ne); + SIMD_DOUBLE_OP(simde_wasm_f64x2_ne); break; } case SIMD_f64x2_lt: { - SIMD_DOUBLE_OP(simde_wasm_f32x4_lt); + 
SIMD_DOUBLE_OP(simde_wasm_f64x2_lt); break; } case SIMD_f64x2_gt: { - SIMD_DOUBLE_OP(simde_wasm_f32x4_gt); + SIMD_DOUBLE_OP(simde_wasm_f64x2_gt); break; } case SIMD_f64x2_le: { - SIMD_DOUBLE_OP(simde_wasm_f32x4_le); + SIMD_DOUBLE_OP(simde_wasm_f64x2_le); break; } case SIMD_f64x2_ge: { - SIMD_DOUBLE_OP(simde_wasm_f32x4_ge); + SIMD_DOUBLE_OP(simde_wasm_f64x2_ge); break; } @@ -6563,7 +6563,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_narrow_i16x8_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8); + SIMD_DOUBLE_OP(simde_wasm_u8x16_narrow_i16x8); break; } case SIMD_f32x4_ceil: @@ -6613,7 +6613,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_shr_u: { - SIMD_LANE_SHIFT(simde_wasm_i8x16_shr); + SIMD_LANE_SHIFT(simde_wasm_u8x16_shr); break; } case SIMD_i8x16_add: @@ -6628,7 +6628,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_add_sat_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat); + SIMD_DOUBLE_OP(simde_wasm_u8x16_add_sat); break; } case SIMD_i8x16_sub: @@ -6643,7 +6643,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_sub_sat_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat); + SIMD_DOUBLE_OP(simde_wasm_u8x16_sub_sat); break; } case SIMD_f64x2_ceil: @@ -6663,7 +6663,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_min_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_min); + SIMD_DOUBLE_OP(simde_wasm_u8x16_min); break; } case SIMD_i8x16_max_s: @@ -6673,7 +6673,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_max_u: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_max); + SIMD_DOUBLE_OP(simde_wasm_u8x16_max); break; } case SIMD_f64x2_trunc: @@ -6693,7 +6693,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_extadd_pairwise_i8x16_u: { - SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16); + SIMD_SINGLE_OP(simde_wasm_u16x8_extadd_pairwise_u8x16); break; } case 
SIMD_i32x4_extadd_pairwise_i16x8_s: @@ -6703,7 +6703,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_extadd_pairwise_i16x8_u: { - SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8); + SIMD_SINGLE_OP(simde_wasm_u32x4_extadd_pairwise_u16x8); break; } @@ -6752,7 +6752,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_narrow_i32x4_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4); + SIMD_DOUBLE_OP(simde_wasm_u16x8_narrow_i32x4); break; } case SIMD_i16x8_extend_low_i8x16_s: @@ -6767,12 +6767,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_extend_low_i8x16_u: { - SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16); + SIMD_SINGLE_OP(simde_wasm_u16x8_extend_low_u8x16); break; } case SIMD_i16x8_extend_high_i8x16_u: { - SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); + SIMD_SINGLE_OP(simde_wasm_u16x8_extend_high_u8x16); break; } case SIMD_i16x8_shl: @@ -6787,7 +6787,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_shr_u: { - SIMD_LANE_SHIFT(simde_wasm_i16x8_shr); + SIMD_LANE_SHIFT(simde_wasm_u16x8_shr); break; } case SIMD_i16x8_add: @@ -6802,7 +6802,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_add_sat_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat); + SIMD_DOUBLE_OP(simde_wasm_u16x8_add_sat); break; } case SIMD_i16x8_sub: @@ -6817,7 +6817,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_sub_sat_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat); + SIMD_DOUBLE_OP(simde_wasm_u16x8_sub_sat); break; } case SIMD_f64x2_nearest: @@ -6837,7 +6837,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_min_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_min); + SIMD_DOUBLE_OP(simde_wasm_u16x8_min); break; } case SIMD_i16x8_max_s: @@ -6847,7 +6847,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_max_u: { - 
SIMD_DOUBLE_OP(simde_wasm_i16x8_max); + SIMD_DOUBLE_OP(simde_wasm_u16x8_max); break; } case SIMD_i16x8_avgr_u: @@ -6867,12 +6867,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i16x8_extmul_low_i8x16_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16); + SIMD_DOUBLE_OP(simde_wasm_u16x8_extmul_low_u8x16); break; } case SIMD_i16x8_extmul_high_i8x16_u: { - SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16); + SIMD_DOUBLE_OP(simde_wasm_u16x8_extmul_high_u8x16); break; } @@ -6921,12 +6921,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_extend_low_i16x8_u: { - SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8); + SIMD_SINGLE_OP(simde_wasm_u32x4_extend_low_u16x8); break; } case SIMD_i32x4_extend_high_i16x8_u: { - SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); + SIMD_SINGLE_OP(simde_wasm_u32x4_extend_high_u16x8); break; } case SIMD_i32x4_shl: @@ -6941,7 +6941,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_shr_u: { - SIMD_LANE_SHIFT(simde_wasm_i32x4_shr); + SIMD_LANE_SHIFT(simde_wasm_u32x4_shr); break; } case SIMD_i32x4_add: @@ -6966,7 +6966,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_min_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_min); + SIMD_DOUBLE_OP(simde_wasm_u32x4_min); break; } case SIMD_i32x4_max_s: @@ -6976,7 +6976,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_max_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_max); + SIMD_DOUBLE_OP(simde_wasm_u32x4_max); break; } case SIMD_i32x4_dot_i16x8_s: @@ -6996,12 +6996,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_extmul_low_i16x8_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8); + SIMD_DOUBLE_OP(simde_wasm_u32x4_extmul_low_u16x8); break; } case SIMD_i32x4_extmul_high_i16x8_u: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8); + SIMD_DOUBLE_OP(simde_wasm_u32x4_extmul_high_u16x8); break; } @@ -7050,12 
+7050,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i64x2_extend_low_i32x4_u: { - SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4); + SIMD_SINGLE_OP(simde_wasm_u64x2_extend_low_u32x4); break; } case SIMD_i64x2_extend_high_i32x4_u: { - SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); + SIMD_SINGLE_OP(simde_wasm_u64x2_extend_high_u32x4); break; } @@ -7072,7 +7072,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i64x2_shr_u: { - SIMD_LANE_SHIFT(simde_wasm_i64x2_shr); + SIMD_LANE_SHIFT(simde_wasm_u64x2_shr); break; } case SIMD_i64x2_add: @@ -7132,12 +7132,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i64x2_extmul_low_i32x4_u: { - SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4); + SIMD_DOUBLE_OP(simde_wasm_u64x2_extmul_low_u32x4); break; } case SIMD_i64x2_extmul_high_i32x4_u: { - SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4); + SIMD_DOUBLE_OP(simde_wasm_u64x2_extmul_high_u32x4); break; } @@ -7263,7 +7263,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_trunc_sat_f32x4_u: { - SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + SIMD_SINGLE_OP(simde_wasm_u32x4_trunc_sat_f32x4); break; } case SIMD_f32x4_convert_i32x4_s: @@ -7273,7 +7273,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_f32x4_convert_i32x4_u: { - SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_u32x4); break; } case SIMD_i32x4_trunc_sat_f64x2_s_zero: @@ -7283,7 +7283,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_trunc_sat_f64x2_u_zero: { - SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + SIMD_SINGLE_OP(simde_wasm_u32x4_trunc_sat_f64x2_zero); break; } case SIMD_f64x2_convert_low_i32x4_s: @@ -7293,7 +7293,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_f64x2_convert_low_i32x4_u: { - SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); + 
SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_u32x4); break; } From 474acd72e353a0a2f9196114f808609266fcea50 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Fri, 3 Jan 2025 14:24:36 +0000 Subject: [PATCH 15/32] implement local and function calls for v128 in the fast interpreter --- core/iwasm/interpreter/wasm_interp_fast.c | 58 +++++++++++++++++++++-- core/iwasm/interpreter/wasm_loader.c | 13 ++++- core/iwasm/interpreter/wasm_opcode.h | 19 +++++++- 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index cd9dd91eb8..7015635056 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -1699,6 +1699,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_OPERAND(uint64, I64, off)); ret_offset += 2; } + else if (ret_types[ret_idx] == VALUE_TYPE_V128) { + PUT_V128_TO_ADDR(prev_frame->lp + ret_offset, + GET_OPERAND_V128(off)); + ret_offset += 4; + } #if WASM_ENABLE_GC != 0 else if (wasm_is_type_reftype(ret_types[ret_idx])) { PUT_REF_TO_ADDR(prev_frame->lp + ret_offset, @@ -3536,6 +3541,24 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(EXT_OP_SET_LOCAL_FAST_V128) + HANDLE_OP(EXT_OP_TEE_LOCAL_FAST_V128) + { + /* clang-format off */ +#if WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 + local_offset = *frame_ip++; +#else + local_offset = *frame_ip; + frame_ip += 2; +#endif + /* clang-format on */ + PUT_V128_TO_ADDR((uint32 *)(frame_lp + local_offset), + GET_OPERAND_V128(0)); + frame_ip += 2; + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_GET_GLOBAL) { global_idx = read_uint32(frame_ip); @@ -4884,6 +4907,28 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(EXT_OP_COPY_STACK_TOP_V128) + { + addr1 = GET_OFFSET(); + addr2 = GET_OFFSET(); + + PUT_V128_TO_ADDR(frame_lp + addr2, 
+ GET_V128_FROM_ADDR(frame_lp + addr1)); + +#if WASM_ENABLE_GC != 0 + /* Ignore constants because they are not reference */ + if (addr1 >= 0) { + if (*FRAME_REF(addr1)) { + CLEAR_FRAME_REF(addr1); + SET_FRAME_REF(addr2); + } + } +#endif + + HANDLE_OP_END(); + } +#endif HANDLE_OP(EXT_OP_COPY_STACK_VALUES) { @@ -6079,8 +6124,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #define SIMD_DOUBLE_OP(simde_func) \ do { \ - V128 v1 = POP_V128(); \ V128 v2 = POP_V128(); \ + V128 v1 = POP_V128(); \ addr_ret = GET_OFFSET(); \ \ simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \ @@ -6946,6 +6991,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_add: { + SIMD_DOUBLE_OP(simde_wasm_i32x4_add); break; } @@ -7480,8 +7526,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } for (i = 0; i < cur_func->param_count; i++) { - if (cur_func->param_types[i] == VALUE_TYPE_I64 - || cur_func->param_types[i] == VALUE_TYPE_F64) { + if (cur_func->param_types[i] == VALUE_TYPE_V128) { + PUT_V128_TO_ADDR( + outs_area->lp, + GET_OPERAND_V128(2 * (cur_func->param_count - i - 1))); + outs_area->lp += 4; + } + else if (cur_func->param_types[i] == VALUE_TYPE_I64 + || cur_func->param_types[i] == VALUE_TYPE_F64) { PUT_I64_TO_ADDR( outs_area->lp, GET_OPERAND(uint64, I64, diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a39dff89dc..d7bd34fde7 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -12944,10 +12944,21 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_SET_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } - else { + else if (is_64bit_type(local_type)) { emit_label(EXT_OP_SET_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); } +#if WASM_ENABLE_SIMDE != 0 + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_SET_LOCAL_FAST_V128); + emit_byte(loader_ctx, 
(uint8)local_offset); + } +#endif + else { + set_error_buf(error_buf, error_buf_size, + "unknown local type"); + goto fail; + } POP_OFFSET_TYPE(local_type); } } diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 75d30c9b31..c3c5e00f80 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,6 +278,14 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ + && WASM_ENABLE_SIMD != 0 + EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, + EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, + EXT_OP_COPY_STACK_TOP_V128 = 0xdf, +#endif + /* Post-MVP extend op prefix */ WASM_OP_GC_PREFIX = 0xfb, WASM_OP_MISC_PREFIX = 0xfc, @@ -790,6 +798,15 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif +#if (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0) \ + && WASM_ENABLE_SIMD != 0 +#define DEF_EXT_V128_HANDLE() \ + SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ + SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), +#else +#define DEF_EXT_V128_HANDLE() +#endif /* * Macro used to generate computed goto tables for the C interpreter. 
*/ @@ -1021,7 +1038,7 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(WASM_OP_MISC_PREFIX), /* 0xfc */ \ SET_GOTO_TABLE_SIMD_PREFIX_ELEM() /* 0xfd */ \ SET_GOTO_TABLE_ELEM(WASM_OP_ATOMIC_PREFIX), /* 0xfe */ \ - DEF_DEBUG_BREAK_HANDLE() \ + DEF_DEBUG_BREAK_HANDLE() DEF_EXT_V128_HANDLE() \ }; #ifdef __cplusplus From b2804c004f1db144e17f7338ec99cb4d82598645 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Tue, 21 Jan 2025 18:23:58 +0000 Subject: [PATCH 16/32] Fix splat opcodes, add V128 handling in preserve_referenced_local and reserve_block_ret --- core/iwasm/interpreter/wasm_interp_fast.c | 45 +++++--- core/iwasm/interpreter/wasm_loader.c | 127 ++++++++++++++++++---- 2 files changed, 136 insertions(+), 36 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 7015635056..9a026428f3 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -47,7 +47,7 @@ typedef float64 CellType_F64; && (app_addr) <= shared_heap_end_off - bytes + 1) #define shared_heap_addr_app_to_native(app_addr, native_addr) \ - native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off) + native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off) #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \ if (app_addr_in_shared_heap(app_addr, bytes)) \ @@ -1793,7 +1793,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, else cur_func_type = cur_func->u.func->func_type; - /* clang-format off */ + /* clang-format off */ #if WASM_ENABLE_GC == 0 if (cur_type != cur_func_type) { wasm_set_exception(module, "indirect call type mismatch"); @@ -5923,12 +5923,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; offset = read_uint32(frame_ip); V128 data = POP_V128(); - addr = POP_I32(); + int32 base = POP_I32(); + offset += base; + addr = GET_OPERAND(uint32, I32, 0); - V128 data; - data = POP_V128(); - - 
CHECK_MEMORY_OVERFLOW(16); + CHECK_MEMORY_OVERFLOW(32); STORE_V128(maddr, data); break; } @@ -5948,14 +5947,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v8x16_shuffle: { V128 indices; - V128 v2 = POP_V128(); - V128 v1 = POP_V128(); - addr_ret = GET_OFFSET(); - bh_memcpy_s(&indices, sizeof(V128), frame_ip, sizeof(V128)); frame_ip += sizeof(V128); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + V128 result; for (int i = 0; i < 16; i++) { uint8_t index = indices.i8x16[i]; @@ -5983,6 +5982,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMDE_V128_TO_SIMD_V128(simde_result, result); PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; } /* Splat */ @@ -6008,7 +6008,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_i8x16_splat: { - SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); + uint32 val = POP_I32(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_i8x16_splat(val); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i16x8_splat: @@ -6140,7 +6148,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = + simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i8x16_ne: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index d7bd34fde7..e19a648da3 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -9125,6 +9125,9 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, if (is_32bit_type(cur_type)) i++; + 
else if (cur_type == VALUE_TYPE_V128) { + i += 4; + } else i += 2; } @@ -9155,7 +9158,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode, return false; } - if (is_32bit_type(cur_type)) { + if (cur_type == VALUE_TYPE_V128) { + i += 4; + } + else if (is_32bit_type(cur_type)) { i++; } else { @@ -9498,6 +9504,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, || (type == VALUE_TYPE_EXTERNREF && *(int32 *)value == c->value.i32) #endif + || (type == VALUE_TYPE_V128 + && (0 == memcmp(value, &(c->value.v128), sizeof(V128)))) || (type == VALUE_TYPE_F64 && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) || (type == VALUE_TYPE_F32 @@ -9508,6 +9516,9 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } if (is_32bit_type(c->value_type)) operand_offset += 1; + else if (c->value_type == VALUE_TYPE_V128) { + operand_offset += 4; + } else operand_offset += 2; } @@ -9559,6 +9570,10 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, c->value.i32 = *(int32 *)value; ctx->const_cell_num++; break; + case VALUE_TYPE_V128: + bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, + sizeof(V128)); + ctx->const_cell_num++; #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_FUNCREF: @@ -9760,17 +9775,22 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, block_type, &return_types, &reftype_maps, &reftype_map_count); #endif - /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead - * of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ + /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/V128 + * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. 
*/ if (return_count == 1) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]); - if (cell <= 2 /* V128 isn't supported whose cell num is 4 */ - && block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { + if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { /* insert op_copy before else opcode */ if (opcode == WASM_OP_ELSE) skip_label(); - emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP - : EXT_OP_COPY_STACK_TOP_I64); + + if (cell == 4) { + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } + else { + emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP + : EXT_OP_COPY_STACK_TOP_I64); + } emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell)); emit_operand(loader_ctx, block->dynamic_offset); @@ -9805,11 +9825,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, for (i = (int32)return_count - 1; i >= 0; i--) { uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cells; - dynamic_offset -= cells; - if (dynamic_offset != *frame_offset) { - value_count++; - total_cel_num += cells; + if (frame_offset - cells < loader_ctx->frame_offset_bottom) { + set_error_buf(error_buf, error_buf_size, "frame offset underflow"); + goto fail; + } + + if (cells == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cells; + + for (int j = 0; j < 4; j++) { + if (*(frame_offset - j - 1) != (v128_dynamic + j)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + value_count++; + total_cel_num += cells; + } + + frame_offset -= cells; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cells; + dynamic_offset -= cells; + if (dynamic_offset != *frame_offset) { + value_count++; + total_cel_num += cells; + } } } @@ -9845,19 +9891,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, dynamic_offset = dynamic_offset_org; for (i = (int32)return_count - 1, j = 0; i >= 0; i--) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cell; - 
dynamic_offset -= cell; - if (dynamic_offset != *frame_offset) { - /* cell num */ - cells[j] = cell; - /* src offset */ - src_offsets[j] = *frame_offset; - /* dst offset */ - dst_offsets[j] = dynamic_offset; - j++; + + if (cell == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cell; + + for (int k = 0; k < 4; k++) { + if (*(frame_offset - k - 1) != (v128_dynamic + k)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + cells[j] = cell; + src_offsets[j] = *(frame_offset - cell); + dst_offsets[j] = v128_dynamic; + j++; + } + + frame_offset -= cell; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cell; + dynamic_offset -= cell; + if (dynamic_offset != *frame_offset) { + cells[j] = cell; + /* src offset */ + src_offsets[j] = *frame_offset; + /* dst offset */ + dst_offsets[j] = dynamic_offset; + j++; + } } + if (opcode == WASM_OP_ELSE) { - *frame_offset = dynamic_offset; + if (cell == 4) { + for (int k = 0; k < cell; k++) { + *(frame_offset + k) = dynamic_offset + k; + } + } + else { + *frame_offset = dynamic_offset; + } } else { loader_ctx->frame_offset = frame_offset; @@ -13031,6 +13108,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_TEE_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_TEE_LOCAL_FAST_V128); + emit_byte(loader_ctx, (uint8)local_offset); + } else { emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); From 28d74d2e6c036515ae62ea70db44d552eaae5d3f Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 21 Jan 2025 19:16:13 +0000 Subject: [PATCH 17/32] implement globals --- core/iwasm/interpreter/wasm_interp_fast.c | 27 ++++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 20 ++++++++++++++++- core/iwasm/interpreter/wasm_opcode.h | 7 +++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c 
b/core/iwasm/interpreter/wasm_interp_fast.c index 9a026428f3..d972fee233 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3595,7 +3595,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } - +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(WASM_OP_GET_GLOBAL_128) + { + global_idx = read_uint32(frame_ip); + bh_assert(global_idx < module->e->global_count); + global = globals + global_idx; + global_addr = get_global_addr(global_data, global); + addr_ret = GET_OFFSET(); + PUT_V128_TO_ADDR(frame_lp + addr_ret, + GET_V128_FROM_ADDR((uint32 *)global_addr)); + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_SET_GLOBAL) { global_idx = read_uint32(frame_ip); @@ -3662,6 +3674,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR(frame_lp + addr1)); HANDLE_OP_END(); } +#if WASM_ENABLE_SIMDE != 0 + HANDLE_OP(WASM_OP_SET_GLOBAL_128) + { + global_idx = read_uint32(frame_ip); + bh_assert(global_idx < module->e->global_count); + global = globals + global_idx; + global_addr = get_global_addr(global_data, global); + addr1 = GET_OFFSET(); + PUT_V128_TO_ADDR((uint32 *)global_addr, + GET_V128_FROM_ADDR(frame_lp + addr1)); + HANDLE_OP_END(); + } +#endif /* memory load instructions */ HANDLE_OP(WASM_OP_I32_LOAD) diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index e19a648da3..1a1ec8b355 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7300,6 +7300,8 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_SET_GLOBAL: case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: + case WASM_OP_GET_GLOBAL_128: + case WASM_OP_SET_GLOBAL_128: case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ break; @@ -9111,6 +9113,11 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, 
loader_ctx->preserved_local_offset++; emit_label(EXT_OP_COPY_STACK_TOP); } + else if (local_type == VALUE_TYPE_V128) { + if (loader_ctx->p_code_compiled) + loader_ctx->preserved_local_offset += 4; + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } else { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 2; @@ -13206,9 +13213,14 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_GET_GLOBAL_64); } + + if (global_type == VALUE_TYPE_V128) { + skip_label(); + emit_label(WASM_OP_GET_GLOBAL_128); + } +#endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); -#endif /* end of WASM_ENABLE_FAST_INTERP */ break; } @@ -13300,6 +13312,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_SET_GLOBAL_AUX_STACK); } +#if WASM_ENABLE_SIMDE != 0 + else if (global_type == VALUE_TYPE_V128) { + skip_label(); + emit_label(WASM_OP_SET_GLOBAL_128); + } +#endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); POP_OFFSET_TYPE(global_type); #endif /* end of WASM_ENABLE_FAST_INTERP */ diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index c3c5e00f80..47036e0db4 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -284,6 +284,8 @@ typedef enum WASMOpcode { EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, EXT_OP_COPY_STACK_TOP_V128 = 0xdf, + WASM_OP_GET_GLOBAL_128 = 0xe0, + WASM_OP_SET_GLOBAL_128 = 0xe1, #endif /* Post-MVP extend op prefix */ @@ -803,7 +805,10 @@ typedef enum WASMAtomicEXTOpcode { #define DEF_EXT_V128_HANDLE() \ SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ - SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_128), \ + 
SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), \ + #else #define DEF_EXT_V128_HANDLE() #endif From 80e6c986f4a3366ee4a43a83d21b1cfab300d0cb Mon Sep 17 00:00:00 2001 From: James Marsh Date: Fri, 24 Jan 2025 18:07:37 +0000 Subject: [PATCH 18/32] Fix incorrect memory overflow values + SIMD ifdefs --- core/iwasm/interpreter/wasm_interp_fast.c | 4 ++-- core/iwasm/interpreter/wasm_loader.c | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index d972fee233..3f107bdb21 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5909,7 +5909,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ + CHECK_MEMORY_OVERFLOW(4); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ @@ -5952,7 +5952,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, offset += base; addr = GET_OPERAND(uint32, I32, 0); - CHECK_MEMORY_OVERFLOW(32); + CHECK_MEMORY_OVERFLOW(4); STORE_V128(maddr, data); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 1a1ec8b355..a860b7dedb 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7300,8 +7300,10 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_SET_GLOBAL: case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: +#if WASM_ENABLE_SIMDE != 0 case WASM_OP_GET_GLOBAL_128: case WASM_OP_SET_GLOBAL_128: +#endif case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ break; @@ -9090,6 +9092,7 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, bool *preserved, char *error_buf, uint32 error_buf_size) { + uint32 i = 0; int16 preserved_offset = (int16)local_index; @@ -9113,11 +9116,13 
@@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->preserved_local_offset++; emit_label(EXT_OP_COPY_STACK_TOP); } +#if WASM_ENABLE_SIMDE != 0 else if (local_type == VALUE_TYPE_V128) { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 4; emit_label(EXT_OP_COPY_STACK_TOP_V128); } +#endif else { if (loader_ctx->p_code_compiled) loader_ctx->preserved_local_offset += 2; @@ -9790,11 +9795,12 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, /* insert op_copy before else opcode */ if (opcode == WASM_OP_ELSE) skip_label(); - +#if WASM_ENABLE_SIMDE != 0 if (cell == 4) { emit_label(EXT_OP_COPY_STACK_TOP_V128); } - else { +#endif + if (cell <= 2) { emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP : EXT_OP_COPY_STACK_TOP_I64); } @@ -13115,10 +13121,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, emit_label(EXT_OP_TEE_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } +#if WASM_ENABLE_SIMDE != 0 else if (local_type == VALUE_TYPE_V128) { emit_label(EXT_OP_TEE_LOCAL_FAST_V128); emit_byte(loader_ctx, (uint8)local_offset); } +#endif else { emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset); @@ -13213,11 +13221,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_GET_GLOBAL_64); } - +#if WASM_ENABLE_SIMDE != 0 if (global_type == VALUE_TYPE_V128) { skip_label(); emit_label(WASM_OP_GET_GLOBAL_128); } +#endif #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); From bd97970828687358e560df27bc019c2981ed961e Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 22 Jan 2025 10:09:54 +0000 Subject: [PATCH 19/32] Fix load/load_splat macros --- core/iwasm/interpreter/wasm_interp_fast.c | 74 +++++++++-------------- core/iwasm/interpreter/wasm_loader.c | 12 ++-- 2 files changed, 36 insertions(+), 50 deletions(-) diff --git 
a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 3f107bdb21..384bc0ad37 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3604,7 +3604,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, global_addr = get_global_addr(global_data, global); addr_ret = GET_OFFSET(); PUT_V128_TO_ADDR(frame_lp + addr_ret, - GET_V128_FROM_ADDR((uint32 *)global_addr)); + GET_V128_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } #endif @@ -3683,7 +3683,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, global_addr = get_global_addr(global_data, global); addr1 = GET_OFFSET(); PUT_V128_TO_ADDR((uint32 *)global_addr, - GET_V128_FROM_ADDR(frame_lp + addr1)); + GET_V128_FROM_ADDR(frame_lp + addr1)); HANDLE_OP_END(); } #endif @@ -5843,66 +5843,54 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); break; } -#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ - do { \ - uint32 offset, addr; \ - offset = read_uint32(frame_ip); \ - addr = GET_OPERAND(uint32, I32, 0); \ - frame_ip += 2; \ - addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ - \ - simde_v128_t simde_result = simde_func(maddr); \ - \ - V128 result; \ - SIMDE_V128_TO_SIMD_V128(simde_result, result); \ - \ - V128 reversed_result; \ - for (int i = 0; i < num_elements; i++) { \ - reversed_result.i##element_size##x##num_elements[i] = \ - result.i##element_size##x##num_elements[num_elements - 1 - i]; \ - } \ - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); \ - \ - break; \ +#define SIMD_LOAD_OP(simde_func, element_size, num_elements) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(4); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + 
SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + \ + break; \ } while (0) case SIMD_v128_load8x8_s: { - SIMD_LOAD_OP(SIMD_v128_load8x8_s, - simde_wasm_i16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_i16x8_load8x8, 16, 8); break; } case SIMD_v128_load8x8_u: { - SIMD_LOAD_OP(SIMD_v128_load8x8_u, - simde_wasm_u16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_u16x8_load8x8, 16, 8); break; } case SIMD_v128_load16x4_s: { - SIMD_LOAD_OP(SIMD_v128_load16x4_s, - simde_wasm_i32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_i32x4_load16x4, 32, 4); break; } case SIMD_v128_load16x4_u: { - SIMD_LOAD_OP(SIMD_v128_load16x4_u, - simde_wasm_u32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_u32x4_load16x4, 32, 4); break; } case SIMD_v128_load32x2_s: { - SIMD_LOAD_OP(SIMD_v128_load32x2_s, - simde_wasm_i64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_i64x2_load32x2, 64, 2); break; } case SIMD_v128_load32x2_u: { - SIMD_LOAD_OP(SIMD_v128_load32x2_u, - simde_wasm_u64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_u64x2_load32x2, 64, 2); break; } -#define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \ +#define SIMD_LOAD_SPLAT_OP(simde_func) \ do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ @@ -5921,26 +5909,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_load8_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat, - simde_wasm_v128_load8_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load8_splat); break; } case SIMD_v128_load16_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat, - simde_wasm_v128_load16_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load16_splat); break; } case SIMD_v128_load32_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat, - simde_wasm_v128_load32_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load32_splat); break; } case SIMD_v128_load64_splat: { - SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat, - simde_wasm_v128_load64_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load64_splat); 
break; } case SIMD_v128_store: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a860b7dedb..1eb13d7dd2 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -9135,13 +9135,15 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, loader_ctx->frame_offset_bottom[i] = preserved_offset; } - if (is_32bit_type(cur_type)) - i++; - else if (cur_type == VALUE_TYPE_V128) { + if (cur_type == VALUE_TYPE_V128) { i += 4; } - else + else if (is_32bit_type(cur_type)) { + i++; + } + else { i += 2; + } } (void)error_buf; @@ -13310,7 +13312,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, func->has_op_set_global_aux_stack = true; #endif } -#else /* else of WASM_ENABLE_FAST_INTERP */ +#else /* else of WASM_ENABLE_FAST_INTERP */ if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { skip_label(); From 253d741f77a351a919a7ac7df7b33b8e04d1c772 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 27 Jan 2025 15:57:58 +0000 Subject: [PATCH 20/32] formatting commit --- core/iwasm/interpreter/wasm_interp_fast.c | 6 +++--- core/iwasm/interpreter/wasm_loader.c | 2 +- core/iwasm/interpreter/wasm_opcode.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 384bc0ad37..c8f00aed92 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -47,7 +47,7 @@ typedef float64 CellType_F64; && (app_addr) <= shared_heap_end_off - bytes + 1) #define shared_heap_addr_app_to_native(app_addr, native_addr) \ - native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off) + native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off) #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \ if (app_addr_in_shared_heap(app_addr, bytes)) \ @@ -1793,7 +1793,7 @@ 
wasm_interp_call_func_bytecode(WASMModuleInstance *module, else cur_func_type = cur_func->u.func->func_type; - /* clang-format off */ + /* clang-format off */ #if WASM_ENABLE_GC == 0 if (cur_type != cur_func_type) { wasm_set_exception(module, "indirect call type mismatch"); @@ -5897,7 +5897,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(4); \ + CHECK_MEMORY_OVERFLOW(4); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 1eb13d7dd2..efd95f426f 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -13217,7 +13217,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #endif *p_org = WASM_OP_GET_GLOBAL_64; } -#else /* else of WASM_ENABLE_FAST_INTERP */ +#else /* else of WASM_ENABLE_FAST_INTERP */ if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { skip_label(); diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 47036e0db4..0ddf8153ac 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -807,7 +807,7 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), #else #define DEF_EXT_V128_HANDLE() From 72535e4e4523a01d92487e338fa46f145ab0a7a8 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Mon, 27 Jan 2025 16:03:59 +0000 Subject: [PATCH 21/32] correct endif wasm loader --- core/iwasm/interpreter/wasm_interp_fast.c | 1 - core/iwasm/interpreter/wasm_loader.c | 2 +- core/iwasm/interpreter/wasm_opcode.h | 9 +++------ core/iwasm/libraries/simde/simde.cmake | 7 +++++-- 4 
files changed, 9 insertions(+), 10 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index c8f00aed92..ee1ba73c6f 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -7019,7 +7019,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i32x4_add: { - SIMD_DOUBLE_OP(simde_wasm_i32x4_add); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index efd95f426f..4e2d6f3378 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -13228,10 +13228,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, skip_label(); emit_label(WASM_OP_GET_GLOBAL_128); } -#endif #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); PUSH_OFFSET_TYPE(global_type); +#endif /* end of WASM_ENABLE_FAST_INTERP */ break; } diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 0ddf8153ac..a8118ead0e 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,8 +278,7 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ && WASM_ENABLE_SIMD != 0 EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, @@ -791,8 +790,7 @@ typedef enum WASMAtomicEXTOpcode { #endif #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), @@ -800,8 +798,7 @@ typedef enum WASMAtomicEXTOpcode { #define 
SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif -#if (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0) \ - && WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 #define DEF_EXT_V128_HANDLE() \ SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index b36e356945..1219c8e5b1 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -4,9 +4,12 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") +if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*") add_definitions (-DWASM_ENABLE_SIMDE=1) -endif () +else() + message(WARNING "Disabling SIMD for fast interpreter as the target is not supported") + set(WAMR_BUILD_SIMD 0) +endif() include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) From d01b702f4b3a12f11729b4c9b78a24fb357e6917 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:02:41 +0000 Subject: [PATCH 22/32] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index a8118ead0e..55c94b031b 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -798,7 +798,7 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif -#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 #define DEF_EXT_V128_HANDLE() \ SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ From 595a2b3a5278e53b7a72e77bc1ec41b195506e34 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 
13:02:48 +0000 Subject: [PATCH 23/32] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 55c94b031b..ceb75f26a3 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -790,7 +790,7 @@ typedef enum WASMAtomicEXTOpcode { #endif #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ +#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), From 1b267b007764f02d09d174c1d8b94be8d1da4984 Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:02:57 +0000 Subject: [PATCH 24/32] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- core/iwasm/interpreter/wasm_opcode.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index ceb75f26a3..c8cdd0c2ff 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -803,8 +803,8 @@ typedef enum WASMAtomicEXTOpcode { SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_128), + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), #else #define DEF_EXT_V128_HANDLE() From f7c4e9f3286f8ad798203d6a0c634fb05c55fdbd Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 13:03:24 +0000 Subject: [PATCH 25/32] Update core/iwasm/interpreter/wasm_opcode.h Co-authored-by: Marcin Kolny --- 
core/iwasm/interpreter/wasm_opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index c8cdd0c2ff..176515c570 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,7 +278,7 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif -#if (WASM_ENABLE_JIT != 0 || (WASM_ENABLE_FAST_INTERP != 0)) \ +#if WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0 \ && WASM_ENABLE_SIMD != 0 EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, From 3e9a5b0a2a47a063890eb96d62b45f5b7dd303ec Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Tue, 28 Jan 2025 16:06:15 +0000 Subject: [PATCH 26/32] Fixes --- build-scripts/config_common.cmake | 14 ++++++++++---- build-scripts/runtime_lib.cmake | 10 ++++++++-- core/iwasm/interpreter/wasm_interp_fast.c | 18 +++++++++--------- core/iwasm/interpreter/wasm_loader.c | 9 +++++---- core/iwasm/interpreter/wasm_opcode.h | 22 +++++++++++----------- core/iwasm/libraries/simde/simde.cmake | 7 +------ 6 files changed, 44 insertions(+), 36 deletions(-) diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index b6503d808d..3db29e848c 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -320,12 +320,18 @@ else () message (" Wakeup of blocking operations enabled") endif () if (WAMR_BUILD_SIMD EQUAL 1) - if (NOT WAMR_BUILD_TARGET MATCHES "RISCV64.*") - add_definitions (-DWASM_ENABLE_SIMD=1) - message (" SIMD enabled") - else () + set(SIMD_ENABLED 0) + if (WAMR_BUILD_TARGET MATCHES "RISCV64.*") + set(WAMR_BUILD_SIMD 0) message (" SIMD disabled due to not supported on target RISCV64") + elseif (WAMR_BUILD_FAST_INTERP EQUAL 1 AND WAMR_BUILD_SIMDE EQUAL 0) + set(WAMR_BUILD_SIMD 0) + message(" SIMD disabled as the simde is not built in fast interpreter mode") + else() + set(SIMD_ENABLED 1) + message (" SIMD 
enabled") endif () + add_definitions(-DWASM_ENABLE_SIMD=${SIMD_ENABLED}) endif () if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1) add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1) diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 29789d671c..ec3a370d61 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -155,8 +155,14 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () -if (WAMR_BUILD_LIB_SIMDE EQUAL 1) - include (${IWASM_DIR}/libraries/simde/simde.cmake) +if (WAMR_BUILD_SIMD EQUAL 1 AND WAMR_BUILD_FAST_INTERP EQUAL 1) + if (NOT (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*")) + message(STATUS "SIMDe doesnt support platform " ${WAMR_BUILD_TARGET}) + set(WAMR_BUILD_SIMDE 0) + else() + include (${IWASM_DIR}/libraries/simde/simde.cmake) + set (WAMR_BUILD_SIMDE 1) + endif() endif () if (WAMR_BUILD_WASM_CACHE EQUAL 1) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index ee1ba73c6f..5aee64b4df 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3541,7 +3541,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } -#if WASM_ENABLE_SIMDE != 0 +#if WASM_ENABLE_SIMD != 0 HANDLE_OP(EXT_OP_SET_LOCAL_FAST_V128) HANDLE_OP(EXT_OP_TEE_LOCAL_FAST_V128) { @@ -3595,8 +3595,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, GET_I64_FROM_ADDR((uint32 *)global_addr)); HANDLE_OP_END(); } -#if WASM_ENABLE_SIMDE != 0 - HANDLE_OP(WASM_OP_GET_GLOBAL_128) +#if WASM_ENABLE_SIMD != 0 + HANDLE_OP(WASM_OP_GET_GLOBAL_V128) { global_idx = read_uint32(frame_ip); bh_assert(global_idx < module->e->global_count); @@ -3675,7 +3675,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } #if WASM_ENABLE_SIMDE != 0 - HANDLE_OP(WASM_OP_SET_GLOBAL_128) + HANDLE_OP(WASM_OP_SET_GLOBAL_V128) { global_idx 
= read_uint32(frame_ip); bh_assert(global_idx < module->e->global_count); @@ -4932,7 +4932,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } -#if WASM_ENABLE_SIMDE != 0 +#if WASM_ENABLE_SIMD != 0 HANDLE_OP(EXT_OP_COPY_STACK_TOP_V128) { addr1 = GET_OFFSET(); @@ -5837,7 +5837,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { uint32 offset, addr; offset = read_uint32(frame_ip); - addr = POP_I32(); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; addr_ret = GET_OFFSET(); CHECK_MEMORY_OVERFLOW(16); PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); @@ -5850,7 +5851,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, addr = GET_OPERAND(uint32, I32, 0); \ frame_ip += 2; \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(4); \ + CHECK_MEMORY_OVERFLOW(16); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ @@ -5858,7 +5859,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMDE_V128_TO_SIMD_V128(simde_result, result); \ PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ \ - break; \ } while (0) case SIMD_v128_load8x8_s: { @@ -5936,7 +5936,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, offset += base; addr = GET_OPERAND(uint32, I32, 0); - CHECK_MEMORY_OVERFLOW(4); + CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data); break; } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 4e2d6f3378..eaba5555b0 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7301,8 +7301,8 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_GET_GLOBAL_64: case WASM_OP_SET_GLOBAL_64: #if WASM_ENABLE_SIMDE != 0 - case WASM_OP_GET_GLOBAL_128: - case WASM_OP_SET_GLOBAL_128: + case WASM_OP_GET_GLOBAL_V128: + case WASM_OP_SET_GLOBAL_V128: #endif case WASM_OP_SET_GLOBAL_AUX_STACK: skip_leb_uint32(p, p_end); /* local index */ @@ -9588,6 +9588,7 @@ 
wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, sizeof(V128)); ctx->const_cell_num++; + break; #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_FUNCREF: @@ -13226,7 +13227,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_SIMDE != 0 if (global_type == VALUE_TYPE_V128) { skip_label(); - emit_label(WASM_OP_GET_GLOBAL_128); + emit_label(WASM_OP_GET_GLOBAL_V128); } #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); @@ -13326,7 +13327,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_SIMDE != 0 else if (global_type == VALUE_TYPE_V128) { skip_label(); - emit_label(WASM_OP_SET_GLOBAL_128); + emit_label(WASM_OP_SET_GLOBAL_V128); } #endif /* end of WASM_ENABLE_SIMDE */ emit_uint32(loader_ctx, global_idx); diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 176515c570..9660bb1236 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -278,13 +278,13 @@ typedef enum WASMOpcode { DEBUG_OP_BREAK = 0xdc, /* debug break point */ #endif -#if WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0 \ - && WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_JIT != 0 \ + || WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 EXT_OP_SET_LOCAL_FAST_V128 = 0xdd, EXT_OP_TEE_LOCAL_FAST_V128 = 0xde, EXT_OP_COPY_STACK_TOP_V128 = 0xdf, - WASM_OP_GET_GLOBAL_128 = 0xe0, - WASM_OP_SET_GLOBAL_128 = 0xe1, + WASM_OP_GET_GLOBAL_V128 = 0xe0, + WASM_OP_SET_GLOBAL_V128 = 0xe1, #endif /* Post-MVP extend op prefix */ @@ -798,13 +798,13 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() #endif -#if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMD != 0 -#define DEF_EXT_V128_HANDLE() \ - SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), \ - 
SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), \ - SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), \ - SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), +#if (WASM_ENABLE_FAST_INTERP != 0) && WASM_ENABLE_SIMD != 0 +#define DEF_EXT_V128_HANDLE() \ + SET_GOTO_TABLE_ELEM(EXT_OP_SET_LOCAL_FAST_V128), /* 0xdd */ \ + SET_GOTO_TABLE_ELEM(EXT_OP_TEE_LOCAL_FAST_V128), /* 0xde */ \ + SET_GOTO_TABLE_ELEM(EXT_OP_COPY_STACK_TOP_V128), /* 0xdf */ \ + SET_GOTO_TABLE_ELEM(WASM_OP_GET_GLOBAL_V128), /* 0xe0 */ \ + SET_GOTO_TABLE_ELEM(WASM_OP_SET_GLOBAL_V128), /* 0xe1 */ #else #define DEF_EXT_V128_HANDLE() diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index 1219c8e5b1..eeb0e8d1f2 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -4,12 +4,7 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*") - add_definitions (-DWASM_ENABLE_SIMDE=1) -else() - message(WARNING "Disabling SIMD for fast interpreter as the target is not supported") - set(WAMR_BUILD_SIMD 0) -endif() +add_definitions (-DWASM_ENABLE_SIMDE=1) include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) From 93feee8ee3ed2fb02b92fe481c25384d18fdd56d Mon Sep 17 00:00:00 2001 From: Maks Litskevich Date: Fri, 24 Jan 2025 12:27:33 +0000 Subject: [PATCH 27/32] Fix load/store Fix v128 load/store style --- core/iwasm/interpreter/wasm_interp_fast.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 5aee64b4df..6dc9351e03 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5837,8 +5837,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { uint32 offset, addr; offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - 
frame_ip += 2; + addr = POP_I32(); addr_ret = GET_OFFSET(); CHECK_MEMORY_OVERFLOW(16); PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); @@ -5932,9 +5931,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; offset = read_uint32(frame_ip); V128 data = POP_V128(); - int32 base = POP_I32(); - offset += base; - addr = GET_OPERAND(uint32, I32, 0); + addr = POP_I32(); + + V128 data; + data = POP_V128(); CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data); From 59cfa1aa056b6fc21641b8d166b1d8f171939805 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Mon, 17 Feb 2025 16:10:20 +0000 Subject: [PATCH 28/32] Fix spec tests when WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS is 0 --- core/iwasm/common/wasm_runtime_common.h | 53 ++++++++++++---- core/iwasm/interpreter/wasm_interp_fast.c | 73 ++++++++++++----------- 2 files changed, 77 insertions(+), 49 deletions(-) diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 3c4460b34b..10c725edef 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -287,19 +287,33 @@ STORE_V128(void *addr, V128 value) if ((addr_ & (uintptr_t)15) == 0) { *(V128 *)addr = value; } + else if ((addr_ & (uintptr_t)7) == 0) { + u.val = value; + ((uint64 *)(addr))[0] = u.u64[0]; + ((uint64 *)(addr))[1] = u.u64[1]; + } + else if ((addr_ & (uintptr_t)3) == 0) { + u.val = value; + ((uint32 *)addr)[0] = u.u32[0]; + ((uint32 *)addr)[1] = u.u32[1]; + ((uint32 *)addr)[2] = u.u32[2]; + ((uint32 *)addr)[3] = u.u32[3]; + } + else if ((addr_ & (uintptr_t)1) == 0) { + u.val = value; + ((uint16 *)addr)[0] = u.u16[0]; + ((uint16 *)addr)[1] = u.u16[1]; + ((uint16 *)addr)[2] = u.u16[2]; + ((uint16 *)addr)[3] = u.u16[3]; + ((uint16 *)addr)[4] = u.u16[4]; + ((uint16 *)addr)[5] = u.u16[5]; + ((uint16 *)addr)[6] = u.u16[6]; + ((uint16 *)addr)[7] = u.u16[7]; + } else { u.val = value; - if ((addr_ & (uintptr_t)7) == 0) { - ((uint64 *)(addr))[0] = 
u.u64[0]; - ((uint64 *)(addr))[1] = u.u64[1]; - } - else { - bh_assert((addr_ & (uintptr_t)3) == 0); - ((uint32 *)addr)[0] = u.u32[0]; - ((uint32 *)addr)[1] = u.u32[1]; - ((uint32 *)addr)[2] = u.u32[2]; - ((uint32 *)addr)[3] = u.u32[3]; - } + for (int i = 0; i < 16; i++) + ((uint8 *)addr)[i] = u.u8[i]; } } @@ -322,13 +336,26 @@ LOAD_V128(void *addr) u.u64[0] = ((uint64 *)addr)[0]; u.u64[1] = ((uint64 *)addr)[1]; } - else { - bh_assert((addr1 & (uintptr_t)3) == 0); + else if ((addr1 & (uintptr_t)3) == 0) { u.u32[0] = ((uint32 *)addr)[0]; u.u32[1] = ((uint32 *)addr)[1]; u.u32[2] = ((uint32 *)addr)[2]; u.u32[3] = ((uint32 *)addr)[3]; } + else if ((addr1 & (uintptr_t)1) == 0) { + u.u16[0] = ((uint16 *)addr)[0]; + u.u16[1] = ((uint16 *)addr)[1]; + u.u16[2] = ((uint16 *)addr)[2]; + u.u16[3] = ((uint16 *)addr)[3]; + u.u16[4] = ((uint16 *)addr)[4]; + u.u16[5] = ((uint16 *)addr)[5]; + u.u16[6] = ((uint16 *)addr)[6]; + u.u16[7] = ((uint16 *)addr)[7]; + } + else { + for (int i = 0; i < 16; i++) + u.u8[i] = ((uint8 *)addr)[i]; + } return u.val; } diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 6dc9351e03..d1be86f081 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5843,60 +5843,58 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); break; } -#define SIMD_LOAD_OP(simde_func, element_size, num_elements) \ - do { \ - uint32 offset, addr; \ - offset = read_uint32(frame_ip); \ - addr = GET_OPERAND(uint32, I32, 0); \ - frame_ip += 2; \ - addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(16); \ - \ - simde_v128_t simde_result = simde_func(maddr); \ - \ - V128 result; \ - SIMDE_V128_TO_SIMD_V128(simde_result, result); \ - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ - \ +#define SIMD_LOAD_OP(simde_func) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = POP_I32(); \ + 
addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(8); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + \ } while (0) case SIMD_v128_load8x8_s: { - SIMD_LOAD_OP(simde_wasm_i16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_i16x8_load8x8); break; } case SIMD_v128_load8x8_u: { - SIMD_LOAD_OP(simde_wasm_u16x8_load8x8, 16, 8); + SIMD_LOAD_OP(simde_wasm_u16x8_load8x8); break; } case SIMD_v128_load16x4_s: { - SIMD_LOAD_OP(simde_wasm_i32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_i32x4_load16x4); break; } case SIMD_v128_load16x4_u: { - SIMD_LOAD_OP(simde_wasm_u32x4_load16x4, 32, 4); + SIMD_LOAD_OP(simde_wasm_u32x4_load16x4); break; } case SIMD_v128_load32x2_s: { - SIMD_LOAD_OP(simde_wasm_i64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_i64x2_load32x2); break; } case SIMD_v128_load32x2_u: { - SIMD_LOAD_OP(simde_wasm_u64x2_load32x2, 64, 2); + SIMD_LOAD_OP(simde_wasm_u64x2_load32x2); break; } -#define SIMD_LOAD_SPLAT_OP(simde_func) \ +#define SIMD_LOAD_SPLAT_OP(simde_func, width) \ do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ - addr = GET_OPERAND(uint32, I32, 0); \ - frame_ip += 2; \ + addr = POP_I32(); \ addr_ret = GET_OFFSET(); \ - CHECK_MEMORY_OVERFLOW(4); \ + CHECK_MEMORY_OVERFLOW(width / 8); \ \ simde_v128_t simde_result = simde_func(maddr); \ \ @@ -5908,22 +5906,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_load8_splat: { - SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load8_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load8_splat, 8); break; } case SIMD_v128_load16_splat: { - SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load16_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load16_splat, 16); break; } case SIMD_v128_load32_splat: { - SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load32_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load32_splat, 32); break; } case SIMD_v128_load64_splat: { - 
SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load64_splat); + SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load64_splat, 64); break; } case SIMD_v128_store: @@ -5933,9 +5931,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, V128 data = POP_V128(); addr = POP_I32(); - V128 data; - data = POP_V128(); - CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data); break; @@ -5952,7 +5947,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip); break; } - // TODO: Add a faster SIMD implementation + /* TODO: Add a faster SIMD implementation */ case SIMD_v8x16_shuffle: { V128 indices; @@ -6053,15 +6048,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SPLAT_OP_F64(simde_wasm_f64x2_splat); break; } +#if WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 +#define SIMD_LANE_HANDLE_UNALIGNED_ACCESS() +#else +#define SIMD_LANE_HANDLE_UNALIGNED_ACCESS() *frame_ip++; +#endif #define SIMD_EXTRACT_LANE_OP(register, return_type, push_elem) \ do { \ uint8 lane = *frame_ip++; \ + SIMD_LANE_HANDLE_UNALIGNED_ACCESS(); \ V128 v = POP_V128(); \ push_elem((return_type)(v.register[lane])); \ } while (0) #define SIMD_REPLACE_LANE_OP(register, return_type, pop_elem) \ do { \ uint8 lane = *frame_ip++; \ + SIMD_LANE_HANDLE_UNALIGNED_ACCESS(); \ return_type replacement = pop_elem(); \ V128 v = POP_V128(); \ v.register[lane] = replacement; \ @@ -6482,6 +6484,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, V128 vec = POP_V128(); \ addr = POP_I32(); \ int lane = *frame_ip++; \ + SIMD_LANE_HANDLE_UNALIGNED_ACCESS(); \ SIMD_LOAD_LANE_COMMON(vec, register, lane, width); \ } while (0) @@ -6512,6 +6515,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, V128 vec = POP_V128(); \ addr = POP_I32(); \ int lane = *frame_ip++; \ + SIMD_LANE_HANDLE_UNALIGNED_ACCESS(); \ CHECK_MEMORY_OVERFLOW(width / 8); \ if (width == 64) { \ STORE_I64(maddr, vec.register[lane]); \ @@ -6659,7 +6663,6 @@ 
wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_f32x4_nearest); break; } -// TODO: Check count? #define SIMD_LANE_SHIFT(simde_func) \ do { \ int32 count = POP_I32(); \ @@ -7131,8 +7134,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_u64x2_extend_high_u32x4); break; } - - // TODO: Verify count works case SIMD_i64x2_shl: { SIMD_LANE_SHIFT(simde_wasm_i64x2_shl); From dea2e168123a09aabc9df3aee8d8524af3591324 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Fri, 21 Feb 2025 14:44:12 +0000 Subject: [PATCH 29/32] Resolve merge conflicts arising from main -> dev/simd_for_interp and implement fast interpreter const offset loader support for V128 --- .github/workflows/compilation_on_sgx.yml | 2 +- build-scripts/config_common.cmake | 20 +-- core/iwasm/interpreter/wasm_loader.c | 204 +++++++++++------------ 3 files changed, 107 insertions(+), 119 deletions(-) diff --git a/.github/workflows/compilation_on_sgx.yml b/.github/workflows/compilation_on_sgx.yml index 70597c366a..3233d35e4d 100644 --- a/.github/workflows/compilation_on_sgx.yml +++ b/.github/workflows/compilation_on_sgx.yml @@ -49,7 +49,7 @@ env: # ref types enabled in wamrc by default, so we need to enable it for iwasm in AOT mode AOT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0 -DWAMR_BUILD_REF_TYPES=1" CLASSIC_INTERP_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" - FAST_INTERP_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" + FAST_INTERP_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0 -DWAMR_BUILD_SIMD=0" FAST_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=1 
-DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=1" LLVM_LAZY_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=1" LLVM_EAGER_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=0" diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 8a8155231d..f1001afe5a 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -300,11 +300,9 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() -<<<<<<< HEAD if ((WAMR_BUILD_LIB_SIMDE EQUAL 1)) message (" Lib simde enabled") endif() -======= ################## WAMR features ################## if (WAMR_BUILD_MULTI_MODULE EQUAL 1) add_definitions (-DWASM_ENABLE_MULTI_MODULE=1) @@ -341,7 +339,6 @@ if (WAMR_BUILD_MULTI_MEMORY EQUAL 1) add_definitions (-DWASM_ENABLE_MULTI_MEMORY=1) set (WAMR_BUILD_DEBUG_INTERP 0) endif () ->>>>>>> original/main if (WAMR_BUILD_MINI_LOADER EQUAL 1) add_definitions (-DWASM_ENABLE_MINI_LOADER=1) message (" WASM mini loader enabled") @@ -369,19 +366,12 @@ else () message (" Wakeup of blocking operations enabled") endif () if (WAMR_BUILD_SIMD EQUAL 1) -<<<<<<< HEAD - set(SIMD_ENABLED 0) - if (WAMR_BUILD_TARGET MATCHES "RISCV64.*") - set(WAMR_BUILD_SIMD 0) -======= - if (NOT WAMR_BUILD_TARGET MATCHES "RISCV64.*") - add_definitions (-DWASM_ENABLE_SIMD=1) - else () ->>>>>>> original/main + if (WAMR_BUILD_FAST_INTERP EQUAL 1 AND WAMR_BUILD_SIMDE EQUAL 0) + set(SIMD_ENABLED 0) + message(" SIMD disabled for fast-interp as simde is not being built") + elseif (WAMR_BUILD_TARGET MATCHES "RISCV64.*") + set(SIMD_ENABLED 0) message (" SIMD disabled due to not supported on target RISCV64") - elseif (WAMR_BUILD_FAST_INTERP EQUAL 1 AND WAMR_BUILD_SIMDE EQUAL 0) - set(WAMR_BUILD_SIMD 0) - message(" SIMD disabled as the simde is not built in fast interpreter mode") else() 
set(SIMD_ENABLED 1) message (" SIMD enabled") diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index ac5622025a..630a64ea31 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7999,6 +7999,10 @@ typedef struct WASMLoaderContext { int32 *i32_consts; uint32 i32_const_max_num; uint32 i32_const_num; + /* const buffer for V128 */ + V128 *v128_consts; + uint32 v128_const_max_num; + uint32 v128_const_num; /* processed code */ uint8 *p_code_compiled; @@ -8232,6 +8236,8 @@ wasm_loader_ctx_destroy(WASMLoaderContext *ctx) wasm_runtime_free(ctx->i64_consts); if (ctx->i32_consts) wasm_runtime_free(ctx->i32_consts); + if (ctx->v128_consts) + wasm_runtime_free(ctx->v128_consts); #endif wasm_runtime_free(ctx); } @@ -8289,6 +8295,11 @@ wasm_loader_ctx_init(WASMFunction *func, char *error_buf, uint32 error_buf_size) loader_malloc(sizeof(int32) * loader_ctx->i32_const_max_num, error_buf, error_buf_size))) goto fail; + loader_ctx->v128_const_max_num = 8; + if (!(loader_ctx->v128_consts = + loader_malloc(sizeof(V128) * loader_ctx->v128_const_max_num, + error_buf, error_buf_size))) + goto fail; if (func->param_cell_num >= (int32)INT16_MAX - func->local_cell_num) { set_error_buf(error_buf, error_buf_size, @@ -9569,6 +9580,15 @@ cmp_i32_const(const void *p_i32_const1, const void *p_i32_const2) return (i32_const1 < i32_const2) ? -1 : (i32_const1 > i32_const2) ? 
1 : 0; } +static int +cmp_v128_const(const void *p_v128_const1, const void *p_v128_const2) +{ + V128 v128_const1 = *(V128 *)p_v128_const1; + V128 v128_const2 = *(V128 *)p_v128_const2; + + return memcmp(&v128_const1, &v128_const2, sizeof(V128)); +} + static bool wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, int16 *offset, char *error_buf, @@ -9584,39 +9604,6 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, return true; } -<<<<<<< HEAD - /* Search existing constant */ - for (c = (Const *)ctx->const_buf; - (uint8 *)c < ctx->const_buf + ctx->num_const * sizeof(Const); c++) { - /* TODO: handle v128 type? */ - if ((type == c->value_type) - && ((type == VALUE_TYPE_I64 && *(int64 *)value == c->value.i64) - || (type == VALUE_TYPE_I32 && *(int32 *)value == c->value.i32) -#if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 - || (type == VALUE_TYPE_FUNCREF - && *(int32 *)value == c->value.i32) - || (type == VALUE_TYPE_EXTERNREF - && *(int32 *)value == c->value.i32) -#endif - || (type == VALUE_TYPE_V128 - && (0 == memcmp(value, &(c->value.v128), sizeof(V128)))) - || (type == VALUE_TYPE_F64 - && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) - || (type == VALUE_TYPE_F32 - && (0 - == memcmp(value, &(c->value.f32), sizeof(float32)))))) { - operand_offset = c->slot_index; - break; - } - if (is_32bit_type(c->value_type)) - operand_offset += 1; - else if (c->value_type == VALUE_TYPE_V128) { - operand_offset += 4; - } - else - operand_offset += 2; - } -======= /* Traverse the list if the const num is small */ if (ctx->i64_const_num < 10) { for (uint32 i = 0; i < ctx->i64_const_num; i++) { @@ -9626,7 +9613,6 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } } } ->>>>>>> original/main if (ctx->i64_const_num >= ctx->i64_const_max_num) { MEM_REALLOC(ctx->i64_consts, @@ -9636,6 +9622,32 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } 
ctx->i64_consts[ctx->i64_const_num++] = *(int64 *)value; } + else if (type == VALUE_TYPE_V128) { + /* No slot left, emit const instead */ + if (ctx->v128_const_num * 4 > INT16_MAX - 2) { + *offset = 0; + return true; + } + + /* Traverse the list if the const num is small */ + if (ctx->v128_const_num < 10) { + for (uint32 i = 0; i < ctx->v128_const_num; i++) { + if (memcmp(&ctx->v128_consts[i], value, sizeof(V128)) + == 0) { + *offset = -1; + return true; + } + } + } + + if (ctx->v128_const_num >= ctx->v128_const_max_num) { + MEM_REALLOC(ctx->v128_consts, + sizeof(V128) * ctx->v128_const_max_num, + sizeof(V128) * (ctx->v128_const_max_num * 2)); + ctx->v128_const_max_num *= 2; + } + ctx->v128_consts[ctx->v128_const_num++] = *(V128 *)value; + } else { /* Treat i32 and f32 as the same by reading i32 value from the raw bytes */ @@ -9666,65 +9678,6 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, ctx->i32_consts[ctx->i32_const_num++] = *(int32 *)value; } -<<<<<<< HEAD - /* The max cell num of const buffer is 32768 since the valid index range - * is -32768 ~ -1. 
Return an invalid index 0 to indicate the buffer is - * full */ - if (ctx->const_cell_num > INT16_MAX - bytes_to_increase + 1) { - *offset = 0; - return true; - } - - if ((uint8 *)c == ctx->const_buf + ctx->const_buf_size) { - MEM_REALLOC(ctx->const_buf, ctx->const_buf_size, - ctx->const_buf_size + 4 * sizeof(Const)); - ctx->const_buf_size += 4 * sizeof(Const); - c = (Const *)(ctx->const_buf + ctx->num_const * sizeof(Const)); - } - c->value_type = type; - switch (type) { - case VALUE_TYPE_F64: - bh_memcpy_s(&(c->value.f64), sizeof(WASMValue), value, - sizeof(float64)); - ctx->const_cell_num += 2; - /* The const buf will be reversed, we use the second cell */ - /* of the i64/f64 const so the final offset is correct */ - operand_offset++; - break; - case VALUE_TYPE_I64: - c->value.i64 = *(int64 *)value; - ctx->const_cell_num += 2; - operand_offset++; - break; - case VALUE_TYPE_F32: - bh_memcpy_s(&(c->value.f32), sizeof(WASMValue), value, - sizeof(float32)); - ctx->const_cell_num++; - break; - case VALUE_TYPE_I32: - c->value.i32 = *(int32 *)value; - ctx->const_cell_num++; - break; - case VALUE_TYPE_V128: - bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, - sizeof(V128)); - ctx->const_cell_num++; - break; -#if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 - case VALUE_TYPE_EXTERNREF: - case VALUE_TYPE_FUNCREF: - c->value.i32 = *(int32 *)value; - ctx->const_cell_num++; - break; -#endif - default: - break; - } - c->slot_index = operand_offset; - ctx->num_const++; - LOG_OP("#### new const [%d]: %ld\n", ctx->num_const, - (int64)c->value.i64); -======= *offset = -1; return true; } @@ -9740,6 +9693,17 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, *offset = -(uint32)(ctx->i64_const_num * 2 + ctx->i32_const_num) + (uint32)(i64_const - ctx->i64_consts) * 2; } + else if (type == VALUE_TYPE_V128) { + V128 key = *(V128 *)value, *v128_const; + v128_const = bsearch(&key, ctx->v128_consts, ctx->v128_const_num, + sizeof(V128), 
cmp_v128_const); + if (!v128_const) { /* not found, emit const instead */ + *offset = 0; + return true; + } + *offset = -(uint32)(ctx->v128_const_num) + + (uint32)(v128_const - ctx->v128_consts); + } else { int32 key = *(int32 *)value, *i32_const; i32_const = bsearch(&key, ctx->i32_consts, ctx->i32_const_num, @@ -9753,7 +9717,6 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } return true; ->>>>>>> original/main } fail: return false; @@ -11354,6 +11317,39 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } } + if (loader_ctx->v128_const_num > 0) { + V128 *v128_consts_old = loader_ctx->v128_consts; + + /* Sort the v128 consts */ + qsort(v128_consts_old, loader_ctx->v128_const_num, sizeof(V128), + cmp_v128_const); + + /* Remove the duplicated v128 consts */ + uint32 k = 1; + for (i = 1; i < loader_ctx->v128_const_num; i++) { + if (!(memcmp(&v128_consts_old[i], &v128_consts_old[i - 1], + sizeof(V128)) + == 0)) { + v128_consts_old[k++] = v128_consts_old[i]; + } + } + + if (k < loader_ctx->v128_const_num) { + V128 *v128_consts_new; + /* Try to reallocate memory with a smaller size */ + if ((v128_consts_new = + wasm_runtime_malloc((uint32)sizeof(V128) * k))) { + bh_memcpy_s(v128_consts_new, (uint32)sizeof(V128) * k, + v128_consts_old, (uint32)sizeof(V128) * k); + /* Free the old memory */ + wasm_runtime_free(v128_consts_old); + loader_ctx->v128_consts = v128_consts_new; + loader_ctx->v128_const_max_num = k; + } + loader_ctx->v128_const_num = k; + } + } + if (loader_ctx->i32_const_num > 0) { int32 *i32_consts_old = loader_ctx->i32_consts; @@ -15856,16 +15852,11 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, goto fail; } -<<<<<<< HEAD - read_leb_mem_offset(p, p_end, mem_offset); /* offset */ + pb_read_leb_mem_offset(p, p_end, + mem_offset); /* offset */ #if WASM_ENABLE_FAST_INTERP != 0 emit_uint32(loader_ctx, mem_offset); #endif -======= - pb_read_leb_mem_offset(p, p_end, - mem_offset); /* 
offset */ - ->>>>>>> original/main POP_AND_PUSH(mem_offset_type, VALUE_TYPE_V128); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 func->has_memory_operations = true; @@ -16395,8 +16386,9 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, if (loader_ctx->p_code_compiled == NULL) goto re_scan; - func->const_cell_num = - loader_ctx->i64_const_num * 2 + loader_ctx->i32_const_num; + func->const_cell_num = loader_ctx->i64_const_num * 2 + + loader_ctx->v128_const_num * 4 + + loader_ctx->i32_const_num; if (func->const_cell_num > 0) { if (!(func->consts = loader_malloc((uint64)sizeof(uint32) * func->const_cell_num, @@ -16415,6 +16407,12 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, loader_ctx->i32_consts, (uint32)sizeof(int32) * loader_ctx->i32_const_num); } + if (loader_ctx->v128_const_num > 0) { + bh_memcpy_s(func->consts, + (uint32)sizeof(V128) * loader_ctx->v128_const_num, + loader_ctx->v128_consts, + (uint32)sizeof(V128) * loader_ctx->v128_const_num); + } } func->max_stack_cell_num = loader_ctx->preserved_local_offset From 05a25df6cd016cc5b319b6b565e34061281edd61 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 19 Feb 2025 14:17:40 +0000 Subject: [PATCH 30/32] Enable SIMDe tests on CI --- .github/workflows/compilation_on_android_ubuntu.yml | 9 +++------ .github/workflows/compilation_on_sgx.yml | 2 +- build-scripts/runtime_lib.cmake | 4 ++-- tests/wamr-test-suites/test_wamr.sh | 4 ++-- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/compilation_on_android_ubuntu.yml b/.github/workflows/compilation_on_android_ubuntu.yml index 057082ebc5..83cd154afe 100644 --- a/.github/workflows/compilation_on_android_ubuntu.yml +++ b/.github/workflows/compilation_on_android_ubuntu.yml @@ -158,6 +158,7 @@ jobs: "-DWAMR_BUILD_PERF_PROFILING=1", "-DWAMR_BUILD_REF_TYPES=1", "-DWAMR_BUILD_SIMD=1", + "-DWAMR_BUILD_LIB_SIMDE=1", "-DWAMR_BUILD_TAIL_CALL=1", "-DWAMR_DISABLE_HW_BOUND_CHECK=1", 
"-DWAMR_BUILD_MEMORY64=1", @@ -178,11 +179,9 @@ jobs: make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" - # SIMD only on JIT/AOT mode + # SIMD only on JIT/AOT/fast interpreter mode - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS make_options_feature: "-DWAMR_BUILD_SIMD=1" - - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_SIMD=1" # DEBUG_INTERP only on CLASSIC INTERP mode - make_options_run_mode: $AOT_BUILD_OPTIONS make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" @@ -649,11 +648,9 @@ jobs: test_option: $WAMR_COMPILER_TEST_OPTIONS exclude: # incompatible modes and features - # classic-interp and fast-interp don't support simd + # classic-interp doesn't support simd - running_mode: "classic-interp" test_option: $SIMD_TEST_OPTIONS - - running_mode: "fast-interp" - test_option: $SIMD_TEST_OPTIONS # llvm jit doesn't support multi module - running_mode: "jit" test_option: $MULTI_MODULES_TEST_OPTIONS diff --git a/.github/workflows/compilation_on_sgx.yml b/.github/workflows/compilation_on_sgx.yml index 3233d35e4d..b865a59fe2 100644 --- a/.github/workflows/compilation_on_sgx.yml +++ b/.github/workflows/compilation_on_sgx.yml @@ -97,7 +97,7 @@ jobs: "-DWAMR_BUILD_PERF_PROFILING=1", "-DWAMR_BUILD_REF_TYPES=1", # doesn't support - # "-DWAMR_BUILD_SIMD=1", + "-DWAMR_BUILD_SIMD=0", "-DWAMR_BUILD_TAIL_CALL=1", "-DWAMR_DISABLE_HW_BOUND_CHECK=1", "-DWAMR_BUILD_SGX_IPFS=1", diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index ec3a370d61..cf778ad981 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -156,8 +156,8 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) endif () if (WAMR_BUILD_SIMD EQUAL 1 AND WAMR_BUILD_FAST_INTERP EQUAL 1) - if (NOT (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*")) - message(STATUS "SIMDe doesnt support platform " 
${WAMR_BUILD_TARGET}) + if (WAMR_BUILD_PLATFORM STREQUAL "windows") + message(STATUS "SIMDe doesnt support platform " ${WAMR_BUILD_PLATFORM}) set(WAMR_BUILD_SIMDE 0) else() include (${IWASM_DIR}/libraries/simde/simde.cmake) diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index 31f8b3746b..183033751a 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -913,8 +913,8 @@ function do_execute_in_running_mode() fi if [[ ${ENABLE_SIMD} -eq 1 ]]; then - if [[ "${RUNNING_MODE}" != "jit" && "${RUNNING_MODE}" != "aot" ]]; then - echo "support simd in llvm-jit mode and aot mode" + if [[ "${RUNNING_MODE}" != "jit" && "${RUNNING_MODE}" != "aot" && "${RUNNING_MODE}" != "fast-interp" ]]; then + echo "support simd in llvm-jit, aot and fast-interp mode" return 0; fi fi From 418195af609f0dc105213f918d8d8b762eb30214 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Mon, 17 Mar 2025 15:36:41 +0000 Subject: [PATCH 31/32] Apply clang-format to dev/simd_for_interp --- core/iwasm/interpreter/wasm_interp_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index ae076ca870..817ce1aeaf 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -7488,7 +7488,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #if WASM_ENABLE_LABELS_AS_VALUES == 0 continue; #else - FETCH_OPCODE_AND_DISPATCH(); + FETCH_OPCODE_AND_DISPATCH(); #endif #if WASM_ENABLE_TAIL_CALL != 0 || WASM_ENABLE_GC != 0 From e9d25f97a179fbf44c3f51b1145459ce09b879ea Mon Sep 17 00:00:00 2001 From: James Marsh Date: Tue, 18 Mar 2025 07:12:31 +0000 Subject: [PATCH 32/32] Document WAMR_BUILD_LIB_SIMDE --- doc/build_wamr.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/build_wamr.md b/doc/build_wamr.md index cde884457b..cdbeb10f2d 100644 --- 
a/doc/build_wamr.md +++ b/doc/build_wamr.md @@ -132,7 +132,11 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM ### **Enable 128-bit SIMD feature** - **WAMR_BUILD_SIMD**=1/0, default to enable if not set -> Note: only supported in AOT mode x86-64 target. +> Note: supported in AOT mode, JIT mode, and fast interpreter mode with the SIMDe library. + +### **Enable SIMDe library for SIMD in fast interpreter** +- **WAMR_BUILD_LIB_SIMDE**=1/0, default to disable if not set +> Note: If enabled, the SIMDe (SIMD Everywhere) library is used to implement SIMD operations in fast interpreter mode. ### **Enable Exception Handling** - **WAMR_BUILD_EXCE_HANDLING**=1/0, default to disable if not set @@ -332,4 +336,11 @@ Or if we want to enable interpreter, disable AOT and WASI, and build as X86_32, ``` Bash cmake .. -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_LIBC_WASI=0 -DWAMR_BUILD_TARGET=X86_32 -``` \ No newline at end of file +``` + +When enabling SIMD for fast interpreter mode, you'll need to enable both SIMD and the SIMDe library: + +``` Bash + +cmake .. -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_INTERP=1 -DWAMR_BUILD_SIMD=1 -DWAMR_BUILD_LIB_SIMDE=1 +```