diff --git a/CMakeLists.txt b/CMakeLists.txt index e5538569..ec36f9da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,25 @@ cmake_minimum_required(VERSION 3.15) + +if(POLICY CMP0194) + cmake_policy(SET CMP0194 NEW) +endif() + project(async_simple VERSION 1.3 - LANGUAGES CXX C ASM) + LANGUAGES C CXX) + +if(MSVC) + enable_language(ASM_MASM) +else() + enable_language(ASM) +endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +if(WIN32) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +endif() + enable_testing() set(CMAKE_CXX_STANDARD 20) @@ -158,6 +173,11 @@ elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") # uname -s set(UTHREAD ON) message("-- Uthread on") endif() +elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") + if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" AND MSVC) + set(UTHREAD ON) + message("-- Uthread on") + endif() endif() # End Detecing Uthread diff --git a/async_simple/CMakeLists.txt b/async_simple/CMakeLists.txt index 722e818c..aac5627f 100644 --- a/async_simple/CMakeLists.txt +++ b/async_simple/CMakeLists.txt @@ -3,7 +3,11 @@ if(${UTHREAD}) if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13") endif() -file(GLOB uthread_asm_src "uthread/internal/${CMAKE_SYSTEM_NAME}/${CMAKE_SYSTEM_PROCESSOR}/*.S") + if(MSVC) + file(GLOB uthread_asm_src "uthread/internal/${CMAKE_SYSTEM_NAME}/${CMAKE_SYSTEM_PROCESSOR}/*.asm") + else() + file(GLOB uthread_asm_src "uthread/internal/${CMAKE_SYSTEM_NAME}/${CMAKE_SYSTEM_PROCESSOR}/*.S") + endif() endif() file(GLOB headers "*.h") diff --git a/async_simple/uthread/internal/Windows/AMD64/jump_x86_64_ms_pe_masm.asm b/async_simple/uthread/internal/Windows/AMD64/jump_x86_64_ms_pe_masm.asm new file mode 100644 index 00000000..6eb3a89c --- /dev/null +++ b/async_simple/uthread/internal/Windows/AMD64/jump_x86_64_ms_pe_masm.asm @@ -0,0 +1,201 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | +; ---------------------------------------------------------------------------------- +; | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | +; ---------------------------------------------------------------------------------- +; | limit | base | R12 | R13 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | +; ---------------------------------------------------------------------------------- +; | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | +; ---------------------------------------------------------------------------------- +; | R14 | R15 | RDI | RSI | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | +; ---------------------------------------------------------------------------------- +; | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | +; ---------------------------------------------------------------------------------- +; | RBX | RBP | hidden | RIP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | +; ---------------------------------------------------------------------------------- +; | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | +; ---------------------------------------------------------------------------------- +; | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | +; ---------------------------------------------------------------------------------- +; | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | +; ---------------------------------------------------------------------------------- +; | FCTX | DATA | | +; ---------------------------------------------------------------------------------- + +.code + +_fl_jump_fcontext PROC EXPORT FRAME + .endprolog + + ; prepare stack + lea rsp, [rsp-0118h] + + ; save XMM storage + movaps [rsp], xmm6 + movaps [rsp+010h], xmm7 + movaps [rsp+020h], xmm8 + movaps [rsp+030h], xmm9 + movaps [rsp+040h], xmm10 + movaps [rsp+050h], xmm11 + movaps [rsp+060h], xmm12 + movaps [rsp+070h], xmm13 + movaps [rsp+080h], xmm14 + movaps [rsp+090h], xmm15 + ; save MMX control- and status-word + stmxcsr [rsp+0a0h] + ; save x87 control-word + fnstcw [rsp+0a4h] + + ; load NT_TIB + mov r10, gs:[030h] + ; save fiber local storage + mov rax, [r10+020h] + mov [rsp+0b0h], rax + ; save current deallocation stack + mov rax, [r10+01478h] + mov [rsp+0b8h], rax + ; save current stack limit + mov rax, [r10+010h] + mov [rsp+0c0h], rax + ; save current stack base + mov rax, [r10+08h] + mov [rsp+0c8h], rax + + mov [rsp+0d0h], r12 ; save R12 + mov [rsp+0d8h], r13 ; save R13 + mov [rsp+0e0h], r14 ; save R14 + mov [rsp+0e8h], r15 ; save R15 + mov [rsp+0f0h], rdi ; save RDI + mov [rsp+0f8h], rsi ; save RSI + mov [rsp+0100h], rbx ; save RBX + mov [rsp+0108h], rbp ; save RBP + + mov [rsp+0110h], rcx ; save hidden address of transport_t + + ; preserve RSP (pointing to context-data) in R9 + mov r9, rsp + + ; restore RSP (pointing to context-data) from RDX + mov rsp, rdx + + ; restore XMM storage + movaps xmm6, [rsp] + movaps xmm7, [rsp+010h] + movaps xmm8, [rsp+020h] + movaps xmm9, [rsp+030h] + movaps xmm10, [rsp+040h] + movaps xmm11, [rsp+050h] + movaps xmm12, [rsp+060h] + movaps xmm13, [rsp+070h] + movaps xmm14, [rsp+080h] + movaps xmm15, [rsp+090h] + ; restore MMX control- and status-word + ldmxcsr [rsp+0a0h] + ; save x87 control-word + fldcw [rsp+0a4h] + + ; load NT_TIB + mov r10, gs:[030h] + ; restore fiber local storage + mov rax, [rsp+0b0h] + mov [r10+020h], rax + ; restore current deallocation stack + mov rax, [rsp+0b8h] + mov [r10+01478h], rax + ; restore current stack limit + mov rax, [rsp+0c0h] + mov [r10+010h], rax + ; restore current stack base + mov rax, [rsp+0c8h] + mov [r10+08h], rax + + mov r12, [rsp+0d0h] ; restore R12 + mov r13, [rsp+0d8h] ; restore R13 + mov r14, [rsp+0e0h] ; restore R14 + mov r15, [rsp+0e8h] ; restore R15 + mov rdi, [rsp+0f0h] ; restore RDI + mov rsi, [rsp+0f8h] ; restore RSI + mov rbx, [rsp+0100h] ; restore RBX + mov rbp, [rsp+0108h] ; restore RBP + + mov rax, [rsp+0110h] ; restore hidden address of transport_t + + ; prepare stack + lea rsp, [rsp+0118h] + + ; load return-address + pop r10 + + ; transport_t returned in RAX + ; return parent fcontext_t + mov [rax], r9 + ; return data + mov [rax+08h], r8 + + ; transport_t as 1.arg of context-function + mov rcx, rax + + ; indirect jump to context + jmp r10 +_fl_jump_fcontext ENDP +END diff --git a/async_simple/uthread/internal/Windows/AMD64/make_x86_64_ms_pe_masm.asm b/async_simple/uthread/internal/Windows/AMD64/make_x86_64_ms_pe_masm.asm new file mode 100644 index 00000000..c5206bf7 --- /dev/null +++ b/async_simple/uthread/internal/Windows/AMD64/make_x86_64_ms_pe_masm.asm @@ -0,0 +1,163 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | +; ---------------------------------------------------------------------------------- +; | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | +; ---------------------------------------------------------------------------------- +; | limit | base | R12 | R13 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | +; ---------------------------------------------------------------------------------- +; | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | +; ---------------------------------------------------------------------------------- +; | R14 | R15 | RDI | RSI | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | +; ---------------------------------------------------------------------------------- +; | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | +; ---------------------------------------------------------------------------------- +; | RBX | RBP | hidden | RIP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | +; ---------------------------------------------------------------------------------- +; | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | +; ---------------------------------------------------------------------------------- +; | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | +; ---------------------------------------------------------------------------------- +; | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | +; ---------------------------------------------------------------------------------- +; | FCTX | DATA | | +; ---------------------------------------------------------------------------------- + +; standard C library function +EXTERN _exit:PROC +.code + +; generate function table entry in .pdata and unwind information in +_fl_make_fcontext PROC EXPORT FRAME + ; .xdata for a function's structured exception handling unwind behavior + .endprolog + + ; first arg of _fl_make_fcontext() == top of context-stack + mov rax, rcx + + ; shift address in RAX to lower 16 byte boundary + ; == pointer to fcontext_t and address of context stack + and rax, -16 + + ; reserve space for context-data on context-stack + ; on context-function entry: (RSP -0x8) % 16 == 0 + sub rax, 0150h + + ; third arg of _fl_make_fcontext() == address of context-function + ; stored in RBX + mov [rax+0100h], r8 + + ; first arg of _fl_make_fcontext() == top of context-stack + ; save top address of context stack as 'base' + mov [rax+0c8h], rcx + ; second arg of _fl_make_fcontext() == size of context-stack + ; negate stack size for LEA instruction (== subtraction) + neg rdx + ; compute bottom address of context stack (limit) + lea rcx, [rcx+rdx] + ; save bottom address of context stack as 'limit' + mov [rax+0c0h], rcx + ; save address of context stack limit as 'dealloction stack' + mov [rax+0b8h], rcx + ; set fiber-storage to zero + xor rcx, rcx + mov [rax+0b0h], rcx + + ; save MMX control- and status-word + stmxcsr [rax+0a0h] + ; save x87 control-word + fnstcw [rax+0a4h] + + ; compute address of transport_t + lea rcx, [rax+0140h] + ; store address of transport_t in hidden field + mov [rax+0110h], rcx + + ; compute abs address of label trampoline + lea rcx, trampoline + ; save address of trampoline as return-address for context-function + ; will be entered after calling jump_fcontext() first time + mov [rax+0118h], rcx + + ; compute abs address of label finish + lea rcx, finish + ; save address of finish as return-address for context-function in RBP + ; will be entered after context-function returns + mov [rax+0108h], rcx + + ret ; return pointer to context-data + +trampoline: + ; store return address on stack + ; fix stack alignment + push rbp + ; jump to context-function + jmp rbx + +finish: + ; exit code is zero + xor rcx, rcx + ; exit application + call _exit + hlt +_fl_make_fcontext ENDP +END diff --git a/async_simple/uthread/internal/Windows/AMD64/ontop_x86_64_ms_pe_masm.asm b/async_simple/uthread/internal/Windows/AMD64/ontop_x86_64_ms_pe_masm.asm new file mode 100644 index 00000000..0c6cee89 --- /dev/null +++ b/async_simple/uthread/internal/Windows/AMD64/ontop_x86_64_ms_pe_masm.asm @@ -0,0 +1,203 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | +; ---------------------------------------------------------------------------------- +; | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | +; ---------------------------------------------------------------------------------- +; | limit | base | R12 | R13 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | +; ---------------------------------------------------------------------------------- +; | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | +; ---------------------------------------------------------------------------------- +; | R14 | R15 | RDI | RSI | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | +; ---------------------------------------------------------------------------------- +; | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | +; ---------------------------------------------------------------------------------- +; | RBX | RBP | hidden | RIP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | +; ---------------------------------------------------------------------------------- +; | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | +; ---------------------------------------------------------------------------------- +; | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | +; ---------------------------------------------------------------------------------- +; | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | +; ---------------------------------------------------------------------------------- +; | FCTX | DATA | | +; ---------------------------------------------------------------------------------- + +.code + +_fl_ontop_fcontext PROC EXPORT FRAME + .endprolog + + ; prepare stack + lea rsp, [rsp-0118h] + + ; save XMM storage + movaps [rsp], xmm6 + movaps [rsp+010h], xmm7 + movaps [rsp+020h], xmm8 + movaps [rsp+030h], xmm9 + movaps [rsp+040h], xmm10 + movaps [rsp+050h], xmm11 + movaps [rsp+060h], xmm12 + movaps [rsp+070h], xmm13 + movaps [rsp+080h], xmm14 + movaps [rsp+090h], xmm15 + ; save MMX control- and status-word + stmxcsr [rsp+0a0h] + ; save x87 control-word + fnstcw [rsp+0a4h] + + ; load NT_TIB + mov r10, gs:[030h] + ; save fiber local storage + mov rax, [r10+020h] + mov [rsp+0b0h], rax + ; save current deallocation stack + mov rax, [r10+01478h] + mov [rsp+0b8h], rax + ; save current stack limit + mov rax, [r10+010h] + mov [rsp+0c0h], rax + ; save current stack base + mov rax, [r10+08h] + mov [rsp+0c8h], rax + + mov [rsp+0d0h], r12 ; save R12 + mov [rsp+0d8h], r13 ; save R13 + mov [rsp+0e0h], r14 ; save R14 + mov [rsp+0e8h], r15 ; save R15 + mov [rsp+0f0h], rdi ; save RDI + mov [rsp+0f8h], rsi ; save RSI + mov [rsp+0100h], rbx ; save RBX + mov [rsp+0108h], rbp ; save RBP + + mov [rsp+0110h], rcx ; save hidden address of transport_t + + ; preserve RSP (pointing to context-data) in RCX + mov rcx, rsp + + ; restore RSP (pointing to context-data) from RDX + mov rsp, rdx + + ; restore XMM storage + movaps xmm6, [rsp] + movaps xmm7, [rsp+010h] + movaps xmm8, [rsp+020h] + movaps xmm9, [rsp+030h] + movaps xmm10, [rsp+040h] + movaps xmm11, [rsp+050h] + movaps xmm12, [rsp+060h] + movaps xmm13, [rsp+070h] + movaps xmm14, [rsp+080h] + movaps xmm15, [rsp+090h] + ; restore MMX control- and status-word + ldmxcsr [rsp+0a0h] + ; save x87 control-word + fldcw [rsp+0a4h] + + ; load NT_TIB + mov r10, gs:[030h] + ; restore fiber local storage + mov rax, [rsp+0b0h] + mov [r10+020h], rax + ; restore current deallocation stack + mov rax, [rsp+0b8h] + mov [r10+01478h], rax + ; restore current stack limit + mov rax, [rsp+0c0h] + mov [r10+010h], rax + ; restore current stack base + mov rax, [rsp+0c8h] + mov [r10+08h], rax + + mov r12, [rsp+0d0h] ; restore R12 + mov r13, [rsp+0d8h] ; restore R13 + mov r14, [rsp+0e0h] ; restore R14 + mov r15, [rsp+0e8h] ; restore R15 + mov rdi, [rsp+0f0h] ; restore RDI + mov rsi, [rsp+0f8h] ; restore RSI + mov rbx, [rsp+0100h] ; restore RBX + mov rbp, [rsp+0108h] ; restore RBP + + mov rax, [rsp+0110h] ; restore hidden address of transport_t + + ; prepare stack + lea rsp, [rsp+0118h] + + ; keep return-address on stack + + ; transport_t returned in RAX + ; return parent fcontext_t + mov [rax], rcx + ; return data + mov [rax+08h], r8 + + ; transport_t as 1.arg of context-function + ; RCX contains address of returned (hidden) transfer_t + mov rcx, rax + ; RDX contains address of passed transfer_t + mov rdx, rax + + ; indirect jump to context + jmp r9 +_fl_ontop_fcontext ENDP +END diff --git a/async_simple/uthread/internal/thread.cc b/async_simple/uthread/internal/thread.cc index b0bba260..b8a27afd 100644 --- a/async_simple/uthread/internal/thread.cc +++ b/async_simple/uthread/internal/thread.cc @@ -61,6 +61,25 @@ inline void finish_switch_fiber(jmp_buf_link* context, void *stack) {} #endif // AS_INTERNAL_USE_ASAN thread_local jmp_buf_link g_unthreaded_context; + +stack_deleter_fn& get_stack_deleter() noexcept { + static stack_deleter_fn fn = &default_stack_deleter; + return fn; +} + +stack_allocator_fn& get_stack_allocator() noexcept { + static stack_allocator_fn fn = &default_get_stack_holder; + return fn; +} + +void set_stack_deleter(stack_deleter_fn fn) noexcept { + get_stack_deleter() = fn ? fn : &default_stack_deleter; +} + +void set_stack_allocator(stack_allocator_fn fn) noexcept { + get_stack_allocator() = fn ? fn : &default_get_stack_holder; +} + thread_local jmp_buf_link* g_current_context = nullptr; static const std::string uthread_stack_size = "UTHREAD_STACK_SIZE_KB"; @@ -169,8 +188,13 @@ void thread_context::main() { asm(".cfi_undefined x30"); #elif defined(__riscv) asm(".cfi_undefined ra"); // The return address register in RISC-V is 'ra' +#elif defined(_MSC_VER) + // MSVC uses SEH (structured exception handling) with .pdata/.xdata for + // unwinding instead of DWARF CFI directives. The unwind information is + // already provided in the MASM assembly files, so no inline annotation is + // needed here. #else -#warning "Backtracing from uthreads may be broken" +#error "Backtracing from uthreads may be broken" #endif context_.initial_switch_in_completed(); try { @@ -189,6 +213,8 @@ void switch_in(thread_context* to) { to->switch_in(); } void switch_out(thread_context* from) { from->switch_out(); } +thread_context* get() { return g_current_context->thread; } + bool can_switch_out() { return g_current_context && g_current_context->thread; } } // namespace thread_impl diff --git a/async_simple/uthread/internal/thread.h b/async_simple/uthread/internal/thread.h index d522de22..71efe7af 100644 --- a/async_simple/uthread/internal/thread.h +++ b/async_simple/uthread/internal/thread.h @@ -52,23 +52,13 @@ inline stack_holder default_get_stack_holder(unsigned stack_size) { return stack_holder(new char[stack_size]); } -inline stack_deleter_fn& get_stack_deleter() noexcept { - static stack_deleter_fn fn = &default_stack_deleter; - return fn; -} +AS_EXPORT stack_deleter_fn& get_stack_deleter() noexcept; -inline stack_allocator_fn& get_stack_allocator() noexcept { - static stack_allocator_fn fn = &default_get_stack_holder; - return fn; -} +AS_EXPORT stack_allocator_fn& get_stack_allocator() noexcept; -inline void set_stack_deleter(stack_deleter_fn fn) noexcept { - get_stack_deleter() = fn ? fn : &default_stack_deleter; -} +AS_EXPORT void set_stack_deleter(stack_deleter_fn fn) noexcept; -inline void set_stack_allocator(stack_allocator_fn fn) noexcept { - get_stack_allocator() = fn ? fn : &default_get_stack_holder; -} +AS_EXPORT void set_stack_allocator(stack_allocator_fn fn) noexcept; inline void stack_deleter::operator()(char* ptr) const noexcept { get_stack_deleter()(ptr); @@ -78,7 +68,7 @@ inline stack_holder get_stack_holder(unsigned stack_size) { return get_stack_allocator()(stack_size); } -class thread_context { +class AS_EXPORT thread_context { const size_t stack_size_; stack_holder stack_{make_stack()}; std::function func_; diff --git a/async_simple/uthread/internal/thread_impl.h b/async_simple/uthread/internal/thread_impl.h index 5304dbf5..1dd10e1b 100644 --- a/async_simple/uthread/internal/thread_impl.h +++ b/async_simple/uthread/internal/thread_impl.h @@ -61,15 +61,12 @@ struct jmp_buf_link { void final_switch_out(); }; -extern thread_local jmp_buf_link* g_current_context; - namespace thread_impl { -inline thread_context* get() { return g_current_context->thread; } - -void switch_in(thread_context* to); -void switch_out(thread_context* from); -bool can_switch_out(); +AS_EXPORT thread_context* get(); +AS_EXPORT void switch_in(thread_context* to); +AS_EXPORT void switch_out(thread_context* from); +AS_EXPORT bool can_switch_out(); } // namespace thread_impl diff --git a/async_simple/uthread/test/UthreadTest.cpp b/async_simple/uthread/test/UthreadTest.cpp index 4973d0e1..9f7d78cf 100644 --- a/async_simple/uthread/test/UthreadTest.cpp +++ b/async_simple/uthread/test/UthreadTest.cpp @@ -515,7 +515,7 @@ TEST_F(UthreadTest, testLatchThreadSafe) { auto f = [&taskEx, &taskNotify]() mutable { Latch latch(1u); taskNotify.schedule([latchPtr = &latch]() mutable { - std::this_thread::sleep_for(1us); + std::this_thread::yield(); latchPtr->downCount(); }); latch.await(&taskEx); @@ -533,7 +533,7 @@ TEST_F(UthreadTest, testLatchThreadSafe) { &taskEx); }) .thenValue([&runningTask]() { runningTask.fetch_sub(1u); }); - std::this_thread::sleep_for(10us); + std::this_thread::yield(); } while (runningTask) {