Skip to content

Commit 6e127a8

Browse files
[V4] Fork infrastructure (#56)
* move ops to new file
* add frame members
* add fork pkg
* fix category
* context (not compiling)
* interfaces
* temp fill in type
* spell
* rename
* todo
* trim deps
* take 3
* use kind
* add noexcept
* comment
* todo
* force colour
* todo (use llm agent)
* use call by default
* all initialized
* fixup tuple impl
* add tests for tuple
* add template kw
* very rough dispatch on final suspend
* defer in final suspend
* assume -> ensure
* non_null and safe_cast
* some fork handling code (unused by bench)
* todo
* more comments
* re-order parts
* missing contexts in task/promise
* export/tidy context
* vector context in bench header
* add forking version
* todo
* renames
* benchmark tls on stack alloc
* more checks
* rm dead type
* rm defer comment
* rename
* rename
* fix missing include
* specify void return type
1 parent 718ac93 commit 6e127a8

15 files changed

Lines changed: 538 additions & 108 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,8 @@ target_sources(libfork_libfork
6262
src/core/frame.cxx
6363
src/core/constants.cxx
6464
src/core/tuple.cxx
65+
src/core/ops.cxx
66+
src/core/context.cxx
6567
PRIVATE
6668
src/exception.cpp
6769
)

CMakeUserPresets.json

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,19 @@
55
"name": "dev",
66
"inherits": "ci-hardened",
77
"displayName": "Hardened development build",
8-
"toolchainFile": "${sourceDir}/cmake/llvm-brew-toolchain.cmake"
8+
"toolchainFile": "${sourceDir}/cmake/llvm-brew-toolchain.cmake",
9+
"cacheVariables": {
10+
"CMAKE_COLOR_DIAGNOSTICS": "ON"
11+
}
912
},
1013
{
1114
"name": "bench",
1215
"inherits": "ci-release",
1316
"displayName": "Release build for benchmarks",
14-
"toolchainFile": "${sourceDir}/cmake/llvm-brew-toolchain.cmake"
17+
"toolchainFile": "${sourceDir}/cmake/llvm-brew-toolchain.cmake",
18+
"cacheVariables": {
19+
"CMAKE_COLOR_DIAGNOSTICS": "ON"
20+
}
1521
}
1622
],
1723
"buildPresets": [

benchmark/src/libfork_benchmark/fib/baremetal.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
namespace {
1313

1414
struct task {
15-
struct promise_type : fib_bump_allocator {
15+
struct promise_type : tls_bump {
1616

1717
auto get_return_object() -> task { return {std::coroutine_handle<promise_type>::from_promise(*this)}; }
1818

@@ -81,7 +81,7 @@ void fib_coro_no_queue(benchmark::State &state) {
8181

8282
// 8MB stack
8383
std::unique_ptr buffer = std::make_unique<std::byte[]>(1024 * 1024 * 8);
84-
fib_bump_ptr = buffer.get();
84+
tls_bump_ptr = buffer.get();
8585

8686
for (auto _ : state) {
8787
benchmark::DoNotOptimize(n);
@@ -91,7 +91,7 @@ void fib_coro_no_queue(benchmark::State &state) {
9191
benchmark::DoNotOptimize(result);
9292
}
9393

94-
if (fib_bump_ptr != buffer.get()) {
94+
if (tls_bump_ptr != buffer.get()) {
9595
std::terminate(); // Stack leak
9696
}
9797
}

benchmark/src/libfork_benchmark/fib/fib.hpp

Lines changed: 42 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,14 @@
33
#include <bit>
44
#include <cstddef>
55
#include <cstdint>
6+
#include <vector>
7+
8+
#include "libfork/__impl/compiler.hpp"
69

710
#include "libfork_benchmark/common.hpp"
811

12+
import libfork.core;
13+
914
inline constexpr int fib_test = 3;
1015
inline constexpr int fib_base = 37;
1116

@@ -39,17 +44,49 @@ inline auto fib_align_size(std::size_t n) -> std::size_t {
3944
return (n + k_fib_align - 1) & ~(k_fib_align - 1);
4045
}
4146

42-
inline thread_local std::byte *fib_bump_ptr = nullptr;
47+
constinit inline thread_local std::byte *tls_bump_ptr = nullptr;
48+
49+
struct tls_bump {
50+
51+
static auto operator new(std::size_t sz) -> void * {
52+
auto *prev = tls_bump_ptr;
53+
tls_bump_ptr += fib_align_size(sz);
54+
return prev;
55+
}
56+
57+
static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
58+
tls_bump_ptr = std::bit_cast<std::byte *>(p);
59+
}
60+
};
61+
62+
constinit inline std::byte *bump_ptr = nullptr;
4363

44-
struct fib_bump_allocator {
64+
struct global_bump {
4565

4666
static auto operator new(std::size_t sz) -> void * {
47-
auto *prev = fib_bump_ptr;
48-
fib_bump_ptr += fib_align_size(sz);
67+
auto *prev = bump_ptr;
68+
bump_ptr += fib_align_size(sz);
4969
return prev;
5070
}
5171

5272
static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
53-
fib_bump_ptr = std::bit_cast<std::byte *>(p);
73+
bump_ptr = std::bit_cast<std::byte *>(p);
74+
}
75+
};
76+
77+
// === Shared Context Logic ===
78+
79+
struct vector_ctx final : lf::polymorphic_context {
80+
81+
std::vector<lf::work_handle> work;
82+
83+
vector_ctx() { work.reserve(1024); }
84+
85+
void push(lf::work_handle handle) override { work.push_back(handle); }
86+
87+
auto pop() noexcept -> lf::work_handle override {
88+
auto handle = work.back();
89+
work.pop_back();
90+
return handle;
5491
}
5592
};

benchmark/src/libfork_benchmark/fib/lf_parts.cpp

Lines changed: 89 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,8 @@ struct stack_on_heap {
1919
}
2020
};
2121

22-
template <lf::alloc_mixin StackPolicy>
23-
constexpr auto no_await =
24-
[](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, StackPolicy> {
22+
template <lf::alloc_mixin Stack>
23+
constexpr auto no_await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, Stack> {
2524
if (n < 2) {
2625
*ret = n;
2726
co_return;
@@ -30,14 +29,19 @@ constexpr auto no_await =
3029
std::int64_t lhs = 0;
3130
std::int64_t rhs = 0;
3231

33-
fib(&lhs, n - 1).promise->handle().resume();
34-
fib(&rhs, n - 2).promise->handle().resume();
32+
auto t1 = fib(&lhs, n - 1);
33+
t1.promise->frame.kind = lf::category::root;
34+
t1.promise->handle().resume();
35+
36+
auto t2 = fib(&rhs, n - 2);
37+
t2.promise->frame.kind = lf::category::root;
38+
t2.promise->handle().resume();
3539

3640
*ret = lhs + rhs;
3741
};
3842

39-
template <lf::alloc_mixin StackPolicy>
40-
constexpr auto await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, StackPolicy> {
43+
template <lf::alloc_mixin Stack>
44+
constexpr auto await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, Stack> {
4145
if (n < 2) {
4246
*ret = n;
4347
co_return;
@@ -52,6 +56,35 @@ constexpr auto await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> l
5256
*ret = lhs + rhs;
5357
};
5458

59+
constexpr auto ret = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, tls_bump> {
60+
if (n < 2) {
61+
co_return n;
62+
}
63+
64+
std::int64_t lhs = 0;
65+
std::int64_t rhs = 0;
66+
67+
co_await lf::call(&lhs, fib, n - 1);
68+
co_await lf::call(&rhs, fib, n - 2);
69+
70+
co_return lhs + rhs;
71+
};
72+
73+
template <typename Ctx, typename A = tls_bump>
74+
constexpr auto fork_call = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, A, Ctx> {
75+
if (n < 2) {
76+
co_return n;
77+
}
78+
79+
std::int64_t lhs = 0;
80+
std::int64_t rhs = 0;
81+
82+
co_await lf::fork(&rhs, fib, n - 2);
83+
co_await lf::call(&lhs, fib, n - 1);
84+
85+
co_return lhs + rhs;
86+
};
87+
5588
template <auto Fn>
5689
void fib(benchmark::State &state) {
5790

@@ -60,18 +93,27 @@ void fib(benchmark::State &state) {
6093

6194
state.counters["n"] = static_cast<double>(n);
6295

63-
std::unique_ptr buffer = std::make_unique<std::byte[]>(1024 * 1024);
96+
// Set bump allocator buffer
97+
std::unique_ptr buf = std::make_unique<std::byte[]>(1024 * 1024);
98+
tls_bump_ptr = buf.get();
99+
bump_ptr = buf.get();
64100

65-
fib_bump_ptr = buffer.get();
101+
// Set both context and poly context
102+
std::unique_ptr ctx = std::make_unique<vector_ctx>();
103+
lf::thread_context<vector_ctx> = ctx.get();
104+
lf::thread_context<lf::polymorphic_context> = ctx.get();
66105

67106
for (auto _ : state) {
68107
benchmark::DoNotOptimize(n);
69108
std::int64_t result = 0;
70109

71110
if constexpr (requires { Fn(&result, n); }) {
72-
Fn(&result, n).promise->handle().resume();
111+
auto task = Fn(&result, n);
112+
task.promise->frame.kind = lf::category::root;
113+
task.promise->handle().resume();
73114
} else {
74115
auto task = Fn(n);
116+
task.promise->frame.kind = lf::category::root;
75117
task.promise->return_address = &result;
76118
task.promise->handle().resume();
77119
}
@@ -80,39 +122,54 @@ void fib(benchmark::State &state) {
80122
benchmark::DoNotOptimize(result);
81123
}
82124

83-
if (fib_bump_ptr != buffer.get()) {
125+
if (tls_bump_ptr != buf.get() || bump_ptr != buf.get()) {
84126
LF_TERMINATE("Stack leak detected");
85127
}
86-
}
87-
88-
template <lf::alloc_mixin StackPolicy>
89-
constexpr auto ret = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, StackPolicy> {
90-
if (n < 2) {
91-
co_return n;
92-
}
93-
94-
std::int64_t lhs = 0;
95-
std::int64_t rhs = 0;
96128

97-
co_await lf::call(&lhs, fib, n - 1);
98-
co_await lf::call(&rhs, fib, n - 2);
99-
100-
co_return lhs + rhs;
101-
};
129+
tls_bump_ptr = nullptr;
130+
bump_ptr = nullptr;
131+
lf::thread_context<vector_ctx> = nullptr;
132+
lf::thread_context<lf::polymorphic_context> = nullptr;
133+
}
102134

103135
} // namespace
104136

137+
// Return by ref-arg, test direct root, no co-await, direct resumes, uses new/delete for alloc
105138
BENCHMARK(fib<no_await<stack_on_heap>>)->Name("test/libfork/fib/heap/no_await")->Arg(fib_test);
106139
BENCHMARK(fib<no_await<stack_on_heap>>)->Name("base/libfork/fib/heap/no_await")->Arg(fib_base);
107140

141+
// Same as above but uses tls bump allocator
142+
BENCHMARK(fib<no_await<tls_bump>>)->Name("test/libfork/fib/tls_bump/no_await")->Arg(fib_test);
143+
BENCHMARK(fib<no_await<tls_bump>>)->Name("base/libfork/fib/tls_bump/no_await")->Arg(fib_base);
144+
145+
// Same as above but with global bump allocator
146+
BENCHMARK(fib<no_await<global_bump>>)->Name("test/libfork/fib/global_bump/no_await")->Arg(fib_test);
147+
BENCHMARK(fib<no_await<global_bump>>)->Name("base/libfork/fib/global_bump/no_await")->Arg(fib_base);
148+
149+
// TODO: no_await with segmented stack allocator?
150+
151+
// Return by ref-arg, libfork call/call with co-await, uses new/delete for alloc
108152
BENCHMARK(fib<await<stack_on_heap>>)->Name("test/libfork/fib/heap/await")->Arg(fib_test);
109153
BENCHMARK(fib<await<stack_on_heap>>)->Name("base/libfork/fib/heap/await")->Arg(fib_base);
110154

111-
BENCHMARK(fib<no_await<fib_bump_allocator>>)->Name("test/libfork/fib/bump_alloc/no_await")->Arg(fib_test);
112-
BENCHMARK(fib<no_await<fib_bump_allocator>>)->Name("base/libfork/fib/bump_alloc/no_await")->Arg(fib_base);
155+
// Same as above but uses tls bump allocator
156+
BENCHMARK(fib<await<tls_bump>>)->Name("test/libfork/fib/tls_bump/await")->Arg(fib_test);
157+
BENCHMARK(fib<await<tls_bump>>)->Name("base/libfork/fib/tls_bump/await")->Arg(fib_base);
158+
159+
// Same as above but with global bump allocator
160+
BENCHMARK(fib<await<global_bump>>)->Name("test/libfork/fib/global_bump/await")->Arg(fib_test);
161+
BENCHMARK(fib<await<global_bump>>)->Name("base/libfork/fib/global_bump/await")->Arg(fib_base);
162+
163+
// Return by value
164+
// libfork call/call with co-await
165+
BENCHMARK(fib<ret>)->Name("test/libfork/fib/tls_bump/return")->Arg(fib_test);
166+
BENCHMARK(fib<ret>)->Name("base/libfork/fib/tls_bump/return")->Arg(fib_base);
113167

114-
BENCHMARK(fib<await<fib_bump_allocator>>)->Name("test/libfork/fib/bump_alloc/await")->Arg(fib_test);
115-
BENCHMARK(fib<await<fib_bump_allocator>>)->Name("base/libfork/fib/bump_alloc/await")->Arg(fib_base);
168+
// Return by value
169+
// libfork call/fork (no join)
170+
// Non-polymorphic vector-backed context
171+
BENCHMARK(fib<fork_call<vector_ctx>>)->Name("test/libfork/fib/vector_ctx")->Arg(fib_test);
172+
BENCHMARK(fib<fork_call<vector_ctx>>)->Name("base/libfork/fib/vector_ctx")->Arg(fib_base);
116173

117-
BENCHMARK(fib<ret<fib_bump_allocator>>)->Name("test/libfork/fib/bump_alloc/return")->Arg(fib_test);
118-
BENCHMARK(fib<ret<fib_bump_allocator>>)->Name("base/libfork/fib/bump_alloc/return")->Arg(fib_base);
174+
BENCHMARK(fib<fork_call<lf::polymorphic_context>>)->Name("test/libfork/fib/poly_vector_ctx")->Arg(fib_test);
175+
BENCHMARK(fib<fork_call<lf::polymorphic_context>>)->Name("base/libfork/fib/poly_vector_ctx")->Arg(fib_base);

include/libfork/__impl/assume.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*
1616
* This macro is always active, regardless of optimization settings or `NDEBUG`.
1717
*/
18-
#define LF_ASSERT(expr) \
18+
#define LF_ENSURE(expr) \
1919
do { \
2020
if (!(expr)) { \
2121
LF_TERMINATE("Assumption '" #expr "' failed!"); \
@@ -43,5 +43,5 @@
4343
} \
4444
} while (false)
4545
#else
46-
#define LF_ASSUME(expr) LF_ASSERT(expr)
46+
#define LF_ASSUME(expr) LF_ENSURE(expr)
4747
#endif

src/core/concepts.cxx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ template <typename T, template <typename...> typename Template>
3535
concept specialization_of = is_specialization_of<std::remove_cvref_t<T>, Template>::value;
3636

3737
// Forward-decl
38-
export template <returnable T, alloc_mixin Stack>
38+
export template <returnable T, alloc_mixin Stack, typename Context>
3939
struct task;
4040

4141
/**

src/core/context.cxx

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
module;
2+
export module libfork.core:context;
3+
4+
import std;
5+
6+
import :frame;
7+
8+
namespace lf {
9+
10+
// TODO: private bits / split
11+
export struct work_handle {
12+
frame_type *frame;
13+
};
14+
15+
static_assert(std::atomic<work_handle>::is_always_lock_free);
16+
17+
template <typename T>
18+
concept context = requires (T &ctx, work_handle h) {
19+
{ ctx.push(h) } -> std::same_as<void>;
20+
{ ctx.pop() } noexcept -> std::same_as<work_handle>;
21+
};
22+
23+
export struct polymorphic_context {
24+
virtual void push(work_handle h) = 0;
25+
virtual auto pop() noexcept -> work_handle = 0;
26+
virtual ~polymorphic_context() = default;
27+
};
28+
29+
static_assert(context<polymorphic_context>);
30+
31+
export template <typename Context>
32+
constinit inline thread_local Context *thread_context = nullptr;
33+
34+
} // namespace lf

src/core/core.cxx

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,15 @@
11
export module libfork.core;
22

3-
// All partitions
4-
export import :promise;
3+
// T1 partitions
54
export import :concepts;
6-
export import :utility;
7-
export import :frame;
85
export import :constants;
6+
export import :frame;
7+
export import :utility;
98
export import :tuple;
9+
10+
// T2 partitions
11+
export import :ops; // concepts, tuple, utility
12+
export import :context; // frame
13+
14+
// T3 partitions
15+
export import :promise; // context, ops

0 commit comments

Comments
 (0)