Skip to content

Commit 78f58c6

Browse files
[V4] Join (#60)
* Tidy up comments * deduplicate transforms * convert to macros * comment * deduplicate fork/call awaitables * less initialization * api todo * propagate cancel * fix constexpr * return parent on exception * revert switch api * reorder immovable * comments * rethrow from await_suspend * spell * add atomic ops to frame * correct frame init * regular constraint on checkpoint * full final suspend * comment * spell * joins ops * join primer * sketch of a join * join basics * add constexpr * optimise fast path * why does this slow me down? * small refactor * tweaks for perf * golf * remove no inline * set no inline * spell * additional asserts * drop todo * constexpr guard * init cancel in root * Update src/core/promise.cxx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/core/promise.cxx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * spell * Update src/core/promise.cxx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/core/concepts.cxx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * rename reset -> reset_counters * rm false asserts * add todo * spell * fix bugged indexing
1 parent 25165b7 commit 78f58c6

9 files changed

Lines changed: 306 additions & 154 deletions

File tree

benchmark/src/libfork_benchmark/fib/fib.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ struct vector_ctx {
7373

7474
auto alloc() noexcept -> Alloc & { return allocator; }
7575

76+
// TODO: try LF_NO_INLINE for final allocator
77+
LF_NO_INLINE
7678
void push(handle_type handle) { work.push_back(handle); }
7779

7880
auto pop() noexcept -> handle_type {

benchmark/src/libfork_benchmark/fib/lf_parts.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@ namespace {
1414

1515
struct global_allocator {
1616

17-
struct empty {};
17+
struct empty {
18+
auto operator==(empty const &) const -> bool = default;
19+
};
1820

1921
constexpr static auto push(std::size_t sz) -> void * { return ::operator new(sz); }
2022
constexpr static auto pop(void *p, std::size_t sz) noexcept -> void { ::operator delete(p, sz); }
2123
constexpr static auto checkpoint() noexcept -> empty { return {}; }
22-
constexpr static auto switch_to(empty) noexcept -> void {}
24+
constexpr static auto release() noexcept -> void {}
25+
constexpr static auto acquire(empty) noexcept -> void {}
2326
};
2427

2528
static_assert(lf::stack_allocator<global_allocator>);
@@ -37,8 +40,8 @@ struct linear_allocator {
3740
constexpr auto pop(void *p, std::size_t) noexcept -> void { ptr = static_cast<std::byte *>(p); }
3841

3942
constexpr auto checkpoint() noexcept -> std::byte * { return data.get(); }
40-
41-
constexpr static auto switch_to(std::byte *) noexcept -> void {}
43+
constexpr auto release() noexcept -> void {}
44+
constexpr auto acquire(std::byte *) noexcept -> void {}
4245
};
4346

4447
static_assert(lf::stack_allocator<linear_allocator>);
@@ -57,10 +60,14 @@ constexpr auto no_await = [](this auto fib, std::int64_t *ret, std::int64_t n) -
5760

5861
auto t1 = fib(&lhs, n - 1);
5962
t1.promise->frame.kind = lf::category::root;
63+
t1.promise->frame.stack_ckpt = lf::thread_context<T>->alloc().checkpoint();
64+
t1.promise->frame.cancel = nullptr;
6065
t1.promise->handle().resume();
6166

6267
auto t2 = fib(&rhs, n - 2);
6368
t2.promise->frame.kind = lf::category::root;
69+
t2.promise->frame.stack_ckpt = lf::thread_context<T>->alloc().checkpoint();
70+
t2.promise->frame.cancel = nullptr;
6471
t2.promise->handle().resume();
6572

6673
*ret = lhs + rhs;
@@ -97,7 +104,7 @@ constexpr auto ret = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t,
97104
co_return lhs + rhs;
98105
};
99106

100-
template <typename T>
107+
template <typename T, bool Join = false>
101108
constexpr auto fork_call = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, T> {
102109
if (n < 2) {
103110
co_return n;
@@ -109,6 +116,10 @@ constexpr auto fork_call = [](this auto fib, std::int64_t n) -> lf::task<std::in
109116
co_await lf::fork(&rhs, fib, n - 2);
110117
co_await lf::call(&lhs, fib, n - 1);
111118

119+
if constexpr (Join) {
120+
co_await lf::join();
121+
}
122+
112123
co_return lhs + rhs;
113124
};
114125

@@ -138,11 +149,15 @@ void fib(benchmark::State &state) {
138149
if constexpr (requires { Fn(&result, n); }) {
139150
auto task = Fn(&result, n);
140151
task.promise->frame.kind = lf::category::root;
152+
task.promise->frame.cancel = nullptr;
153+
task.promise->frame.stack_ckpt = lf::thread_context<U>->alloc().checkpoint();
141154
task.promise->handle().resume();
142155
} else {
143156
auto task = Fn(n);
144157
task.promise->frame.kind = lf::category::root;
158+
task.promise->frame.cancel = nullptr;
145159
task.promise->return_address = &result;
160+
task.promise->frame.stack_ckpt = lf::thread_context<U>->alloc().checkpoint();
146161
task.promise->handle().resume();
147162
}
148163

@@ -184,9 +199,21 @@ BENCHMARK(fib<ret<linear_alloc>, linear_alloc>)->Name("base/libfork/fib/bump/ret
184199
BENCHMARK(fib<fork_call<linear_alloc>, linear_alloc>)->Name("test/libfork/fib/vector_ctx")->Arg(fib_test);
185200
BENCHMARK(fib<fork_call<linear_alloc>, linear_alloc>)->Name("base/libfork/fib/vector_ctx")->Arg(fib_base);
186201

202+
// Same as above but with join.
203+
BENCHMARK(fib<fork_call<linear_alloc, true>, linear_alloc>)
204+
->Name("test/libfork/fib/vector_ctx/join")
205+
->Arg(fib_test);
206+
BENCHMARK(fib<fork_call<linear_alloc, true>, linear_alloc>)
207+
->Name("base/libfork/fib/vector_ctx/join")
208+
->Arg(fib_base);
209+
187210
using A = poly_vector_ctx<linear_allocator>;
188211
using B = lf::polymorphic_context<linear_allocator>;
189212

190213
// Same as above but with polymorphic contexts.
191214
BENCHMARK(fib<fork_call<B>, A, B>)->Name("test/libfork/fib/poly_vector_ctx")->Arg(fib_test);
192215
BENCHMARK(fib<fork_call<B>, A, B>)->Name("base/libfork/fib/poly_vector_ctx")->Arg(fib_base);
216+
217+
// Same as above but with join.
218+
BENCHMARK(fib<fork_call<B, true>, A, B>)->Name("test/libfork/fib/poly_vector_ctx/join")->Arg(fib_test);
219+
BENCHMARK(fib<fork_call<B, true>, A, B>)->Name("base/libfork/fib/poly_vector_ctx/join")->Arg(fib_base);

include/libfork/__impl/utils.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,8 @@
2525
* @brief Use like `std::forward` to perfectly forward an expression.
2626
*/
2727
#define LF_FWD(...) ::std::forward<decltype(__VA_ARGS__)>(__VA_ARGS__)
28+
29+
/**
30+
* @brief Use to define a `T` that is aligned to the required alignment of `std::atomic_ref<T>`.
31+
*/
32+
#define ATOMIC_ALIGN(T) alignas(std::atomic_ref<T>::required_alignment) T

src/core/concepts.cxx

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,28 @@ consteval auto constify(T &&x) noexcept -> std::add_const_t<T> &;
4040
/**
4141
* @brief Defines the API for a libfork compatible stack allocator.
4242
*
43-
* - After construction push is valid.
43+
* - After construction `this` is in the empty state and push is valid.
4444
* - Pop is valid provided the FILO order is respected.
4545
* - Destruction is expected to only occur when the stack is empty.
46-
* - Result of `.checkpoint()` is expected to be "cheap to copy".
47-
* - Switch releases the current stack and resumes from the checkpoint:
48-
* - This is a noop if the checkpoint is from this stack.
49-
* - If the checkpoint is default-constructed it is expected to switch to a new stack.
46+
* - Result of `.checkpoint()` is expected to:
47+
* - Be "cheap to copy".
48+
* - Compare equal if they belong to the same stack.
49+
* - Release detaches the current stack and leaves `this` in the empty state.
50+
* - Acquire attaches to the stack that the checkpoint came from:
51+
* - This is a noop if the checkpoint is from the current stack.
52+
* - Otherwise `this` is empty.
5053
*
5154
* Fast-path operations: empty, push, pop, checkpoint
52-
* Slow-path operations: switch
55+
* Slow-path operations: release, acquire
5356
*/
5457
export template <typename T>
5558
concept stack_allocator = std::is_object_v<T> && requires (T alloc, std::size_t n, void *ptr) {
5659
// { alloc.empty() } noexcept -> std::same_as<bool>;
5760
{ alloc.push(n) } -> std::same_as<void *>;
5861
{ alloc.pop(ptr, n) } noexcept -> std::same_as<void>;
59-
{ alloc.checkpoint() } noexcept -> std::semiregular;
60-
{ alloc.switch_to({}) } noexcept -> std::same_as<void>;
61-
{ alloc.switch_to(constify(alloc.checkpoint())) } noexcept -> std::same_as<void>;
62+
{ alloc.checkpoint() } noexcept -> std::regular;
63+
{ alloc.release() } noexcept -> std::same_as<void>;
64+
{ alloc.acquire(constify(alloc.checkpoint())) } noexcept -> std::same_as<void>;
6265
};
6366

6467
/**

src/core/constants.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ namespace lf {
88
constexpr std::size_t k_kilobyte = 1024;
99
constexpr std::size_t k_megabyte = 1024 * k_kilobyte;
1010

11+
constexpr std::uint32_t k_u16_max = std::numeric_limits<std::uint16_t>::max();
12+
1113
export constexpr std::size_t k_new_align = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
1214
export constexpr std::size_t k_cache_line = std::hardware_destructive_interference_size;
1315

src/core/frame.cxx

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export module libfork.core:frame;
55
import std;
66

77
import :concepts;
8+
import :constants;
89

910
namespace lf {
1011

@@ -26,18 +27,39 @@ struct frame_type {
2627
using allocator_type = allocator_t<Context>;
2728
using checkpoint_type = checkpoint_t<allocator_type>;
2829

29-
frame_type *parent = nullptr;
30-
cancellation *cancel = nullptr;
30+
frame_type *parent;
31+
cancellation *cancel;
32+
3133
[[no_unique_address]]
3234
checkpoint_type stack_ckpt;
3335

34-
std::uint32_t merges = 0; // Atomic is 32 bits for speed
35-
std::uint16_t steals = 0; // In debug do overflow checking
36-
category kind = category::call; // Fork/Call/Just/Root
37-
std::uint8_t exception_bit = 0; // Atomically set
36+
ATOMIC_ALIGN(std::uint32_t) joins = 0; // Atomic is 32 bits for speed
37+
std::uint16_t steals = 0; // In debug do overflow checking
38+
category kind = static_cast<category>(0); // Fork/Call/Just/Root
39+
ATOMIC_ALIGN(std::uint8_t) exception_bit = 0; // Atomically set
40+
41+
// Explicitly assign `joins` post construction; this allows the compiler to emit a single
42+
// instruction for the zero init then an instruction for the joins init,
43+
// instead of three instructions.
44+
constexpr frame_type() noexcept { joins = k_u16_max; }
3845

3946
[[nodiscard]]
4047
constexpr auto handle() LF_HOF(std::coroutine_handle<frame_type>::from_promise(*this))
48+
49+
[[nodiscard]]
50+
constexpr auto atomic_joins() noexcept -> std::atomic_ref<std::uint32_t> {
51+
return std::atomic_ref{joins};
52+
}
53+
54+
[[nodiscard]]
55+
constexpr auto atomic_except() noexcept -> std::atomic_ref<std::uint8_t> {
56+
return std::atomic_ref{exception_bit};
57+
}
58+
59+
constexpr void reset_counters() noexcept {
60+
joins = k_u16_max;
61+
steals = 0;
62+
}
4163
};
4264

4365
// =================== Handle =================== //

src/core/ops.cxx

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,47 +13,56 @@ namespace lf {
1313
// clang-format off
1414

1515
template <typename R, typename Fn, typename... Args>
16-
struct pkg {
16+
struct pkg : immovable {
1717
R *return_address;
1818
[[no_unique_address]] Fn fn;
1919
[[no_unique_address]] tuple<Args...> args;
2020
};
2121

2222
template <typename Fn, typename... Args>
23-
struct pkg<void, Fn, Args...> {
23+
struct pkg<void, Fn, Args...> : immovable {
2424
[[no_unique_address]] Fn fn;
2525
[[no_unique_address]] tuple<Args...> args;
2626
};
2727

2828
// clang-format on
2929

30+
// TODO: consider a prelude namespace for these ops + task
31+
// TODO: consider niebloids to block ADL
32+
3033
// ======== Fork ======== //
3134

3235
template <typename R, typename Fn, typename... Args>
33-
struct [[nodiscard("You should immediately co_await this!")]] fork_pkg : pkg<R, Fn, Args...>, immovable {};
36+
struct [[nodiscard("You should immediately co_await this!")]] fork_pkg : pkg<R, Fn, Args...> {};
3437

3538
export template <typename... Args, async_invocable_to<void, Args...> Fn>
3639
constexpr auto fork(Fn &&fn, Args &&...args) noexcept -> fork_pkg<void, Fn, Args &&...> {
37-
return {LF_FWD(fn), {LF_FWD(args)...}};
40+
return {{.fn = LF_FWD(fn), .args = {LF_FWD(args)...}}};
3841
}
3942

4043
export template <typename R, typename... Args, async_invocable_to<R, Args...> Fn>
4144
constexpr auto fork(R *ret, Fn &&fn, Args &&...args) noexcept -> fork_pkg<R, Fn, Args &&...> {
42-
return {ret, LF_FWD(fn), {LF_FWD(args)...}};
45+
return {{.return_address = ret, .fn = LF_FWD(fn), .args = {LF_FWD(args)...}}};
4346
}
4447
// ======== Call ======== //
4548

4649
template <typename R, typename Fn, typename... Args>
47-
struct [[nodiscard("You should immediately co_await this!")]] call_pkg : pkg<R, Fn, Args...>, immovable {};
50+
struct [[nodiscard("You should immediately co_await this!")]] call_pkg : pkg<R, Fn, Args...> {};
4851

4952
export template <typename... Args, async_invocable_to<void, Args...> Fn>
5053
constexpr auto call(Fn &&fn, Args &&...args) noexcept -> call_pkg<void, Fn, Args &&...> {
51-
return {LF_FWD(fn), {LF_FWD(args)...}};
54+
return {{.fn = LF_FWD(fn), .args = {LF_FWD(args)...}}};
5255
}
5356

5457
export template <typename R, typename... Args, async_invocable_to<R, Args...> Fn>
5558
constexpr auto call(R *ret, Fn &&fn, Args &&...args) noexcept -> call_pkg<R, Fn, Args &&...> {
56-
return {ret, LF_FWD(fn), {LF_FWD(args)...}};
59+
return {{.return_address = ret, .fn = LF_FWD(fn), .args = {LF_FWD(args)...}}};
5760
}
5861

62+
// =============== Join =============== //
63+
64+
struct [[nodiscard("You should immediately co_await this!")]] join_type {};
65+
66+
export constexpr auto join() noexcept -> join_type { return {}; }
67+
5968
} // namespace lf

0 commit comments

Comments
 (0)