Skip to content

Commit 25165b7

Browse files
[V4] Half thread-local storage (#59)
* stack concept * refined concepts * context concepts * thoughts * wip * invocability * wip * handle const contexts/stacks * .switch API for stack allocator * rm no format bits * make frame_handle templated on Context * todo * rename * rename context -> worker_context * tweak * add comment * fixes * comment out bits * ops in struct * compiling * just compiling * type-check promise * polymorphic + global alocator * no await * linear alloc (unused) * break circular dep * first arg but no destroy * running with global again * stash in allocation * some tail code * no tls downpropagate call * set ctx call return * fix spell * rename * revert context/first arg in concepts * Revert "ops in struct" This reverts commit a6ab424. * re-apply promise tls usage * fix benchmarks * more parts * restore fork path * poly vector context * add final * tidy ups * drop dead function * store checkpoints * rm todo * rm dead includes * rm dead comments * fixup comments * comments * move static asserts to test file * rm dead benchmark code * constructor for polymorphic context * remove definition from constify * header * reduce buffer size * clean up of thread context * default constructor
1 parent 6e127a8 commit 25165b7

9 files changed

Lines changed: 348 additions & 174 deletions

File tree

benchmark/src/libfork_benchmark/fib/fib.hpp

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,32 +59,41 @@ struct tls_bump {
5959
}
6060
};
6161

62-
constinit inline std::byte *bump_ptr = nullptr;
62+
// === Shared Context Logic ===
6363

64-
struct global_bump {
64+
template <lf::stack_allocator Alloc>
65+
struct vector_ctx {
6566

66-
static auto operator new(std::size_t sz) -> void * {
67-
auto *prev = bump_ptr;
68-
bump_ptr += fib_align_size(sz);
69-
return prev;
70-
}
67+
using handle_type = lf::frame_handle<vector_ctx>;
7168

72-
static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
73-
bump_ptr = std::bit_cast<std::byte *>(p);
69+
std::vector<handle_type> work;
70+
Alloc allocator;
71+
72+
vector_ctx() { work.reserve(1024); }
73+
74+
auto alloc() noexcept -> Alloc & { return allocator; }
75+
76+
void push(handle_type handle) { work.push_back(handle); }
77+
78+
auto pop() noexcept -> handle_type {
79+
auto handle = work.back();
80+
work.pop_back();
81+
return handle;
7482
}
7583
};
7684

77-
// === Shared Context Logic ===
85+
template <lf::stack_allocator Alloc>
86+
struct poly_vector_ctx final : lf::polymorphic_context<Alloc> {
7887

79-
struct vector_ctx final : lf::polymorphic_context {
88+
using handle_type = lf::frame_handle<lf::polymorphic_context<Alloc>>;
8089

81-
std::vector<lf::work_handle> work;
90+
std::vector<handle_type> work;
8291

83-
vector_ctx() { work.reserve(1024); }
92+
poly_vector_ctx() { work.reserve(1024); }
8493

85-
void push(lf::work_handle handle) override { work.push_back(handle); }
94+
void push(handle_type handle) override { work.push_back(handle); }
8695

87-
auto pop() noexcept -> lf::work_handle override {
96+
auto pop() noexcept -> handle_type override {
8897
auto handle = work.back();
8998
work.pop_back();
9099
return handle;

benchmark/src/libfork_benchmark/fib/lf_parts.cpp

Lines changed: 70 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,41 @@ import libfork.core;
1212

1313
namespace {
1414

15-
struct stack_on_heap {
16-
static constexpr auto operator new(std::size_t sz) -> void * { return ::operator new(sz); }
17-
static constexpr auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
18-
::operator delete(p, sz);
15+
struct global_allocator {
16+
17+
struct empty {};
18+
19+
constexpr static auto push(std::size_t sz) -> void * { return ::operator new(sz); }
20+
constexpr static auto pop(void *p, std::size_t sz) noexcept -> void { ::operator delete(p, sz); }
21+
constexpr static auto checkpoint() noexcept -> empty { return {}; }
22+
constexpr static auto switch_to(empty) noexcept -> void {}
23+
};
24+
25+
static_assert(lf::stack_allocator<global_allocator>);
26+
27+
struct linear_allocator {
28+
29+
std::unique_ptr<std::byte[]> data = std::make_unique<std::byte[]>(1024 * 1024);
30+
std::byte *ptr = data.get();
31+
32+
constexpr auto push(std::size_t sz) -> void * {
33+
auto *prev = ptr;
34+
ptr += fib_align_size(sz);
35+
return prev;
1936
}
37+
constexpr auto pop(void *p, std::size_t) noexcept -> void { ptr = static_cast<std::byte *>(p); }
38+
39+
constexpr auto checkpoint() noexcept -> std::byte * { return data.get(); }
40+
41+
constexpr static auto switch_to(std::byte *) noexcept -> void {}
2042
};
2143

22-
template <lf::alloc_mixin Stack>
23-
constexpr auto no_await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, Stack> {
44+
static_assert(lf::stack_allocator<linear_allocator>);
45+
46+
using lf::task;
47+
48+
template <lf::worker_context T>
49+
constexpr auto no_await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> task<void, T> {
2450
if (n < 2) {
2551
*ret = n;
2652
co_return;
@@ -40,8 +66,8 @@ constexpr auto no_await = [](this auto fib, std::int64_t *ret, std::int64_t n) -
4066
*ret = lhs + rhs;
4167
};
4268

43-
template <lf::alloc_mixin Stack>
44-
constexpr auto await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, Stack> {
69+
template <lf::worker_context T>
70+
constexpr auto await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, T> {
4571
if (n < 2) {
4672
*ret = n;
4773
co_return;
@@ -56,7 +82,8 @@ constexpr auto await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> l
5682
*ret = lhs + rhs;
5783
};
5884

59-
constexpr auto ret = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, tls_bump> {
85+
template <lf::worker_context T>
86+
constexpr auto ret = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, T> {
6087
if (n < 2) {
6188
co_return n;
6289
}
@@ -70,8 +97,8 @@ constexpr auto ret = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t,
7097
co_return lhs + rhs;
7198
};
7299

73-
template <typename Ctx, typename A = tls_bump>
74-
constexpr auto fork_call = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, A, Ctx> {
100+
template <typename T>
101+
constexpr auto fork_call = [](this auto fib, std::int64_t n) -> lf::task<std::int64_t, T> {
75102
if (n < 2) {
76103
co_return n;
77104
}
@@ -85,23 +112,24 @@ constexpr auto fork_call = [](this auto fib, std::int64_t n) -> lf::task<std::in
85112
co_return lhs + rhs;
86113
};
87114

88-
template <auto Fn>
115+
using global_alloc = vector_ctx<global_allocator>;
116+
using linear_alloc = vector_ctx<linear_allocator>;
117+
118+
template <auto Fn, typename T, typename U = T>
89119
void fib(benchmark::State &state) {
90120

91121
std::int64_t n = state.range(0);
92122
std::int64_t expect = fib_ref(n);
93123

94124
state.counters["n"] = static_cast<double>(n);
95125

96-
// Set bump allocator buffer
97-
std::unique_ptr buf = std::make_unique<std::byte[]>(1024 * 1024);
98-
tls_bump_ptr = buf.get();
99-
bump_ptr = buf.get();
126+
T context;
127+
128+
lf::thread_context<U> = static_cast<U *>(&context);
100129

101-
// Set both context and poly context
102-
std::unique_ptr ctx = std::make_unique<vector_ctx>();
103-
lf::thread_context<vector_ctx> = ctx.get();
104-
lf::thread_context<lf::polymorphic_context> = ctx.get();
130+
lf::defer _ = [] static noexcept {
131+
lf::thread_context<U> = nullptr;
132+
};
105133

106134
for (auto _ : state) {
107135
benchmark::DoNotOptimize(n);
@@ -121,55 +149,44 @@ void fib(benchmark::State &state) {
121149
CHECK_RESULT(result, expect);
122150
benchmark::DoNotOptimize(result);
123151
}
124-
125-
if (tls_bump_ptr != buf.get() || bump_ptr != buf.get()) {
126-
LF_TERMINATE("Stack leak detected");
127-
}
128-
129-
tls_bump_ptr = nullptr;
130-
bump_ptr = nullptr;
131-
lf::thread_context<vector_ctx> = nullptr;
132-
lf::thread_context<lf::polymorphic_context> = nullptr;
133152
}
134153

135154
} // namespace
136155

137-
// Return by ref-arg, test direct root, no co-await, direct resumes, uses new/delete for alloc
138-
BENCHMARK(fib<no_await<stack_on_heap>>)->Name("test/libfork/fib/heap/no_await")->Arg(fib_test);
139-
BENCHMARK(fib<no_await<stack_on_heap>>)->Name("base/libfork/fib/heap/no_await")->Arg(fib_base);
156+
static_assert(lf::worker_context<global_alloc>);
140157

141-
// Same as above but uses tls bump allocator
142-
BENCHMARK(fib<no_await<tls_bump>>)->Name("test/libfork/fib/tls_bump/no_await")->Arg(fib_test);
143-
BENCHMARK(fib<no_await<tls_bump>>)->Name("base/libfork/fib/tls_bump/no_await")->Arg(fib_base);
158+
// Return by ref-arg, test direct root, no co-await, direct resumes, uses new/delete for alloc
159+
BENCHMARK(fib<no_await<global_alloc>, global_alloc>)->Name("test/libfork/fib/heap/no_await")->Arg(fib_test);
160+
BENCHMARK(fib<no_await<global_alloc>, global_alloc>)->Name("base/libfork/fib/heap/no_await")->Arg(fib_base);
144161

145-
// Same as above but with global bump allocator
146-
BENCHMARK(fib<no_await<global_bump>>)->Name("test/libfork/fib/global_bump/no_await")->Arg(fib_test);
147-
BENCHMARK(fib<no_await<global_bump>>)->Name("base/libfork/fib/global_bump/no_await")->Arg(fib_base);
162+
// Same as above but uses bump allocator
163+
BENCHMARK(fib<no_await<linear_alloc>, linear_alloc>)->Name("test/libfork/fib/bump/no_await")->Arg(fib_test);
164+
BENCHMARK(fib<no_await<linear_alloc>, linear_alloc>)->Name("base/libfork/fib/bump/no_await")->Arg(fib_base);
148165

149166
// TODO: no_await with segmented stack allocator?
150167

151168
// Return by ref-arg, libfork call/call with co-await, uses new/delete for alloc
152-
BENCHMARK(fib<await<stack_on_heap>>)->Name("test/libfork/fib/heap/await")->Arg(fib_test);
153-
BENCHMARK(fib<await<stack_on_heap>>)->Name("base/libfork/fib/heap/await")->Arg(fib_base);
169+
BENCHMARK(fib<await<global_alloc>, global_alloc>)->Name("test/libfork/fib/heap/await")->Arg(fib_test);
170+
BENCHMARK(fib<await<global_alloc>, global_alloc>)->Name("base/libfork/fib/heap/await")->Arg(fib_base);
154171

155-
// Same as above but uses tls bump allocator
156-
BENCHMARK(fib<await<tls_bump>>)->Name("test/libfork/fib/tls_bump/await")->Arg(fib_test);
157-
BENCHMARK(fib<await<tls_bump>>)->Name("base/libfork/fib/tls_bump/await")->Arg(fib_base);
158-
159-
// Same as above but with global bump allocator
160-
BENCHMARK(fib<await<global_bump>>)->Name("test/libfork/fib/global_bump/await")->Arg(fib_test);
161-
BENCHMARK(fib<await<global_bump>>)->Name("base/libfork/fib/global_bump/await")->Arg(fib_base);
172+
// Same as above but uses bump allocator
173+
BENCHMARK(fib<await<linear_alloc>, linear_alloc>)->Name("test/libfork/fib/bump/await")->Arg(fib_test);
174+
BENCHMARK(fib<await<linear_alloc>, linear_alloc>)->Name("base/libfork/fib/bump/await")->Arg(fib_base);
162175

163176
// Return by value
164177
// libfork call/call with co-await
165-
BENCHMARK(fib<ret>)->Name("test/libfork/fib/tls_bump/return")->Arg(fib_test);
166-
BENCHMARK(fib<ret>)->Name("base/libfork/fib/tls_bump/return")->Arg(fib_base);
178+
BENCHMARK(fib<ret<linear_alloc>, linear_alloc>)->Name("test/libfork/fib/bump/return")->Arg(fib_test);
179+
BENCHMARK(fib<ret<linear_alloc>, linear_alloc>)->Name("base/libfork/fib/bump/return")->Arg(fib_base);
167180

168181
// Return by value
169182
// libfork call/fork (no join)
170183
// Non-polymorphic vector-backed context
171-
BENCHMARK(fib<fork_call<vector_ctx>>)->Name("test/libfork/fib/vector_ctx")->Arg(fib_test);
172-
BENCHMARK(fib<fork_call<vector_ctx>>)->Name("base/libfork/fib/vector_ctx")->Arg(fib_base);
184+
BENCHMARK(fib<fork_call<linear_alloc>, linear_alloc>)->Name("test/libfork/fib/vector_ctx")->Arg(fib_test);
185+
BENCHMARK(fib<fork_call<linear_alloc>, linear_alloc>)->Name("base/libfork/fib/vector_ctx")->Arg(fib_base);
186+
187+
using A = poly_vector_ctx<linear_allocator>;
188+
using B = lf::polymorphic_context<linear_allocator>;
173189

174-
BENCHMARK(fib<fork_call<lf::polymorphic_context>>)->Name("test/libfork/fib/poly_vector_ctx")->Arg(fib_test);
175-
BENCHMARK(fib<fork_call<lf::polymorphic_context>>)->Name("base/libfork/fib/poly_vector_ctx")->Arg(fib_base);
190+
// Same as above but with polymorphic contexts.
191+
BENCHMARK(fib<fork_call<B>, A, B>)->Name("test/libfork/fib/poly_vector_ctx")->Arg(fib_test);
192+
BENCHMARK(fib<fork_call<B>, A, B>)->Name("base/libfork/fib/poly_vector_ctx")->Arg(fib_base);

src/core/concepts.cxx

Lines changed: 84 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@ import std;
55

66
namespace lf {
77

8+
// ========== Specialization ========== //
9+
10+
template <typename T, template <typename...> typename Template>
11+
struct is_specialization_of : std::false_type {};
12+
13+
template <template <typename...> typename Template, typename... Args>
14+
struct is_specialization_of<Template<Args...>, Template> : std::true_type {};
15+
16+
/**
17+
* @brief Test if `T` is a specialization of the template `Template`.
18+
*/
19+
export template <typename T, template <typename...> typename Template>
20+
concept specialization_of = is_specialization_of<std::remove_cvref_t<T>, Template>::value;
21+
22+
// ========== Task constraint related concepts ========== //
23+
24+
// ==== Returnable
25+
826
/**
927
* @brief A type returnable from libfork's async functions/coroutines.
1028
*
@@ -13,31 +31,78 @@ namespace lf {
1331
template <typename T>
1432
concept returnable = std::is_void_v<T> || std::movable<T>;
1533

34+
// ==== Stack
35+
1636
template <typename T>
17-
concept mixinable = std::is_empty_v<T> && !std::is_final_v<T>;
37+
requires std::is_object_v<T>
38+
consteval auto constify(T &&x) noexcept -> std::add_const_t<T> &;
1839

40+
/**
41+
* @brief Defines the API for a libfork compatible stack allocator.
42+
*
43+
* - After construction push is valid.
44+
* - Pop is valid provided the FILO order is respected.
45+
* - Destruction is expected to only occur when the stack is empty.
46+
* - Result of `.checkpoint()` is expected to be "cheap to copy".
47+
* - Switch releases the current stack and resumes from the checkpoint:
48+
* - This is a noop if the checkpoint is from this stack.
49+
* - If the checkpoint is default-constructed it is expected to switch to a new stack.
50+
*
51+
* Fast-path operations: push, pop, checkpoint (`empty` is currently commented out of the concept)
52+
* Slow-path operations: switch
53+
*/
1954
export template <typename T>
20-
concept alloc_mixin = mixinable<T> && requires (std::size_t n, T *ptr) {
21-
{ T::operator new(n) } -> std::same_as<void *>;
22-
{ T::operator delete(ptr, n) } noexcept -> std::same_as<void>;
55+
concept stack_allocator = std::is_object_v<T> && requires (T alloc, std::size_t n, void *ptr) {
56+
// { alloc.empty() } noexcept -> std::same_as<bool>;
57+
{ alloc.push(n) } -> std::same_as<void *>;
58+
{ alloc.pop(ptr, n) } noexcept -> std::same_as<void>;
59+
{ alloc.checkpoint() } noexcept -> std::semiregular;
60+
{ alloc.switch_to({}) } noexcept -> std::same_as<void>;
61+
{ alloc.switch_to(constify(alloc.checkpoint())) } noexcept -> std::same_as<void>;
2362
};
2463

25-
template <typename T, template <typename...> typename Template>
26-
struct is_specialization_of : std::false_type {};
64+
/**
65+
* @brief Fetch the checkpoint type of a stack allocator `T`.
66+
*/
67+
template <stack_allocator T>
68+
using checkpoint_t = decltype(std::declval<T &>().checkpoint());
2769

28-
template <template <typename...> typename Template, typename... Args>
29-
struct is_specialization_of<Template<Args...>, Template> : std::true_type {};
70+
// ==== Context
71+
72+
export template <typename T>
73+
class frame_handle;
74+
75+
template <typename T>
76+
concept ref_to_stack_allocator = std::is_lvalue_reference_v<T> && stack_allocator<std::remove_reference_t<T>>;
3077

3178
/**
32-
* @brief Test if `T` is a specialization of the template `Template`.
79+
* @brief Defines the API for a libfork compatible worker context.
80+
*
81+
* This requires that `T` is an object type and supports the following operations:
82+
*
83+
* - Push a frame handle onto the context and pop one off it in a LIFO manner.
84+
* - Have a `stack_allocator` that can be accessed via `alloc()`.
3385
*/
34-
template <typename T, template <typename...> typename Template>
35-
concept specialization_of = is_specialization_of<std::remove_cvref_t<T>, Template>::value;
86+
export template <typename T>
87+
concept worker_context = std::is_object_v<T> && requires (T ctx, frame_handle<T> handle) {
88+
{ ctx.alloc() } noexcept -> ref_to_stack_allocator;
89+
{ ctx.push(handle) } -> std::same_as<void>;
90+
{ ctx.pop() } noexcept -> std::same_as<frame_handle<T>>;
91+
};
92+
93+
/**
94+
* @brief Fetch the allocator type of a worker context `T`.
95+
*/
96+
template <worker_context T>
97+
using allocator_t = std::remove_reference_t<decltype(std::declval<T &>().alloc())>;
98+
99+
// ==== Forward-decl
36100

37-
// Forward-decl
38-
export template <returnable T, alloc_mixin Stack, typename Context>
101+
export template <returnable T, worker_context Context>
39102
struct task;
40103

104+
// ========== Invocability ========== //
105+
41106
/**
42107
* @brief Test if a callable `Fn` when invoked with `Args...` returns an `lf::task`.
43108
*/
@@ -50,9 +115,12 @@ concept async_invocable =
50115
*/
51116
export template <typename Fn, typename... Args>
52117
requires async_invocable<Fn, Args...>
53-
using async_result_t = std::invoke_result_t<Fn, Args...>::type;
118+
using async_result_t = std::invoke_result_t<Fn, Args...>::value_type;
54119

55-
template <typename Fn, typename R, typename... Args>
56-
concept async_invocable_to = async_invocable<Fn, Args...> && std::same_as<async_result_t<Fn, Args...>, R>;
120+
/**
121+
* @brief Subsumes `async_invocable` and checks the result type is `R`.
122+
*/
123+
export template <typename Fn, typename R, typename... Args>
124+
concept async_invocable_to = async_invocable<Fn, Args...> && std::same_as<R, async_result_t<Fn, Args...>>;
57125

58126
} // namespace lf

0 commit comments

Comments
 (0)