Skip to content

Commit 6b7abcf

Browse files
[V4] Baremetal (#54)
* copy in coro.hpp * diff * rename * refactor * tidy * de templatify * !TMP faster * Revert "!TMP faster" This reverts commit ca602b7. * fast * clean up new API * fix order
1 parent 91e14c5 commit 6b7abcf

6 files changed

Lines changed: 163 additions & 58 deletions

File tree

benchmark/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ target_sources(libfork_benchmark
4242
src/libfork_benchmark/fib/serial_return.cpp
4343
)
4444

45+
# ---- Baremetal ----
46+
47+
target_sources(libfork_benchmark
48+
PRIVATE
49+
src/libfork_benchmark/fib/baremetal.cpp
50+
)
51+
4552
# ---- Libfork ----
4653

4754
target_sources(libfork_benchmark
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#include <coroutine>
2+
#include <cstddef>
3+
#include <exception>
4+
#include <memory>
5+
6+
#include <benchmark/benchmark.h>
7+
8+
#include "libfork_benchmark/fib/fib.hpp"
9+
10+
// === Coroutine
11+
12+
namespace {
13+
14+
struct task {
15+
struct promise_type : fib_bump_allocator {
16+
17+
auto get_return_object() -> task { return {std::coroutine_handle<promise_type>::from_promise(*this)}; }
18+
19+
auto initial_suspend() -> std::suspend_always { return {}; }
20+
21+
auto final_suspend() noexcept {
22+
struct final_awaitable : std::suspend_always {
23+
auto await_suspend(std::coroutine_handle<promise_type> h) noexcept -> std::coroutine_handle<> {
24+
25+
std::coroutine_handle<> cont = h.promise().continuation;
26+
27+
h.destroy();
28+
29+
if (cont) {
30+
return cont;
31+
}
32+
33+
return std::noop_coroutine();
34+
}
35+
};
36+
37+
return final_awaitable{};
38+
}
39+
40+
void return_value(std::int64_t val) { *value = val; }
41+
void unhandled_exception() { std::terminate(); }
42+
43+
std::int64_t *value = nullptr;
44+
std::coroutine_handle<> continuation = nullptr;
45+
};
46+
47+
std::coroutine_handle<promise_type> coro;
48+
49+
auto set(std::int64_t &out) -> task & {
50+
coro.promise().value = &out;
51+
return *this;
52+
}
53+
54+
auto await_ready() noexcept -> bool { return false; }
55+
56+
auto await_suspend(std::coroutine_handle<> h) -> std::coroutine_handle<promise_type> {
57+
coro.promise().continuation = h;
58+
return coro;
59+
}
60+
61+
void await_resume() noexcept {}
62+
};
63+
64+
/**
 * @brief Recursive Fibonacci as a coroutine.
 *
 * The two sub-problems are awaited one after the other; each child writes
 * its result into a local of this frame through `task::set`.
 *
 * @param n Index into the Fibonacci sequence; values below two are returned unchanged.
 */
auto fib(std::int64_t n) -> task {
  if (n < 2) {
    co_return n;
  }

  std::int64_t lhs = 0;
  std::int64_t rhs = 0;

  co_await fib(n - 1).set(lhs);
  co_await fib(n - 2).set(rhs);

  co_return lhs + rhs;
}
74+
75+
/**
 * @brief Benchmark body: run the hand-written `fib` coroutine with no scheduler.
 *
 * Coroutine frames are carved out of a local buffer through the thread-local
 * bump cursor `fib_bump_ptr`. Each iteration builds the root coroutine,
 * resumes it once and checks the result against `fib_ref`.
 *
 * @param state Benchmark state; `state.range(0)` supplies the Fibonacci index.
 */
void fib_coro_no_queue(benchmark::State &state) {

  std::int64_t n = state.range(0);
  std::int64_t expect = fib_ref(n);

  state.counters["n"] = static_cast<double>(n);

  // 8MB stack
  std::unique_ptr buffer = std::make_unique<std::byte[]>(1024 * 1024 * 8);
  fib_bump_ptr = buffer.get();

  for (auto _ : state) {
    benchmark::DoNotOptimize(n);
    std::int64_t result = 0;
    // The root coroutine has no continuation, so resume() returns once the
    // whole recursion has finished and `result` has been written.
    fib(n).set(result).coro.resume();
    CHECK_RESULT(result, expect);
    benchmark::DoNotOptimize(result);
  }

  // Every frame frees itself at final suspend, so the cursor must be back at
  // the start of the buffer once all iterations are done.
  if (fib_bump_ptr != buffer.get()) {
    std::terminate(); // Stack leak
  }

  // `buffer` is released when this function returns; do not leave the
  // thread-local cursor pointing into freed memory.
  fib_bump_ptr = nullptr;
}
98+
99+
} // namespace
100+
101+
BENCHMARK(fib_coro_no_queue)->Name("test/baremetal/fib")->Arg(fib_test);
102+
BENCHMARK(fib_coro_no_queue)->Name("base/baremetal/fib")->Arg(fib_base);

benchmark/src/libfork_benchmark/fib/fib.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include <bit>
4+
#include <cstddef>
35
#include <cstdint>
46

57
#include "libfork_benchmark/common.hpp"
@@ -27,3 +29,27 @@ constexpr auto fib_ref(std::int64_t n) -> std::int64_t {
2729

2830
return curr;
2931
}
32+
33+
// === Shared Allocator Logic ===
34+
35+
/// Granularity (in bytes) to which every bump allocation is rounded up.
inline constexpr std::size_t k_fib_align = 2 * sizeof(void *);

// The mask arithmetic in fib_align_size is only correct for power-of-two alignments.
static_assert((k_fib_align & (k_fib_align - 1)) == 0, "k_fib_align must be a power of two");

/**
 * @brief Round `n` up to the next multiple of `k_fib_align`.
 *
 * @param n Size in bytes.
 * @return The smallest multiple of `k_fib_align` that is >= `n`.
 */
[[nodiscard]]
constexpr auto fib_align_size(std::size_t n) noexcept -> std::size_t {
  return (n + k_fib_align - 1) & ~(k_fib_align - 1);
}

/// Per-thread bump cursor: the address the next allocation will return.
/// It must be pointed at a buffer before the first allocation.
inline thread_local std::byte *fib_bump_ptr = nullptr;

/**
 * @brief Allocation mixin that serves memory from `fib_bump_ptr`.
 *
 * No bounds checking is performed. `operator delete` rewinds the cursor to
 * the released block, so blocks must be released in reverse order of
 * allocation for the cursor to stay consistent.
 */
struct fib_bump_allocator {

  /// Return the current cursor and advance it by `sz` rounded up to `k_fib_align`.
  static auto operator new(std::size_t sz) -> void * {
    auto *prev = fib_bump_ptr;
    fib_bump_ptr += fib_align_size(sz);
    return prev;
  }

  /// Rewind the cursor to the start of the block being released.
  static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
    // void* -> object pointer is exactly what static_cast is for.
    fib_bump_ptr = static_cast<std::byte *>(p);
  }
};

benchmark/src/libfork_benchmark/fib/lf_parts.cpp

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,6 @@ struct stack_on_heap {
1919
}
2020
};
2121

22-
thread_local static std::byte *sp = nullptr;
23-
24-
[[nodiscard]]
25-
auto align(std::size_t n) -> std::size_t {
26-
return (n + lf::k_new_align - 1) & ~(lf::k_new_align - 1);
27-
}
28-
29-
struct tls_stack {
30-
31-
static auto operator new(std::size_t sz) -> void * {
32-
auto *prev = sp;
33-
sp += align(sz);
34-
return prev;
35-
}
36-
37-
static auto operator delete([[maybe_unused]] void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
38-
sp = std::bit_cast<std::byte *>(p);
39-
}
40-
};
41-
4222
template <lf::alloc_mixin StackPolicy>
4323
constexpr auto no_await =
4424
[](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task<void, StackPolicy> {
@@ -50,8 +30,8 @@ constexpr auto no_await =
5030
std::int64_t lhs = 0;
5131
std::int64_t rhs = 0;
5232

53-
fib(&lhs, n - 1).release()->handle().resume();
54-
fib(&rhs, n - 2).release()->handle().resume();
33+
fib(&lhs, n - 1).promise->handle().resume();
34+
fib(&rhs, n - 2).promise->handle().resume();
5535

5636
*ret = lhs + rhs;
5737
};
@@ -82,19 +62,19 @@ void fib(benchmark::State &state) {
8262

8363
std::unique_ptr buffer = std::make_unique<std::byte[]>(1024 * 1024);
8464

85-
sp = buffer.get();
65+
fib_bump_ptr = buffer.get();
8666

8767
for (auto _ : state) {
8868
benchmark::DoNotOptimize(n);
8969
std::int64_t result = 0;
9070

91-
Fn(&result, n).release()->handle().resume();
71+
Fn(&result, n).promise->handle().resume();
9272

9373
CHECK_RESULT(result, expect);
9474
benchmark::DoNotOptimize(result);
9575
}
9676

97-
if (sp != buffer.get()) {
77+
if (fib_bump_ptr != buffer.get()) {
9878
LF_TERMINATE("Stack leak detected");
9979
}
10080
}
@@ -107,8 +87,8 @@ BENCHMARK(fib<no_await<stack_on_heap>>)->Name("base/libfork/fib/heap/no_await")-
10787
BENCHMARK(fib<await<stack_on_heap>>)->Name("test/libfork/fib/heap/await")->Arg(fib_test);
10888
BENCHMARK(fib<await<stack_on_heap>>)->Name("base/libfork/fib/heap/await")->Arg(fib_base);
10989

110-
BENCHMARK(fib<no_await<tls_stack>>)->Name("test/libfork/fib/data/no_await")->Arg(fib_test);
111-
BENCHMARK(fib<no_await<tls_stack>>)->Name("base/libfork/fib/data/no_await")->Arg(fib_base);
90+
BENCHMARK(fib<no_await<fib_bump_allocator>>)->Name("test/libfork/fib/data/no_await")->Arg(fib_test);
91+
BENCHMARK(fib<no_await<fib_bump_allocator>>)->Name("base/libfork/fib/data/no_await")->Arg(fib_base);
11292

113-
BENCHMARK(fib<await<tls_stack>>)->Name("test/libfork/fib/data/await")->Arg(fib_test);
114-
BENCHMARK(fib<await<tls_stack>>)->Name("base/libfork/fib/data/await")->Arg(fib_base);
93+
BENCHMARK(fib<await<fib_bump_allocator>>)->Name("test/libfork/fib/data/await")->Arg(fib_test);
94+
BENCHMARK(fib<await<fib_bump_allocator>>)->Name("base/libfork/fib/data/await")->Arg(fib_base);

src/core/frame.cxx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1+
module;
2+
#include "libfork/__impl/utils.hpp"
13
export module libfork.core:frame;
24

35
import std;
46

57
namespace lf {
68

79
struct frame_type {
10+
811
frame_type *parent = nullptr;
12+
13+
[[nodiscard]]
14+
constexpr auto handle() LF_HOF(std::coroutine_handle<frame_type>::from_promise(*this))
915
};
1016

1117
static_assert(std::is_standard_layout_v<frame_type>);

src/core/promise.cxx

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,6 @@ struct promise_type;
2020

2121
// =============== Task =============== //
2222

23-
/**
24-
* @brief `std::unique_ptr` compatible deleter for coroutine promises.
25-
*/
26-
struct promise_deleter {
27-
template <typename T>
28-
constexpr static void operator()(T *ptr) noexcept {
29-
std::coroutine_handle<T>::from_promise(*ptr).destroy();
30-
}
31-
};
32-
33-
template <typename T>
34-
using unique_promise = std::unique_ptr<T, promise_deleter>;
35-
3623
/**
3724
* @brief The return type for libfork's async functions/coroutines.
3825
*
@@ -50,7 +37,9 @@ using unique_promise = std::unique_ptr<T, promise_deleter>;
5037
* \endrst
5138
*/
5239
export template <returnable T, alloc_mixin Stack>
53-
struct task final : unique_promise<promise_type<T, Stack>> {};
40+
struct task {
41+
promise_type<T, Stack> *promise;
42+
};
5443

5544
// =============== Frame-mixin =============== //
5645

@@ -61,13 +50,11 @@ constexpr auto final_suspend(frame_type *frame) -> std::coroutine_handle<> {
6150

6251
frame_type *parent_frame = frame->parent;
6352

64-
{
65-
// Destroy the child frame
66-
unique_promise<frame_type> _{frame};
67-
}
53+
// Destroy the child frame
54+
frame->handle().destroy();
6855

6956
if (parent_frame != nullptr) {
70-
return std::coroutine_handle<frame_type>::from_promise(*parent_frame);
57+
return parent_frame->handle();
7158
}
7259

7360
return std::noop_coroutine();
@@ -81,22 +68,19 @@ struct final_awaitable : std::suspend_always {
8168
}
8269
};
8370

84-
// TODO: can we type-erase T/Policy here?
85-
86-
template <typename T, alloc_mixin StackPolicy>
8771
struct just_awaitable : std::suspend_always {
8872

89-
task<T, StackPolicy> child;
73+
frame_type *child;
9074

9175
template <typename... Us>
9276
auto await_suspend(std::coroutine_handle<promise_type<Us...>> parent) noexcept -> std::coroutine_handle<> {
9377

9478
LF_ASSUME(child != nullptr);
95-
LF_ASSUME(child->frame.parent == nullptr);
79+
LF_ASSUME(child->parent == nullptr);
9680

97-
child->frame.parent = &parent.promise().frame;
81+
child->parent = &parent.promise().frame;
9882

99-
return child.release()->handle();
83+
return child->handle();
10084
}
10185
};
10286

@@ -116,8 +100,8 @@ struct mixin_frame {
116100
// === Called by the compiler === //
117101

118102
template <alloc_mixin P>
119-
static constexpr auto await_transform(task<void, P> child) -> just_awaitable<void, P> {
120-
return {.child = std::move(child)};
103+
constexpr static auto await_transform(task<void, P> child) noexcept -> just_awaitable {
104+
return {.child = &child.promise->frame};
121105
}
122106

123107
constexpr static auto initial_suspend() noexcept -> std::suspend_always { return {}; }
@@ -136,7 +120,7 @@ struct promise_type<void, StackPolicy> : StackPolicy, mixin_frame {
136120

137121
frame_type frame;
138122

139-
constexpr auto get_return_object() -> task<void, StackPolicy> { return {{this, {}}}; }
123+
constexpr auto get_return_object() -> task<void, StackPolicy> { return {.promise = this}; }
140124

141125
constexpr static void return_void() {}
142126
};

0 commit comments

Comments
 (0)