Skip to content

Commit 42fa179

Browse files
committed
perf/bench: align asio benches with corosio for apples-to-apples comparison
Add _lockless variants to asio and asio_callback across socket_throughput, socket_latency, local_socket_throughput, local_socket_latency, fan_out, accept_churn, http_server, and timer, mirroring corosio's single-threaded configurations by constructing the io_context with BOOST_ASIO_CONCURRENCY_HINT_UNSAFE. Follows the existing pattern in io_context_bench.cpp. Move throughput byte accounting out of the read loop into a local int64_t accumulator, calling state.add_bytes once after ioc.run() returns. The previous per-read state.add_bytes was an atomic fetch_add on every completion, which added ~20ns × N_reads to the measured elapsed time and structurally disadvantaged the faster library at small chunk sizes. Multithread benches still use atomic aggregation (required for correctness across N runner threads).
1 parent 3fc8c97 commit 42fa179

18 files changed

Lines changed: 1765 additions & 18 deletions

perf/bench/asio/callback/accept_churn_bench.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <boost/asio/post.hpp>
1717
#include <boost/asio/read.hpp>
1818
#include <boost/asio/write.hpp>
19+
#include <boost/asio/detail/concurrency_hint.hpp>
#include <boost/asio/steady_timer.hpp>
1920

2021
#include <atomic>
2122
#include <chrono>
@@ -187,6 +188,35 @@ bench_sequential_churn(bench::state& state)
187188
acc.close();
188189
}
189190

191+
void
192+
bench_sequential_churn_lockless(bench::state& state)
193+
{
194+
asio::io_context ioc(BOOST_ASIO_CONCURRENCY_HINT_UNSAFE);
195+
auto acc = make_churn_acceptor( ioc );
196+
auto ep = tcp::endpoint( asio::ip::address_v4::loopback(), acc.local_endpoint().port() );
197+
198+
std::atomic<bool> running{true};
199+
200+
sequential_churn_op op{ioc, acc, ep, running, state.latency(),
201+
state.ops(), {}, {}, {}};
202+
203+
perf::stopwatch total_sw;
204+
205+
op.start();
206+
207+
std::thread timer([&]() {
208+
std::this_thread::sleep_for(std::chrono::duration<double>(state.duration()));
209+
running.store(false, std::memory_order_relaxed);
210+
ioc.stop();
211+
});
212+
213+
ioc.run();
214+
timer.join();
215+
216+
state.set_elapsed(total_sw.elapsed_seconds());
217+
acc.close();
218+
}
219+
190220
// N independent accept loops on separate listeners. Reveals whether
191221
// fd allocation or acceptor state scales linearly under callbacks.
192222
void
@@ -344,6 +374,39 @@ bench_burst_churn(bench::state& state)
344374
acc.close();
345375
}
346376

377+
void
378+
bench_burst_churn_lockless(bench::state& state)
379+
{
380+
int burst_size = static_cast<int>(state.range(0));
381+
state.counters["burst_size"] = burst_size;
382+
383+
asio::io_context ioc(BOOST_ASIO_CONCURRENCY_HINT_UNSAFE);
384+
auto acc = make_churn_acceptor( ioc );
385+
auto ep = tcp::endpoint( asio::ip::address_v4::loopback(), acc.local_endpoint().port() );
386+
387+
std::atomic<bool> running{true};
388+
389+
burst_churn_op op{ioc, acc, ep, running, state.latency(),
390+
state.ops(), burst_size, {}, {}, {},
391+
{}};
392+
393+
perf::stopwatch total_sw;
394+
395+
op.start();
396+
397+
std::thread stopper([&]() {
398+
std::this_thread::sleep_for(std::chrono::duration<double>(state.duration()));
399+
running.store(false, std::memory_order_relaxed);
400+
ioc.stop();
401+
});
402+
403+
ioc.run();
404+
stopper.join();
405+
406+
state.set_elapsed(total_sw.elapsed_seconds());
407+
acc.close();
408+
}
409+
347410
} // anonymous namespace
348411

349412
bench::benchmark_suite
@@ -352,9 +415,12 @@ make_accept_churn_suite()
352415
using F = bench::bench_flags;
353416
return bench::benchmark_suite("accept_churn", F::needs_conntrack_drain)
354417
.add("sequential", bench_sequential_churn)
418+
.add("sequential_lockless", bench_sequential_churn_lockless)
355419
.add("concurrent", bench_concurrent_churn)
356420
.args({1, 4, 16})
357421
.add("burst", bench_burst_churn)
422+
.args({10, 100})
423+
.add("burst_lockless", bench_burst_churn_lockless)
358424
.args({10, 100});
359425
}
360426

perf/bench/asio/callback/fan_out_bench.cpp

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <boost/asio/read.hpp>
1717
#include <boost/asio/steady_timer.hpp>
1818
#include <boost/asio/write.hpp>
19+
#include <boost/asio/detail/concurrency_hint.hpp>
1920

2021
#include <atomic>
2122
#include <chrono>
@@ -466,6 +467,154 @@ bench_concurrent_parents(bench::state& state)
466467
state.set_elapsed(sw.elapsed_seconds());
467468
}
468469

470+
void
471+
bench_fork_join_lockless(bench::state& state)
472+
{
473+
int fan_out = static_cast<int>(state.range(0));
474+
state.counters["fan_out"] = fan_out;
475+
476+
asio::io_context ioc(BOOST_ASIO_CONCURRENCY_HINT_UNSAFE);
477+
478+
std::vector<tcp_socket> clients;
479+
std::vector<tcp_socket> servers;
480+
clients.reserve(fan_out);
481+
servers.reserve(fan_out);
482+
483+
for (int i = 0; i < fan_out; ++i)
484+
{
485+
auto [c, s] = asio_bench::make_socket_pair(ioc);
486+
clients.push_back(std::move(c));
487+
servers.push_back(std::move(s));
488+
}
489+
490+
for (int i = 0; i < fan_out; ++i)
491+
{
492+
auto echo = std::make_shared<echo_server_op>(servers[i]);
493+
echo->start();
494+
}
495+
496+
fork_join_op op{ioc, clients, servers, fan_out, state, {}, {}};
497+
498+
op.start();
499+
500+
std::thread stopper([&]() {
501+
std::this_thread::sleep_for(
502+
std::chrono::duration<double>(state.duration()));
503+
state.stop();
504+
});
505+
506+
perf::stopwatch sw;
507+
ioc.run();
508+
stopper.join();
509+
510+
state.set_elapsed(sw.elapsed_seconds());
511+
}
512+
513+
void
514+
bench_nested_lockless(bench::state& state)
515+
{
516+
int groups = static_cast<int>(state.range(0));
517+
int subs_per_group = 4;
518+
int total_subs = groups * subs_per_group;
519+
520+
state.counters["groups"] = groups;
521+
state.counters["subs_per_group"] = subs_per_group;
522+
523+
asio::io_context ioc(BOOST_ASIO_CONCURRENCY_HINT_UNSAFE);
524+
525+
std::vector<tcp_socket> clients;
526+
std::vector<tcp_socket> servers;
527+
clients.reserve(total_subs);
528+
servers.reserve(total_subs);
529+
530+
for (int i = 0; i < total_subs; ++i)
531+
{
532+
auto [c, s] = asio_bench::make_socket_pair(ioc);
533+
clients.push_back(std::move(c));
534+
servers.push_back(std::move(s));
535+
}
536+
537+
for (int i = 0; i < total_subs; ++i)
538+
{
539+
auto echo = std::make_shared<echo_server_op>(servers[i]);
540+
echo->start();
541+
}
542+
543+
nested_op op{ioc, clients, servers, groups, subs_per_group,
544+
state, {}, {}, {}};
545+
546+
op.start();
547+
548+
std::thread stopper([&]() {
549+
std::this_thread::sleep_for(
550+
std::chrono::duration<double>(state.duration()));
551+
state.stop();
552+
});
553+
554+
perf::stopwatch sw;
555+
ioc.run();
556+
stopper.join();
557+
558+
state.set_elapsed(sw.elapsed_seconds());
559+
}
560+
561+
void
562+
bench_concurrent_parents_lockless(bench::state& state)
563+
{
564+
int num_parents = static_cast<int>(state.range(0));
565+
int fan_out = 16;
566+
int total_subs = num_parents * fan_out;
567+
568+
state.counters["num_parents"] = num_parents;
569+
state.counters["fan_out"] = fan_out;
570+
571+
asio::io_context ioc(BOOST_ASIO_CONCURRENCY_HINT_UNSAFE);
572+
573+
std::vector<tcp_socket> clients;
574+
std::vector<tcp_socket> servers;
575+
clients.reserve(total_subs);
576+
servers.reserve(total_subs);
577+
578+
for (int i = 0; i < total_subs; ++i)
579+
{
580+
auto [c, s] = asio_bench::make_socket_pair(ioc);
581+
clients.push_back(std::move(c));
582+
servers.push_back(std::move(s));
583+
}
584+
585+
for (int i = 0; i < total_subs; ++i)
586+
{
587+
auto echo = std::make_shared<echo_server_op>(servers[i]);
588+
echo->start();
589+
}
590+
591+
std::atomic<int> parents_done{0};
592+
593+
std::vector<std::unique_ptr<parent_fork_join_op>> parent_ops;
594+
parent_ops.reserve(num_parents);
595+
596+
for (int p = 0; p < num_parents; ++p)
597+
{
598+
parent_ops.push_back(
599+
std::make_unique<parent_fork_join_op>(
600+
ioc, clients, servers, p * fan_out, fan_out, num_parents,
601+
state, parents_done));
602+
parent_ops.back()->start();
603+
}
604+
605+
std::thread stopper([&]() {
606+
std::this_thread::sleep_for(
607+
std::chrono::duration<double>(state.duration()));
608+
state.stop();
609+
});
610+
611+
perf::stopwatch sw;
612+
ioc.run();
613+
stopper.join();
614+
615+
state.set_elapsed(sw.elapsed_seconds());
616+
}
617+
469618
} // anonymous namespace
470619

471620
bench::benchmark_suite
@@ -475,9 +624,15 @@ make_fan_out_suite()
475624
return bench::benchmark_suite("fan_out", F::needs_conntrack_drain)
476625
.add("fork_join", bench_fork_join)
477626
.args({1, 4, 16, 64})
627+
.add("fork_join_lockless", bench_fork_join_lockless)
628+
.args({1, 4, 16, 64})
478629
.add("nested", bench_nested)
479630
.args({4, 16})
631+
.add("nested_lockless", bench_nested_lockless)
632+
.args({4, 16})
480633
.add("concurrent_parents", bench_concurrent_parents)
634+
.args({1, 4, 16})
635+
.add("concurrent_parents_lockless", bench_concurrent_parents_lockless)
481636
.args({1, 4, 16});
482637
}
483638

perf/bench/asio/callback/http_server_bench.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <boost/asio/read.hpp>
1515
#include <boost/asio/read_until.hpp>
1616
#include <boost/asio/write.hpp>
17+
#include <boost/asio/detail/concurrency_hint.hpp>
1718

1819
#include <atomic>
1920
#include <chrono>
@@ -181,6 +182,33 @@ bench_single_connection(bench::state& state)
181182
server.close();
182183
}
183184

185+
void
186+
bench_single_connection_lockless(bench::state& state)
187+
{
188+
asio::io_context ioc(BOOST_ASIO_CONCURRENCY_HINT_UNSAFE);
189+
auto [client, server] = asio_bench::make_socket_pair(ioc);
190+
191+
server_op sop{server, {}};
192+
client_op cop{client, state, {}, {}};
193+
194+
sop.start();
195+
cop.start();
196+
197+
std::thread timer([&]() {
198+
std::this_thread::sleep_for(
199+
std::chrono::duration<double>(state.duration()));
200+
state.stop();
201+
});
202+
203+
perf::stopwatch sw;
204+
ioc.run();
205+
timer.join();
206+
207+
state.set_elapsed(sw.elapsed_seconds());
208+
client.close();
209+
server.close();
210+
}
211+
184212
void
185213
bench_concurrent_connections(bench::state& state)
186214
{
@@ -335,6 +363,7 @@ make_http_server_suite()
335363
s.close();
336364
})
337365
.add("single_conn", bench_single_connection)
366+
.add("single_conn_lockless", bench_single_connection_lockless)
338367
.add("concurrent", bench_concurrent_connections)
339368
.args({1, 4, 16, 32})
340369
.add("multithread", bench_multithread)

0 commit comments

Comments
 (0)