Skip to content

Commit d00a033

Browse files
committed
Add O(1) indexed service slots to execution_context
find_service<T>() and use_service<T>() previously acquired a mutex and walked an O(n) linked list on every call. This becomes a bottleneck when services are looked up frequently at runtime (e.g. per-request timer construction). Add a fixed-size array of 32 atomic service pointers indexed by a per-type slot ID, giving lock-free O(1) reads on the fast path. The linked-list registry remains as a fallback for overflow.
1 parent bf08032 commit d00a033

File tree

5 files changed

+207
-4
lines changed

5 files changed

+207
-4
lines changed

doc/unlisted/execution-contexts.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,13 @@ Service management functions (`use_service`, `make_service`, `find_service`)
271271
are thread-safe. The `shutdown()` and `destroy()` functions are NOT thread-safe
272272
and must only be called during destruction.
273273

274+
=== Performance
275+
276+
The first 32 distinct service types (key types included) that are looked up or
277+
registered anywhere in the program get an O(1) lock-free fast path for
278+
`find_service` and `use_service` once the service exists. Types beyond the
279+
first 32 fall back to a mutex-protected linear scan. In practice, 32 slots are sufficient for any realistic program.
280+
274281
== When NOT to Use execution_context Directly
275282

276283
Use `execution_context` directly when:
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//
2+
// Copyright (c) 2026 Michael Vandeberg
3+
//
4+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
//
7+
// Official repository: https://github.com/cppalliance/capy
8+
//
9+
10+
#ifndef BOOST_CAPY_DETAIL_SERVICE_SLOT_HPP
11+
#define BOOST_CAPY_DETAIL_SERVICE_SLOT_HPP
12+
13+
#include <atomic>
14+
#include <cstddef>
15+
16+
namespace boost {
17+
namespace capy {
18+
namespace detail {
19+
20+
/* Slot ID infrastructure for O(1) service lookup.
21+
22+
Each distinct service type T gets a unique integer index via
23+
service_slot<T>(). The index is assigned on first call from a
24+
global atomic counter and cached in a function-local static.
25+
Cross-DLL safety relies on COMDAT deduplication (same mechanism
26+
as type_id_impl<T>::tag).
27+
*/
28+
29+
inline std::atomic<std::size_t> next_service_slot{0};
30+
31+
template<class T>
32+
std::size_t
33+
service_slot() noexcept
34+
{
35+
static const std::size_t id =
36+
next_service_slot.fetch_add(1, std::memory_order_relaxed);
37+
return id;
38+
}
39+
40+
} // namespace detail
41+
} // namespace capy
42+
} // namespace boost
43+
44+
#endif

include/boost/capy/ex/execution_context.hpp

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212

1313
#include <boost/capy/detail/config.hpp>
1414
#include <boost/capy/detail/frame_memory_resource.hpp>
15+
#include <boost/capy/detail/service_slot.hpp>
1516
#include <boost/capy/detail/type_id.hpp>
1617
#include <boost/capy/concept/executor.hpp>
18+
#include <atomic>
1719
#include <concepts>
1820
#include <memory>
1921
#include <memory_resource>
@@ -223,6 +225,14 @@ class BOOST_CAPY_DECL
223225
template<class T>
224226
T* find_service() const noexcept
225227
{
228+
auto id = detail::service_slot<T>();
229+
if(id < max_service_slots)
230+
{
231+
auto* p = slots_[id].load(
232+
std::memory_order_acquire);
233+
if(p)
234+
return static_cast<T*>(p);
235+
}
226236
std::lock_guard<std::mutex> lock(mutex_);
227237
return static_cast<T*>(find_impl(detail::type_id<T>()));
228238
}
@@ -255,6 +265,24 @@ class BOOST_CAPY_DECL
255265
"T must derive from service");
256266
static_assert(std::is_constructible<T, execution_context&>::value,
257267
"T must be constructible from execution_context&");
268+
if constexpr(get_key<T>::value)
269+
{
270+
static_assert(
271+
std::is_convertible<T&, typename get_key<T>::type&>::value,
272+
"T& must be convertible to key_type&");
273+
}
274+
275+
// Fast path: O(1) slot lookup
276+
{
277+
auto id = detail::service_slot<T>();
278+
if(id < max_service_slots)
279+
{
280+
auto* p = slots_[id].load(
281+
std::memory_order_acquire);
282+
if(p)
283+
return static_cast<T&>(*p);
284+
}
285+
}
258286

259287
struct impl : factory
260288
{
@@ -263,7 +291,11 @@ class BOOST_CAPY_DECL
263291
detail::type_id<T>(),
264292
get_key<T>::value
265293
? detail::type_id<typename get_key<T>::type>()
266-
: detail::type_id<T>())
294+
: detail::type_id<T>(),
295+
detail::service_slot<T>(),
296+
get_key<T>::value
297+
? detail::service_slot<typename get_key<T>::type>()
298+
: detail::service_slot<T>())
267299
{
268300
}
269301

@@ -325,7 +357,11 @@ class BOOST_CAPY_DECL
325357
detail::type_id<T>(),
326358
get_key<T>::value
327359
? detail::type_id<typename get_key<T>::type>()
328-
: detail::type_id<T>())
360+
: detail::type_id<T>(),
361+
detail::service_slot<T>(),
362+
get_key<T>::value
363+
? detail::service_slot<typename get_key<T>::type>()
364+
: detail::service_slot<T>())
329365
, args_(std::forward<Args>(a)...)
330366
{
331367
}
@@ -505,11 +541,16 @@ class BOOST_CAPY_DECL
505541
detail::type_index t0;
506542
detail::type_index t1;
507543
BOOST_CAPY_MSVC_WARNING_POP
544+
std::size_t slot0;
545+
std::size_t slot1;
508546

509547
factory(
510548
detail::type_info const& t0_,
511-
detail::type_info const& t1_)
549+
detail::type_info const& t1_,
550+
std::size_t s0,
551+
std::size_t s1)
512552
: t0(t0_), t1(t1_)
553+
, slot0(s0), slot1(s1)
513554
{
514555
}
515556

@@ -523,7 +564,7 @@ class BOOST_CAPY_DECL
523564
service& use_service_impl(factory& f);
524565
service& make_service_impl(factory& f);
525566

526-
// warning C4251: std::mutex, std::shared_ptr need dll-interface
567+
// warning C4251: std::mutex, std::shared_ptr, std::atomic need dll-interface
527568
BOOST_CAPY_MSVC_WARNING_PUSH
528569
BOOST_CAPY_MSVC_WARNING_DISABLE(4251)
529570
mutable std::mutex mutex_;
@@ -532,6 +573,12 @@ class BOOST_CAPY_DECL
532573
std::pmr::memory_resource* frame_alloc_ = nullptr;
533574
service* head_ = nullptr;
534575
bool shutdown_ = false;
576+
577+
static constexpr std::size_t max_service_slots = 32;
578+
BOOST_CAPY_MSVC_WARNING_PUSH
579+
BOOST_CAPY_MSVC_WARNING_DISABLE(4251)
580+
std::atomic<service*> slots_[max_service_slots] = {};
581+
BOOST_CAPY_MSVC_WARNING_POP
535582
};
536583

537584
template< typename Derived >

src/ex/execution_context.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ destroy() noexcept
5555
delete p;
5656
p = next;
5757
}
58+
for(auto& s : slots_)
59+
s.store(nullptr, std::memory_order_relaxed);
5860
}
5961

6062
execution_context::service*
@@ -78,7 +80,13 @@ use_service_impl(factory& f)
7880
std::unique_lock<std::mutex> lock(mutex_);
7981

8082
if(auto* p = find_impl(f.t0))
83+
{
84+
if(f.slot0 < max_service_slots)
85+
slots_[f.slot0].store(p, std::memory_order_release);
86+
if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
87+
slots_[f.slot1].store(p, std::memory_order_release);
8188
return *p;
89+
}
8290

8391
lock.unlock();
8492

@@ -91,13 +99,22 @@ use_service_impl(factory& f)
9199

92100
if(auto* p = find_impl(f.t0))
93101
{
102+
if(f.slot0 < max_service_slots)
103+
slots_[f.slot0].store(p, std::memory_order_release);
104+
if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
105+
slots_[f.slot1].store(p, std::memory_order_release);
94106
delete sp;
95107
return *p;
96108
}
97109

98110
sp->next_ = head_;
99111
head_ = sp;
100112

113+
if(f.slot0 < max_service_slots)
114+
slots_[f.slot0].store(sp, std::memory_order_release);
115+
if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
116+
slots_[f.slot1].store(sp, std::memory_order_release);
117+
101118
return *sp;
102119
}
103120

@@ -141,6 +158,11 @@ make_service_impl(factory& f)
141158
p->next_ = head_;
142159
head_ = p;
143160

161+
if(f.slot0 < max_service_slots)
162+
slots_[f.slot0].store(p, std::memory_order_release);
163+
if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
164+
slots_[f.slot1].store(p, std::memory_order_release);
165+
144166
return *p;
145167
}
146168

test/unit/ex/execution_context.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,85 @@ struct execution_context_test
387387
BOOST_TEST_NE(new_mr, default_mr);
388388
}
389389

390+
void
391+
testSlotLookupConsistency()
392+
{
393+
// Verify that find_service returns the same pointer
394+
// whether from the slot fast path or linked list fallback.
395+
test_io_context ctx;
396+
397+
auto& svc = ctx.make_service<simple_service>(77);
398+
auto* p1 = ctx.find_service<simple_service>();
399+
auto* p2 = ctx.find_service<simple_service>();
400+
401+
BOOST_TEST_NE(p1, nullptr);
402+
BOOST_TEST_EQ(p1, p2);
403+
BOOST_TEST_EQ(p1, &svc);
404+
}
405+
406+
void
407+
testSlotKeyTypeLookup()
408+
{
409+
// Verify slot lookup works for both concrete and key_type.
410+
test_io_context ctx;
411+
412+
ctx.make_service<derived_service>(55);
413+
414+
auto* p1 = ctx.find_service<derived_service>();
415+
BOOST_TEST_NE(p1, nullptr);
416+
BOOST_TEST_EQ(p1->value, 55);
417+
418+
auto* p2 = ctx.find_service<base_service>();
419+
BOOST_TEST_NE(p2, nullptr);
420+
BOOST_TEST_EQ(p2->get_value(), 55);
421+
422+
// Both should point to the same object
423+
BOOST_TEST_EQ(
424+
static_cast<base_service*>(p1), p2);
425+
}
426+
427+
void
428+
testUseServiceSlotFastPath()
429+
{
430+
// Verify use_service fast path returns same instance.
431+
test_io_context ctx;
432+
433+
auto& svc1 = ctx.use_service<simple_service>();
434+
auto& svc2 = ctx.use_service<simple_service>();
435+
436+
BOOST_TEST_EQ(&svc1, &svc2);
437+
}
438+
439+
void
440+
testConcurrentUseServiceSlots()
441+
{
442+
// Stress test: many threads calling use_service simultaneously.
443+
// All must get the same service instance.
444+
test_io_context ctx;
445+
constexpr int num_threads = 16;
446+
std::atomic<simple_service*> results[num_threads] = {};
447+
448+
std::vector<std::thread> threads;
449+
threads.reserve(num_threads);
450+
451+
for(int i = 0; i < num_threads; ++i)
452+
{
453+
threads.emplace_back([&ctx, &results, i]{
454+
auto& svc = ctx.use_service<simple_service>();
455+
results[i].store(&svc,
456+
std::memory_order_relaxed);
457+
});
458+
}
459+
460+
for(auto& t : threads)
461+
t.join();
462+
463+
auto* expected = results[0].load();
464+
BOOST_TEST_NE(expected, nullptr);
465+
for(int i = 1; i < num_threads; ++i)
466+
BOOST_TEST_EQ(results[i].load(), expected);
467+
}
468+
390469
void
391470
run()
392471
{
@@ -406,6 +485,10 @@ struct execution_context_test
406485
testGetFrameAllocator();
407486
testSetFrameAllocatorRawPointer();
408487
testSetFrameAllocatorTemplate();
488+
testSlotLookupConsistency();
489+
testSlotKeyTypeLookup();
490+
testUseServiceSlotFastPath();
491+
testConcurrentUseServiceSlots();
409492
}
410493
};
411494

0 commit comments

Comments
 (0)