Add O(1) indexed service slots to execution_context

mvandeberg · mvandeberg · commit d00a033898b8 · 2026-04-01T15:53:03.000-06:00
find_service<T>() and use_service<T>() previously acquired a mutex and
walked an O(n) linked list on every call. This becomes a bottleneck when
services are looked up frequently at runtime (e.g. per-request timer
construction). Add a fixed-size array of 32 atomic service pointers
indexed by a per-type slot ID, giving lock-free O(1) reads on the fast
path. The linked-list registry remains as a fallback for overflow.
diff --git a/doc/unlisted/execution-contexts.adoc b/doc/unlisted/execution-contexts.adoc
@@ -271,6 +271,13 @@ Service management functions (`use_service`, `make_service`, `find_service`)
 are thread-safe. The `shutdown()` and `destroy()` functions are NOT thread-safe
 and must only be called during destruction.
 
+=== Performance
+
+The first 32 distinct service types (or key types) looked up or registered
+anywhere in the program get an O(1) lock-free fast path for `find_service` and
+`use_service`. Beyond 32 types, lookups fall back to a mutex-protected linear
+scan. In practice, 32 slots are sufficient for any realistic program.
+
 == When NOT to Use execution_context Directly
 
 Use `execution_context` directly when:
diff --git a/include/boost/capy/detail/service_slot.hpp b/include/boost/capy/detail/service_slot.hpp
@@ -0,0 +1,44 @@
+//
+// Copyright (c) 2026 Michael Vandeberg
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/cppalliance/capy
+//
+
+#ifndef BOOST_CAPY_DETAIL_SERVICE_SLOT_HPP
+#define BOOST_CAPY_DETAIL_SERVICE_SLOT_HPP
+
+#include <atomic>
+#include <cstddef>
+
+namespace boost {
+namespace capy {
+namespace detail {
+
+/* Slot ID infrastructure for O(1) service lookup.
+
+   Each distinct service type T gets a unique integer index from
+   service_slot<T>(): taken from a global atomic counter on the first
+   call for T and cached in a function-local static thereafter.
+   Cross-DLL safety relies on COMDAT deduplication (same mechanism as
+   type_id_impl<T>::tag). NOTE(review): confirm this across modules.
+*/
+
+inline std::atomic<std::size_t> next_service_slot{0}; // next unassigned index
+// Returns T's slot index: assigned on the first call, reused on later calls.
+template<class T>
+std::size_t
+service_slot() noexcept
+{
+    static const std::size_t id = // initialized once per T
+        next_service_slot.fetch_add(1, std::memory_order_relaxed);
+    return id; // not capped here; callers compare it against their slot count
+}
+
+} // namespace detail
+} // namespace capy
+} // namespace boost
+
+#endif
diff --git a/include/boost/capy/ex/execution_context.hpp b/include/boost/capy/ex/execution_context.hpp
@@ -12,8 +12,10 @@
 
 #include <boost/capy/detail/config.hpp>
 #include <boost/capy/detail/frame_memory_resource.hpp>
+#include <boost/capy/detail/service_slot.hpp>
 #include <boost/capy/detail/type_id.hpp>
 #include <boost/capy/concept/executor.hpp>
+#include <atomic>
 #include <concepts>
 #include <memory>
 #include <memory_resource>
@@ -223,6 +225,14 @@ class BOOST_CAPY_DECL
     template<class T>
     T* find_service() const noexcept
     {
+        auto id = detail::service_slot<T>(); // global per-type index
+        if(id < max_service_slots) // larger indexes have no slot; use the list
+        {
+            auto* p = slots_[id].load(
+                std::memory_order_acquire); // pairs with the release stores
+            if(p)
+                return static_cast<T*>(p); // fast path: no mutex taken
+        }
         std::lock_guard<std::mutex> lock(mutex_);
         return static_cast<T*>(find_impl(detail::type_id<T>()));
     }
@@ -255,6 +265,24 @@ class BOOST_CAPY_DECL
             "T must derive from service");
         static_assert(std::is_constructible<T, execution_context&>::value,
             "T must be constructible from execution_context&");
+        if constexpr(get_key<T>::value)
+        {
+            static_assert(
+                std::is_convertible<T&, typename get_key<T>::type&>::value,
+                "T& must be convertible to key_type&");
+        }
+
+        // Fast path: O(1) slot lookup
+        {
+            auto id = detail::service_slot<T>();
+            if(id < max_service_slots)
+            {
+                auto* p = slots_[id].load(
+                    std::memory_order_acquire);
+                if(p)
+                    return static_cast<T&>(*p);
+            }
+        }
 
         struct impl : factory
         {
@@ -263,7 +291,11 @@ class BOOST_CAPY_DECL
                     detail::type_id<T>(),
                     get_key<T>::value
                         ? detail::type_id<typename get_key<T>::type>()
-                        : detail::type_id<T>())
+                        : detail::type_id<T>(),
+                    detail::service_slot<T>(),
+                    get_key<T>::value
+                        ? detail::service_slot<typename get_key<T>::type>()
+                        : detail::service_slot<T>())
             {
             }
 
@@ -325,7 +357,11 @@ class BOOST_CAPY_DECL
                     detail::type_id<T>(),
                     get_key<T>::value
                         ? detail::type_id<typename get_key<T>::type>()
-                        : detail::type_id<T>())
+                        : detail::type_id<T>(),
+                    detail::service_slot<T>(),
+                    get_key<T>::value
+                        ? detail::service_slot<typename get_key<T>::type>()
+                        : detail::service_slot<T>())
                 , args_(std::forward<Args>(a)...)
             {
             }
@@ -505,11 +541,16 @@ class BOOST_CAPY_DECL
         detail::type_index t0;
         detail::type_index t1;
         BOOST_CAPY_MSVC_WARNING_POP
+        std::size_t slot0;
+        std::size_t slot1;
 
         factory(
             detail::type_info const& t0_,
-            detail::type_info const& t1_)
+            detail::type_info const& t1_,
+            std::size_t s0,
+            std::size_t s1)
             : t0(t0_), t1(t1_)
+            , slot0(s0), slot1(s1)
         {
         }
 
@@ -523,7 +564,7 @@ class BOOST_CAPY_DECL
     service& use_service_impl(factory& f);
     service& make_service_impl(factory& f);
 
-// warning C4251: std::mutex, std::shared_ptr need dll-interface
+// warning C4251: std::mutex, std::shared_ptr, std::atomic need dll-interface
     BOOST_CAPY_MSVC_WARNING_PUSH
     BOOST_CAPY_MSVC_WARNING_DISABLE(4251)
     mutable std::mutex mutex_;
@@ -532,6 +573,12 @@ class BOOST_CAPY_DECL
     std::pmr::memory_resource* frame_alloc_ = nullptr;
     service* head_ = nullptr;
     bool shutdown_ = false;
+
+    static constexpr std::size_t max_service_slots = 32;
+    BOOST_CAPY_MSVC_WARNING_PUSH
+    BOOST_CAPY_MSVC_WARNING_DISABLE(4251)
+    std::atomic<service*> slots_[max_service_slots] = {};
+    BOOST_CAPY_MSVC_WARNING_POP
 };
 
 template< typename Derived >
diff --git a/src/ex/execution_context.cpp b/src/ex/execution_context.cpp
@@ -55,6 +55,8 @@ destroy() noexcept
         delete p;
         p = next;
     }
+    for(auto& s : slots_)
+        s.store(nullptr, std::memory_order_relaxed);
 }
 
 execution_context::service*
@@ -78,7 +80,13 @@ use_service_impl(factory& f)
     std::unique_lock<std::mutex> lock(mutex_);
 
     if(auto* p = find_impl(f.t0))
+    {
+        if(f.slot0 < max_service_slots)
+            slots_[f.slot0].store(p, std::memory_order_release);
+        if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
+            slots_[f.slot1].store(p, std::memory_order_release);
         return *p;
+    }
 
     lock.unlock();
 
@@ -91,13 +99,22 @@ use_service_impl(factory& f)
 
     if(auto* p = find_impl(f.t0))
     {
+        if(f.slot0 < max_service_slots)
+            slots_[f.slot0].store(p, std::memory_order_release);
+        if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
+            slots_[f.slot1].store(p, std::memory_order_release);
         delete sp;
         return *p;
     }
 
     sp->next_ = head_;
     head_ = sp;
 
+    if(f.slot0 < max_service_slots)
+        slots_[f.slot0].store(sp, std::memory_order_release);
+    if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
+        slots_[f.slot1].store(sp, std::memory_order_release);
+
     return *sp;
 }
 
@@ -141,6 +158,11 @@ make_service_impl(factory& f)
     p->next_ = head_;
     head_ = p;
 
+    if(f.slot0 < max_service_slots)
+        slots_[f.slot0].store(p, std::memory_order_release);
+    if(f.slot0 != f.slot1 && f.slot1 < max_service_slots)
+        slots_[f.slot1].store(p, std::memory_order_release);
+
     return *p;
 }
 
diff --git a/test/unit/ex/execution_context.cpp b/test/unit/ex/execution_context.cpp
@@ -387,6 +387,85 @@ struct execution_context_test
         BOOST_TEST_NE(new_mr, default_mr);
     }
 
+    void
+    testSlotLookupConsistency()
+    {
+        // Verify that repeated find_service calls return the same pointer,
+        // and that it is the instance make_service created.
+        test_io_context ctx;
+
+        auto& svc = ctx.make_service<simple_service>(77);
+        auto* p1 = ctx.find_service<simple_service>();
+        auto* p2 = ctx.find_service<simple_service>();
+
+        BOOST_TEST_NE(p1, nullptr);
+        BOOST_TEST_EQ(p1, p2);
+        BOOST_TEST_EQ(p1, &svc);
+    }
+
+    void
+    testSlotKeyTypeLookup()
+    {
+        // Verify lookup works via the concrete type and via its key_type.
+        test_io_context ctx;
+
+        ctx.make_service<derived_service>(55);
+
+        auto* p1 = ctx.find_service<derived_service>();
+        BOOST_TEST_NE(p1, nullptr);
+        BOOST_TEST_EQ(p1->value, 55);
+
+        auto* p2 = ctx.find_service<base_service>(); // presumably the key_type
+        BOOST_TEST_NE(p2, nullptr);
+        BOOST_TEST_EQ(p2->get_value(), 55);
+
+        // Both lookups must resolve to the same object.
+        BOOST_TEST_EQ(
+            static_cast<base_service*>(p1), p2);
+    }
+
+    void
+    testUseServiceSlotFastPath()
+    {
+        // Verify a second use_service call returns the first call's instance.
+        test_io_context ctx;
+
+        auto& svc1 = ctx.use_service<simple_service>(); // fresh ctx: creates it
+        auto& svc2 = ctx.use_service<simple_service>(); // must reuse it
+
+        BOOST_TEST_EQ(&svc1, &svc2);
+    }
+
+    void
+    testConcurrentUseServiceSlots()
+    {
+        // Stress test: several threads each call use_service on one context.
+        // Every thread must observe the same service instance.
+        test_io_context ctx;
+        constexpr int num_threads = 16;
+        std::atomic<simple_service*> results[num_threads] = {}; // per thread
+
+        std::vector<std::thread> threads;
+        threads.reserve(num_threads);
+
+        for(int i = 0; i < num_threads; ++i)
+        {
+            threads.emplace_back([&ctx, &results, i]{
+                auto& svc = ctx.use_service<simple_service>();
+                results[i].store(&svc,
+                    std::memory_order_relaxed); // join() below orders the reads
+            });
+        }
+
+        for(auto& t : threads)
+            t.join();
+
+        auto* expected = results[0].load();
+        BOOST_TEST_NE(expected, nullptr);
+        for(int i = 1; i < num_threads; ++i)
+            BOOST_TEST_EQ(results[i].load(), expected);
+    }
+
     void
     run()
     {
@@ -406,6 +485,10 @@ struct execution_context_test
         testGetFrameAllocator();
         testSetFrameAllocatorRawPointer();
         testSetFrameAllocatorTemplate();
+        testSlotLookupConsistency();
+        testSlotKeyTypeLookup();
+        testUseServiceSlotFastPath();
+        testConcurrentUseServiceSlots();
     }
 };