ModelEngine-Group
diff --git a/ucm/transport/kv/asu/common/task_manager_base.h b/ucm/transport/kv/asu/common/task_manager_base.h
Lines changed: 200 additions & 24 deletions
@@ -24,69 +24,245 @@
 #pragma once
 
+#include <algorithm>
 #include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <limits>
 #include <memory>
-#include <mutex>
 #include <string>
-#include <unordered_map>
 #include <utility>
+#include <vector>
+
 #include "asu_transport/types.h"
 
 namespace UC::ASU {
 
 template <typename Context, typename State>
 class TaskManagerBase {
 public:
-    TaskManagerBase(State initial_state, std::string task_name)
-        : initial_state_(initial_state), task_name_(std::move(task_name))
+    // Lower bound on the table size: NormalizeSlotCount never returns less.
+    static constexpr std::size_t kMinSlotCount = 1024;
+    // Table size requested when the constructor gets no explicit slot count.
+    static constexpr std::size_t kDefaultSlotCount = 8192;
+
+    // Suggests a slot count for a table expected to hold at most
+    // `max_inflight_tasks` live tasks at the same time.
+    //
+    // The request is doubled to keep the load factor <= 0.5 for open
+    // addressing, raised to at least kMinSlotCount, and then passed through
+    // NormalizeSlotCount. For example: 4096 inflight tasks -> 8192 slots.
+    static std::size_t RecommendSlotCount(std::size_t max_inflight_tasks)
+    {
+        constexpr std::size_t kMax = std::numeric_limits<std::size_t>::max();
+
+        // Saturate instead of letting the doubling wrap around: a wrapped
+        // product would silently yield a table far smaller than requested.
+        const std::size_t doubled =
+            (max_inflight_tasks > kMax / 2) ? kMax : max_inflight_tasks * 2;
+
+        return NormalizeSlotCount(std::max<std::size_t>(kMinSlotCount, doubled));
+    }
+
+    // Builds an empty task table.
+    //
+    //   initial_state - state stored into every context accepted by Submit.
+    //   task_name     - prefix used in the error messages of this manager.
+    //   slot_count    - requested table size; NormalizeSlotCount turns it into
+    //                   a power of two that is at least kMinSlotCount.
+    explicit TaskManagerBase(State initial_state,
+                             std::string task_name,
+                             std::size_t slot_count = kDefaultSlotCount)
+        : initial_state_(initial_state),
+          task_name_(std::move(task_name)),
+          slots_(NormalizeSlotCount(slot_count)),
+          // The table size is a power of two, so `x & slot_mask_` equals
+          // `x % slots_.size()`.
+          slot_mask_(slots_.size() - 1)
     {
     }
 
+    // Registers `ctx` in the slot table and reports the id it was stored under.
+    //
+    // Takes ownership of `ctx`, stores initial_state_ into its `state`, draws a
+    // new id from next_task_id_ and places the context in the first EMPTY slot
+    // found by probing linearly from Hash(id) & slot_mask_.
+    //
+    // On success `task_id` receives the new id and Status::OK() is returned.
+    // On failure `task_id` is set to kInvalidTaskId and INVALID_ARGUMENT is
+    // returned, either because `ctx` is null or because no slot is EMPTY.
+    // An id drawn by a call that then finds the table full is not handed out
+    // again.
+    //
+    // NOTE(review): unlike the removed map-based code, a new id is not compared
+    // against ids already stored, so uniqueness relies on the counter not
+    // wrapping onto an id that is still live.
     Status Submit(std::unique_ptr<Context> ctx, TaskId& task_id)
     {
         if (!ctx) {
             task_id = kInvalidTaskId;
-            return Status::Error(StatusCode::INVALID_ARGUMENT,
-                                 task_name_ + " task context is null");
+            return Status::Error(
+                StatusCode::INVALID_ARGUMENT,
+                task_name_ + " task context is null");
         }
 
         auto shared_ctx = std::shared_ptr<Context>(std::move(ctx));
         shared_ctx->state.store(initial_state_, std::memory_order_release);
 
-        std::lock_guard<std::mutex> lock(mu_);
+        TaskId new_task_id = kInvalidTaskId;
         do {
-            task_id = next_task_id_.fetch_add(1, std::memory_order_relaxed);
-        } while (task_id == kInvalidTaskId || tasks_.find(task_id) != tasks_.end());
+            new_task_id = next_task_id_.fetch_add(1, std::memory_order_relaxed);
+        } while (new_task_id == kInvalidTaskId);  // kInvalidTaskId is 0, so task id starts from 1 to avoid allocating invalid IDs
+
+        // Set before the context is published to a slot, so a reader that
+        // obtains the context from the table also sees its id.
+        shared_ctx->task_id = new_task_id;
+
+        const auto start = Hash(new_task_id) & slot_mask_;
+        const auto capacity = slots_.size();
+
+        // Linear probing: each slot is visited at most once, wrapping around
+        // through slot_mask_.
+        for (std::size_t probe = 0; probe < capacity; ++probe) {
+            auto& slot = slots_[(start + probe) & slot_mask_];
+
+            // CAS: try to transition EMPTY -> WRITING. The winner is the only
+            // writer of this slot until it stores READY below; Remove only
+            // claims slots that are READY.
+            std::uint8_t expected = SlotState::EMPTY;
+            if (!slot.state.compare_exchange_strong(
+                    expected,
+                    SlotState::WRITING,
+                    std::memory_order_acq_rel,
+                    std::memory_order_acquire)) {
+                continue;
+            }
+
+            // Fill the slot first and store READY last: Get and Remove ignore
+            // every slot whose state is not READY.
+            AtomicStoreCtx(slot, shared_ctx, std::memory_order_release);
+            slot.task_id.store(new_task_id, std::memory_order_release);
+            slot.state.store(SlotState::READY, std::memory_order_release);
+
+            task_id = new_task_id;
+            return Status::OK();
+        }
+
+        // Every slot was probed and none could be claimed.
+        task_id = kInvalidTaskId;
 
-        shared_ctx->task_id = task_id;
-        tasks_.emplace(task_id, std::move(shared_ctx));
-        return Status::OK();
+        // Consider adding RESOURCE_EXHAUSTED / NO_SPACE error codes to StatusCode
+        return Status::Error(
+            StatusCode::INVALID_ARGUMENT,
+            task_name_ + " task table is full");
     }
 
+    // Looks up the context stored under `task_id`.
+    //
+    // Returns a shared_ptr to the context, or nullptr when `task_id` is
+    // kInvalidTaskId or no READY slot holds that id.
+    //
+    // Slots that are not READY are skipped instead of ending the search: an
+    // EMPTY slot does not prove absence, since Remove resets slots to EMPTY.
+    // A miss therefore probes all slots_.size() slots.
     std::shared_ptr<Context> Get(TaskId task_id)
     {
-        std::lock_guard<std::mutex> lock(mu_);
-        auto iter = tasks_.find(task_id);
-        if (iter == tasks_.end()) { return nullptr; }
-        return iter->second;
+        if (task_id == kInvalidTaskId) {
+            return nullptr;
+        }
+
+        const auto start = Hash(task_id) & slot_mask_;
+        const auto capacity = slots_.size();
+
+        for (std::size_t probe = 0; probe < capacity; ++probe) {
+            auto& slot = slots_[(start + probe) & slot_mask_];
+
+            // Only READY slots carry a published context and id.
+            const auto state1 = slot.state.load(std::memory_order_acquire);
+            if (state1 != SlotState::READY) {
+                continue;
+            }
+
+            const auto id1 = slot.task_id.load(std::memory_order_acquire);
+            if (id1 != task_id) {
+                continue;
+            }
+
+            // Remove stores an empty pointer while clearing a slot, so a null
+            // result means the slot changed after the checks above.
+            auto ptr = AtomicLoadCtx(slot, std::memory_order_acquire);
+            if (!ptr) {
+                continue;
+            }
+
+            // Double-check to avoid returning a ctx from a reused slot.
+            // The id recorded inside the context itself is compared as well.
+            const auto id2 = slot.task_id.load(std::memory_order_acquire);
+            const auto state2 = slot.state.load(std::memory_order_acquire);
+
+            if (state2 == SlotState::READY &&
+                id2 == task_id &&
+                ptr->task_id == task_id) {
+                return ptr;
+            }
+        }
+
+        return nullptr;
     }
 
     Status Remove(TaskId task_id)
     {
-        std::lock_guard<std::mutex> lock(mu_);
-        auto erased = tasks_.erase(task_id);
-        if (erased == 0) {
-            return Status::Error(StatusCode::TASK_NOT_FOUND, task_name_ + " task not found");
+        if (task_id == kInvalidTaskId) {
+            return Status::Error(
+                StatusCode::TASK_NOT_FOUND,
+                task_name_ + " task not found");
+        }
+
+        const auto start = Hash(task_id) & slot_mask_;
+        const auto capacity = slots_.size();
+
+        for (std::size_t probe = 0; probe < capacity; ++probe) {
+            auto& slot = slots_[(start + probe) & slot_mask_];
+
+            const auto state = slot.state.load(std::memory_order_acquire);
+            if (state != SlotState::READY) {
+                continue;  // Only process slots in READY state
+            }
+
+            const auto id = slot.task_id.load(std::memory_order_acquire);
+            if (id != task_id) {
+                continue;
+            }
+
+            std::uint8_t expected = SlotState::READY;
+            if (!slot.state.compare_exchange_strong(
+                    expected,
+                    SlotState::REMOVING,
+                    std::memory_order_acq_rel,
+                    std::memory_order_acquire)) {
+                continue;  // CAS failed, continue probing
+            }
+
+            AtomicStoreCtx(slot, std::shared_ptr<Context>{}, std::memory_order_release);
+            slot.task_id.store(kInvalidTaskId, std::memory_order_release);
+            slot.state.store(SlotState::EMPTY, std::memory_order_release);
+
+            return Status::OK();
+        }
+
+        return Status::Error(
+            StatusCode::TASK_NOT_FOUND,
+            task_name_ + " task not found");
+    }
+
+private:
+    // Slot lifecycle values, stored as std::uint8_t in Slot::state.
+    //   EMPTY    -> WRITING  : Submit claimed the slot and is filling it.
+    //   WRITING  -> READY    : ctx and task_id have been stored.
+    //   READY    -> REMOVING : Remove claimed the slot and is clearing it.
+    //   REMOVING -> EMPTY    : the slot can be claimed by Submit again.
+    struct SlotState {
+        static constexpr std::uint8_t EMPTY{0};
+        static constexpr std::uint8_t WRITING{1};
+        static constexpr std::uint8_t READY{2};
+        static constexpr std::uint8_t REMOVING{3};
+    };
+    // One entry of the task table.
+    // Ensure each Slot is aligned to 64 bytes to avoid False Sharing
+    // (assumes 64-byte cache lines).
+    struct alignas(64) Slot {
+        // Current SlotState value; the claim/publish point for Submit and Remove.
+        std::atomic<std::uint8_t> state{SlotState::EMPTY};
+        // Id of the task held by this slot, kInvalidTaskId while it holds none.
+        std::atomic<TaskId> task_id{kInvalidTaskId};
+
+        // Use atomic_load/atomic_store free functions for shared_ptr.
+        // This avoids requiring C++20 std::atomic<std::shared_ptr<T>>.
+        // All accesses in this class go through AtomicLoadCtx / AtomicStoreCtx.
+        // NOTE(review): these free-function overloads are deprecated since C++20.
+        std::shared_ptr<Context> ctx;
+    };
+
+private:
+    // Rounds a requested slot count up to a power of two that is never below
+    // kMinSlotCount. When the next power of two is not representable in
+    // std::size_t, the largest representable power of two is returned.
+    static std::size_t NormalizeSlotCount(std::size_t n)
+    {
+        constexpr std::size_t kLargestDoublable =
+            std::numeric_limits<std::size_t>::max() / 2;
+        const std::size_t wanted = (n < kMinSlotCount) ? kMinSlotCount : n;
+
+        std::size_t rounded = 1;
+        while (rounded < wanted) {
+            if (rounded > kLargestDoublable) {
+                // Doubling once more would overflow; settle for this value.
+                break;
+            }
+            rounded *= 2;
         }
-        return Status::OK();
+
+        return rounded;
+    }
+
+    // Maps a task id to a hash value; callers mask the result with slot_mask_
+    // to obtain the first slot to probe.
+    static std::size_t Hash(TaskId task_id)
+    {
+        std::hash<TaskId> hasher{};
+        return hasher(task_id);
+    }
+
+    // Reads slot.ctx through the atomic free function for shared_ptr, using
+    // the given memory order, and returns the pointer that was read.
+    static std::shared_ptr<Context> AtomicLoadCtx(const Slot& slot, std::memory_order order)
+    {
+        const std::shared_ptr<Context>* const source = &slot.ctx;
+        return std::atomic_load_explicit(source, order);
+    }
+
+    // Replaces slot.ctx with `ptr` through the atomic free function for
+    // shared_ptr, using the given memory order. Passing an empty pointer
+    // clears the slot's reference to its context.
+    static void AtomicStoreCtx(Slot& slot, std::shared_ptr<Context> ptr, std::memory_order order)
+    {
+        std::shared_ptr<Context>* const target = &slot.ctx;
+        std::atomic_store_explicit(target, std::move(ptr), order);
     }
 
 private:
+    // State stored into every context accepted by Submit.
     State initial_state_;
+    // Prefix used in this manager's error messages.
     std::string task_name_;
+    // Source of task ids; Submit advances it with fetch_add.
     std::atomic<TaskId> next_task_id_{1};
-    // TODO: consider using a lock-free structure !
-    std::mutex mu_;
-    std::unordered_map<TaskId, std::shared_ptr<Context>> tasks_;
+
+    // Slot table, sized once in the constructor. Must stay declared before
+    // slot_mask_: the constructor initializes slot_mask_ from slots_.size().
+    std::vector<Slot> slots_;
+    // slots_.size() - 1; ANDed with a hash or probe index to wrap into the table.
+    std::size_t slot_mask_{0};
 };
 
 }  // namespace UC::ASU