1919#include < ylt/util/tl/expected.hpp>
2020
2121#include " allocation_strategy.h"
22+ #include " count_min_sketch.h"
2223#include " master_metric_manager.h"
2324#include " mutex.h"
2425#include " segment.h"
@@ -456,6 +457,34 @@ class MasterService {
456457 const std::vector<StorageObjectMetadata>& metadatas)
457458 -> tl::expected<void, ErrorCode>;
458459
460+ /* *
461+ * @brief Heartbeat-driven pull of pending promotion work for a client.
462+ * Returns the per-client promotion_objects map (key -> object size) and
463+ * clears it. The per-shard promotion_tasks map remains populated as the
464+ * source of truth until NotifyPromotionSuccess commits the new MEMORY
465+ * replica.
466+ */
467+ auto PromotionObjectHeartbeat (const UUID& client_id)
468+ -> tl::expected<std::unordered_map<std::string, int64_t>, ErrorCode>;
469+
470+ /* *
471+ * @brief Stage a PROCESSING MEMORY replica for an existing key. Allocates
472+ * DRAM via the existing AllocationStrategy, optionally biased toward the
473+ * caller's local memory segment via preferred_segments. The new replica is
474+ * invisible to readers until NotifyPromotionSuccess flips it to COMPLETE.
475+ */
476+ auto PromotionAllocStart (const std::string& key, uint64_t size,
477+ const std::vector<std::string>& preferred_segments)
478+ -> tl::expected<PromotionAllocStartResponse, ErrorCode>;
479+
480+ /* *
481+ * @brief Commit a staged MEMORY replica to COMPLETE; decrement source
482+ * refcnt; erase per-shard and per-client task entries. Mirror of
483+ * NotifyOffloadSuccess.
484+ */
485+ auto NotifyPromotionSuccess (const UUID& client_id, const std::string& key)
486+ -> tl::expected<void, ErrorCode>;
487+
459488 /* *
460489 * @brief Create a copy task to copy an object's replicas to target segments
461490 * @return Copy task ID on success, ErrorCode on failure
@@ -848,6 +877,19 @@ class MasterService {
848877 std::chrono::system_clock::time_point start_time;
849878 };
850879
880+ // Tracks an in-flight LOCAL_DISK -> MEMORY copy. The source LOCAL_DISK
881+ // replica is refcnt-pinned for the duration of the task so it cannot be
882+ // evicted. alloc_id pins down which staged replica
883+ // NotifyPromotionSuccess should commit, so a concurrent Put creating
884+ // another PROCESSING MEMORY replica cannot be confused with ours.
885+ // alloc_id is 0 until PromotionAllocStart records the new replica.
886+ struct PromotionTask {
887+ ReplicaID source_id; // the LOCAL_DISK replica being promoted
888+ ReplicaID alloc_id{0 }; // the new MEMORY replica staged by AllocStart
889+ uint64_t object_size;
890+ std::chrono::system_clock::time_point start_time;
891+ };
892+
// Number of metadata shards.
static constexpr size_t kNumShards{1024};
852894
853895 // Sharded metadata maps and their mutexes
@@ -860,6 +902,8 @@ class MasterService {
860902 GUARDED_BY (mutex);
861903 std::unordered_map<std::string, const OffloadingTask> offloading_tasks
862904 GUARDED_BY (mutex);
905+ std::unordered_map<std::string, const PromotionTask> promotion_tasks
906+ GUARDED_BY (mutex);
863907 };
864908 std::array<MetadataShard, kNumShards > metadata_shards_;
865909
@@ -935,6 +979,25 @@ class MasterService {
935979 tl::expected<void , ErrorCode> PushOffloadingQueue (const std::string& key,
936980 Replica& replica);
937981
982+ /* *
983+ * @brief Mirror of PushOffloadingQueue for promotion-on-hit. Inserts an
984+ * entry into the holder client's LocalDiskSegment::promotion_objects map.
985+ * Caller is responsible for refcnt-pinning the source replica and
986+ * recording the task in the shard's promotion_tasks map.
987+ */
988+ tl::expected<void , ErrorCode> PushPromotionQueue (const std::string& key,
989+ Replica& source_replica);
990+
/**
 * @brief Attempt to schedule a promotion for a key; called from
 * GetReplicaList when it observes a key with only LOCAL_DISK replicas.
 *
 * Steps, in the order described for this helper:
 *   1. apply the gating chain (frequency / watermark / dedup / cap);
 *   2. refcnt-pin the source LOCAL_DISK replica;
 *   3. record a PromotionTask;
 *   4. push the key onto the holder client's promotion_objects map.
 *
 * Locking: acquires its own RW shard accessor, so it is safe to call
 * after GetReplicaList's RO accessor has been released.
 *
 * @param key Key that was observed with only LOCAL_DISK replicas.
 */
void TryPushPromotionQueue(const std::string& key);
1000+
9381001 // Lease related members
9391002 const uint64_t default_kv_lease_ttl_; // in milliseconds
9401003 const uint64_t default_kv_soft_pin_ttl_; // in milliseconds
@@ -1180,6 +1243,16 @@ class MasterService {
11801243 // offload_on_evict_=true)
11811244 bool offload_force_evict_{false };
11821245
1246+ // Promotion-on-hit: opt-in flag enabling LOCAL_DISK -> MEMORY promotion
1247+ // when a Get observes a key with only LOCAL_DISK replicas.
1248+ bool promotion_on_hit_{false };
1249+ uint32_t promotion_admission_threshold_{2 };
1250+ uint32_t promotion_queue_limit_{50000 };
1251+ // Master-side frequency sketch. Constructed only when promotion_on_hit_ is
1252+ // true. CountMinSketch is mutex-protected internally so we can call into it
1253+ // from any GetReplicaList caller without additional locking.
1254+ std::unique_ptr<CountMinSketch> promotion_sketch_;
1255+
11831256 const std::string ha_backend_type_;
11841257
11851258 const std::string ha_backend_connstring_;
0 commit comments