@@ -7,6 +7,7 @@ package mmaprototype
77
88import (
99 "context"
10+ "slices"
1011
1112 "github.com/cockroachdb/cockroach/pkg/roachpb"
1213 "github.com/cockroachdb/cockroach/pkg/util/log"
@@ -71,29 +72,59 @@ func NoopMMARebalanceAdvisor() *MMARebalanceAdvisor {
7172// duplicate storeIDs. It is up to computeMeansForStoreSet to handle
7273// de-duplication of storeIDs from the cands list.
7374//
75+ // Callers may pass storeIDs (in `existing` or `cands`) that MMA's
76+ // clusterState has not yet seen. This happens during startup, when the
77+ // legacy allocator's StorePool reflects gossiped store descriptors that
78+ // MMA has not yet been notified of via SetStore. Such stores are filtered
79+ // out of `cands` before computing means; if `existing` itself is unknown,
80+ // a NoopMMARebalanceAdvisor is returned because MMA has no load history
81+ // against which to judge candidates. The same asymmetry is acknowledged
82+ // in updateStoreStatuses, which logs and skips unknown stores rather
83+ // than panicking. See #170703.
84+ //
7485// The returned advisor should be passed to IsInConflictWithMMA as a helper to
7586// determine if a candidate is vetoed by the multi-metric allocator due to
7687// running counter to its goals.
7788func (a * allocatorState ) BuildMMARebalanceAdvisor (
78- existing roachpb.StoreID , cands []roachpb.StoreID ,
89+ ctx context. Context , existing roachpb.StoreID , cands []roachpb.StoreID ,
7990) * MMARebalanceAdvisor {
8091 // a.cs is mutated by gossip-driven callbacks (e.g. ProcessStoreLoadMsg) and
8192 // must only be accessed under a.mu. The other public methods on
8293 // allocatorState follow this discipline.
8394 a .mu .Lock ()
8495 defer a .mu .Unlock ()
96+ if ! a .cs .hasStore (existing ) {
97+ // MMA has no load history for the source store, so it cannot judge
98+ // whether any candidate is more overloaded than existing. Fall back to
99+ // a no-op advisor rather than risk a misclassification (or, before the
100+ // hasStore filter on cands below was added, a nil-pointer panic).
101+ log .KvDistribution .VEventf (ctx , 2 ,
102+ "mma skipping advisor: existing store s%d not yet known to mma" , existing )
103+ return NoopMMARebalanceAdvisor ()
104+ }
105+ // Drop any cand the integration layer learned about (via gossip / StorePool)
106+ // before MMA did. computeMeansForStoreSet would otherwise nil-deref on the
107+ // missing storeLoad. In the steady state every cand is known, so the
108+ // IndexFunc walk completes without allocating; only when an unknown cand
109+ // is present do we copy and compact.
110+ if slices .IndexFunc (cands , a .cs .notHasStore ) != - 1 {
111+ cp := make ([]roachpb.StoreID , len (cands ))
112+ copy (cp , cands )
113+ cands = slices .DeleteFunc (cp , a .cs .notHasStore )
114+ }
85115 // TODO(wenyihu6): for simplicity, we create a new scratchNodes every call.
86116 // We should reuse the scratchNodes instead.
87117 scratchNodes := map [roachpb.NodeID ]* NodeLoad {}
88118 scratchStores := map [roachpb.StoreID ]struct {}{}
89119 cands = append (cands , existing )
90120 means , ok := computeMeansForStoreSet (a .cs , cands , scratchNodes , scratchStores )
91121 if ! ok {
92- // Unreachable: cands always contains at least `existing`. Assert in
93- // test builds; in production, fall back to a no-op advisor rather than
94- // return a zero-valued means that would misclassify stores. Gating on
95- // !ok avoids variadic arg boxing on the success path.
96- assertTruef (context .Background (), false , "computeMeansForStoreSet returned !ok for non-empty cands=%v" , cands )
122+ // Unreachable: cands always contains at least `existing`, which we
123+ // just verified is known to MMA. Assert in test builds; in production,
124+ // fall back to a no-op advisor rather than return a zero-valued means
125+ // that would misclassify stores. Gating on !ok avoids variadic arg
126+ // boxing on the success path.
127+ assertTruef (ctx , false , "computeMeansForStoreSet returned !ok for non-empty cands=%v" , cands )
97128 return NoopMMARebalanceAdvisor ()
98129 }
99130 return & MMARebalanceAdvisor {
0 commit comments