Skip to content

Commit e2dbb16

Browse files
committed
distribution(composed1): break+compact+rename per claude review on PR #894
Three findings from claude's review of PR #894, all valid: * OwnerOf O(N)-vs-O(early-break) on sorted route list. Routes are sorted by Start (UpdateRoute / routesFromCatalog keep them sorted; recordHistorySnapshotLocked clones from that order). Once key < r.Start, no later route can cover key either. `continue` worked but iterated the whole tail; `break` short- circuits. Matters because M3 puts OwnerOf on every txn commit's apply path. * historyOrder backing-array growth on FIFO eviction. The previous `e.historyOrder = e.historyOrder[1:]` only moved the slice header — the head of the original backing array stayed alive across evictions, growing unboundedly. Replace with an explicit `make([]uint64, len-1, historyDepth) + copy` so the array stays bounded at historyDepth. * recordHistorySnapshot lock contract fragility. The function requires the caller to hold e.mu (write lock); the doc said so but nothing in the name signalled it. Renamed to recordHistorySnapshotLocked per the Go convention so a future refactor that moves the call past an Unlock surfaces the contract by name. Test nits also addressed: * TestEngineSnapshotAt_FIFOEviction now calls t.Parallel() and the direct `e.historyDepth = 3` write is documented as test-local (Engine not yet shared with any concurrent reader). * TestKvFSM_WithRouteHistory_NilProviderTreatedAsUnwired asserts shardGroupID=7 explicitly so a future refactor that accidentally zeroes it would be caught. Verification: go test -race -count=1 ./distribution ./kv → pass (1.0 s + 8.1 s). No spec change; no contract change visible to callers outside the distribution package.
1 parent 85056fa commit e2dbb16

3 files changed

Lines changed: 44 additions & 13 deletions

File tree

distribution/engine.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func NewEngine() *Engine {
8080
func NewEngineWithDefaultRoute() *Engine {
8181
engine := NewEngine()
8282
engine.UpdateRoute([]byte(""), nil, defaultGroupID)
83-
engine.recordHistorySnapshot()
83+
engine.recordHistorySnapshotLocked()
8484
return engine
8585
}
8686

@@ -135,7 +135,7 @@ func (e *Engine) ApplySnapshot(snapshot CatalogSnapshot) error {
135135

136136
e.routes = routes
137137
e.catalogVersion = snapshot.Version
138-
e.recordHistorySnapshot()
138+
e.recordHistorySnapshotLocked()
139139
return nil
140140
}
141141

@@ -157,10 +157,19 @@ func (s RouteHistorySnapshot) Version() uint64 { return s.version }
157157
// pre-bootstrap state or an explicitly-uncovered range). Mirrors
158158
// Engine.GetRoute's right-half-open interval semantics but against
159159
// the historical snapshot, not the live engine state.
160+
//
161+
// Routes are sorted by Start (recordHistorySnapshotLocked clones from
162+
// e.routes, which Engine.UpdateRoute / routesFromCatalog keep sorted),
163+
// so the scan can break the moment key < r.Start — every later route
164+
// has a strictly greater Start and cannot cover key either. This
165+
// matters because M3 puts OwnerOf on every txn commit's apply path
166+
// (claude review on PR #894 — break-vs-continue lifts the worst-case
167+
// scan from O(N) to "first non-covering gap" without changing the
168+
// resolution semantics).
160169
func (s RouteHistorySnapshot) OwnerOf(key []byte) (uint64, bool) {
161170
for _, r := range s.routes {
162171
if bytes.Compare(key, r.Start) < 0 {
163-
continue
172+
break
164173
}
165174
if r.End != nil && bytes.Compare(key, r.End) >= 0 {
166175
continue
@@ -191,12 +200,15 @@ func (e *Engine) HistoryDepth() int {
191200
return e.historyDepth
192201
}
193202

194-
// recordHistorySnapshot pushes the current (catalogVersion, routes)
195-
// pair into the ring. Caller MUST hold e.mu (write lock) — invoked
196-
// from ApplySnapshot under the write lock, and from
197-
// NewEngineWithDefaultRoute before the Engine is shared with any
198-
// concurrent reader. Idempotent on re-record at the same version.
199-
func (e *Engine) recordHistorySnapshot() {
203+
// recordHistorySnapshotLocked pushes the current (catalogVersion,
204+
// routes) pair into the ring. The `Locked` suffix is the Go
205+
// convention for "caller MUST hold the receiver's lock" — checked
206+
// by reviewers via name, not by the runtime (claude review on PR
207+
// #894 — fragile lock contract). Invoked from ApplySnapshot under
208+
// the write lock and from NewEngineWithDefaultRoute before the
209+
// Engine is shared with any concurrent reader. Idempotent on
210+
// re-record at the same version.
211+
func (e *Engine) recordHistorySnapshotLocked() {
200212
if e.history == nil {
201213
// Engines constructed via the bare struct literal (e.g.
202214
// internal test seams) — no history ring configured. Skip
@@ -210,7 +222,17 @@ func (e *Engine) recordHistorySnapshot() {
210222
}
211223
if len(e.historyOrder) >= e.historyDepth {
212224
evict := e.historyOrder[0]
213-
e.historyOrder = e.historyOrder[1:]
225+
// Copy the retained tail into fresh storage rather than
226+
// reslicing. `historyOrder[1:]` only advances the slice
227+
// header — the head of the original backing array stays
228+
// alive and grows unboundedly across evictions. At depth=32
229+
// this is small, but the FIFO eviction is the only place
230+
// the array grows, and the compaction is free (single
231+
// allocation, single copy of <=historyDepth entries —
232+
// claude review on PR #894 — backing-array leak).
233+
retained := make([]uint64, len(e.historyOrder)-1, e.historyDepth)
234+
copy(retained, e.historyOrder[1:])
235+
e.historyOrder = retained
214236
delete(e.history, evict)
215237
}
216238
cloned := make([]Route, len(e.routes))

distribution/engine_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,14 @@ func TestEngineSnapshotAt_PreservesHistoryAcrossVersions(t *testing.T) {
559559
// test pins the eviction order so a future depth change does not
560560
// silently break the M3 contract.
561561
func TestEngineSnapshotAt_FIFOEviction(t *testing.T) {
562+
t.Parallel()
562563
e := NewEngine()
563-
// Force a tiny depth so the test is bounded and explicit.
564+
// Force a tiny depth so the test is bounded and explicit. The
565+
// direct field write is safe because `e` is local to this test
566+
// goroutine and the depth is set before any ApplySnapshot fires;
567+
// once the Engine is published to concurrent readers, the depth
568+
// would have to flow through a constructor option (claude review
569+
// on PR #894 — fragile-but-test-local lock contract).
564570
e.historyDepth = 3
565571

566572
for v := uint64(1); v <= 5; v++ {

kv/fsm_route_history_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ func TestKvFSM_WithRouteHistory_NilProviderTreatedAsUnwired(t *testing.T) {
9292
// shardGroupID is still recorded — the design doc says zero is
9393
// the "unset" sentinel but a caller that explicitly passes a
9494
// non-zero shardGroupID with a nil provider gets the gate
95-
// short-circuited anyway via the nil routes check. No invariant
96-
// to assert on the ID here.
95+
// short-circuited anyway via the nil routes check. Assert the
96+
// non-zero ID survives so a future refactor that accidentally
97+
// zeroes it would be caught (claude review on PR #894).
98+
require.Equal(t, uint64(7), fsm.shardGroupID,
99+
"WithRouteHistory(nil, 7) must still record shardGroupID=7; the M3 nil-routes short-circuit fires before the ID is consulted")
97100
}

0 commit comments

Comments
 (0)