@@ -43,6 +43,28 @@ func TestIsTransientLeaderRedisError(t *testing.T) {
4343 {"unrelated error" , io .EOF , false },
4444 {"nil" , nil , false },
4545 {"wrong-type-looking error" , errors .New ("WRONGTYPE op" ), false },
46+ // Suffix-fallback regression cases. These cover the gRPC-
47+ // boundary path: when the coordinator forwards to a remote
48+ // leader and the remote returns ErrLeaderNotFound, gRPC
49+ // flattens it to "rpc error: code = X desc = leader not
50+ // found"; the typed sentinel chain is gone. The Jepsen
51+ // Redis workload (scheduled run 26035515694) saw workers
52+ // crash with `:prefix :rpc` because this case was not
53+ // classified as transient and the raw "rpc error: …"
54+ // reached Carmine.
55+ {"grpc-wrapped leader not found" ,
56+ errors .New ("rpc error: code = Unknown desc = leader not found" ), true },
57+ {"grpc-wrapped not leader" ,
58+ errors .New ("rpc error: code = FailedPrecondition desc = raft engine: not leader" ), true },
59+ {"grpc-wrapped leadership lost" ,
60+ errors .New ("rpc error: code = Aborted desc = raft engine: leadership lost" ), true },
61+ {"grpc-wrapped leadership transfer" ,
62+ errors .New ("rpc error: code = Aborted desc = raft engine: leadership transfer in progress" ), true },
63+ // Suffix discipline: a user-controlled key in the middle
64+ // of the message must NOT trigger a false positive. The kv
65+ // suffix matcher pins this exact scenario; mirror it here.
66+ {"user key embedding 'not leader' in the middle" ,
67+ errors .New ("key: not leader: write conflict" ), false },
4668 }
4769 for _ , tc := range cases {
4870 t .Run (tc .name , func (t * testing.T ) {
@@ -74,6 +96,15 @@ func TestWriteRedisError(t *testing.T) {
7496 errors .New ("WRONGTYPE op" ), "WRONGTYPE op" },
7597 {"generic io.EOF untouched" ,
7698 io .EOF , io .EOF .Error ()},
99+ // Suffix-fallback wire reply regression: the gRPC-wrapped
100+ // "rpc error: code = Unknown desc = leader not found" string
101+ // (the failure mode behind scheduled run 26035515694) must
102+ // gain a NOTLEADER prefix on the Redis wire so Carmine maps
103+ // it to `:prefix :notleader` and the upstream
104+ // jepsen-io/redis with-exceptions catch fires.
105+ {"grpc-wrapped leader-not-found gains NOTLEADER prefix" ,
106+ errors .New ("rpc error: code = Unknown desc = leader not found" ),
107+ "NOTLEADER rpc error: code = Unknown desc = leader not found" },
77108 // Regression: address-mapping gap errors (raft leader known
78109 // but raft→redis address missing in r.leaderRedis) must be
79110 // ERR-prefixed at the source so Carmine maps to :prefix :err
@@ -99,3 +130,45 @@ func TestWriteRedisError(t *testing.T) {
99130 })
100131 }
101132}
133+
134+ // TestHasTransientLeaderSuffix_PinsSentinels closes the gap noted
135+ // at kv/coordinator.go:529 ("A symmetric pin lives in the adapter
136+ // test package"): the adapter's redisLeaderErrorPhrases set must
137+ // stay in sync with the actual .Error() text of every transient-
138+ // leader sentinel the suffix fallback is meant to catch. If a
139+ // sentinel ever gets renamed (e.g. raftengine.ErrLeadershipLost
140+ // becomes "raft engine: leadership lost (xyz)") the kv-side pin
141+ // fails first, but without this adapter-side pin the adapter's
142+ // phrase list could drift silently and the NOTLEADER classification
143+ // would regress to the pre-PR-789 worker-crash failure mode.
144+ //
145+ // Each case calls hasTransientLeaderSuffix(sentinel.Error()) and
146+ // asserts true. Wrapping (errors.Wrap / fmt.Errorf %w) is covered
147+ // by TestIsTransientLeaderRedisError; this test pins the raw
148+ // .Error() strings only.
149+ func TestHasTransientLeaderSuffix_PinsSentinels (t * testing.T ) {
150+ t .Parallel ()
151+ cases := []struct {
152+ name string
153+ msg string
154+ }{
155+ {"adapter.ErrLeaderNotFound" , ErrLeaderNotFound .Error ()},
156+ {"adapter.ErrNotLeader" , ErrNotLeader .Error ()},
157+ {"kv.ErrLeaderNotFound" , kv .ErrLeaderNotFound .Error ()},
158+ {"raftengine.ErrNotLeader" , raftengine .ErrNotLeader .Error ()},
159+ {"raftengine.ErrLeadershipLost" , raftengine .ErrLeadershipLost .Error ()},
160+ {"raftengine.ErrLeadershipTransferInProgress" ,
161+ raftengine .ErrLeadershipTransferInProgress .Error ()},
162+ }
163+ for _ , tc := range cases {
164+ t .Run (tc .name , func (t * testing.T ) {
165+ t .Parallel ()
166+ if ! hasTransientLeaderSuffix (tc .msg ) {
167+ t .Fatalf ("hasTransientLeaderSuffix(%q) = false; " +
168+ "redisLeaderErrorPhrases is out of sync with %s — " +
169+ "a sentinel rename slipped through. Update the " +
170+ "phrase list in adapter/redis.go to match." , tc .msg , tc .name )
171+ }
172+ })
173+ }
174+ }
0 commit comments