From e2a5a896ab4a845b778d8ed142c7bf702487d54c Mon Sep 17 00:00:00 2001
From: "Yoshiaki Ueda (bootjp)" <contact@bootjp.me>
Date: Tue, 2 Jun 2026 00:40:59 +0900
Subject: [PATCH 1/2] fix(redis): finish option-2 dedup HLC-4 / ctx-parent
 parity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Picks up the two follow-up items noted in claude[bot]'s PR #887
verdict but deferred from that PR:

  (a) prepareDispatch() takes its bounded ctx from t.server.
      handlerContext() — the server-lifetime ctx, NOT the caller's
      dispatchCtx. The reuseCtx fix on PR #887 round 1 corrected
      runTransactionWithDedup to derive from dispatchCtx; this
      commit applies the symmetric fix to prepareDispatch so a
      disconnected client / retryRedisWrite timeout interrupts the
      prepare+dispatch promptly. Nil-guard falls back to
      handlerContext for test fixtures that construct a txnContext
      without setting ctx.

  (b) dispatchListPushReuse and the listPushCoreWithDedup first-
      attempt site both shipped before the HLC-4 physical-ceiling
      migration and were missed in that pass. dispatchExecReuse
      already uses NextFenced (PR #887 round 1) and so do all
      first-attempt commit_ts allocations through prepareDispatch
      and the post-raise SetQueueAttributes etc. These two
      list-push dedup sites were the last persistence-grade
      Clock().Next() callers in adapter/redis.go — verified by
      grep '\.Clock()\.Next\b' adapter/redis.go now returning
      only the NextFenced variants.

Without HLC-4 parity here, a stale-leader window could let the
list-push dedup path mint a commit_ts that collides with the
successor's freshly-fenced range — defeating the same anomaly
class option-2 was introduced to prevent.

Caller audit per /loop semantic-change rule
============================================

  - prepareDispatch: sole caller is commit() (in this file) and
    firstExecAttempt (in this file). Both honor defer
    prepared.cancel() and check len(prepared.elems) == 0; ctx-
    parent change is additive — same defer-cancel discipline,
    same error mapping. No external callers.

  - dispatchListPushReuse: sole caller is listPushCoreWithDedup
    (in this file). NextFenced error wired through the existing
    (newLen, drop, err) tuple position; the new error is non-
    retryable so retryRedisWrite surfaces it directly to the
    caller — same shape as PR #887 round-1's dispatchExecReuse
    NextFenced wiring.

  - listPushCoreWithDedup first-attempt site: surfaces NextFenced
    error through the existing retryRedisWrite closure return,
    matching the pattern in prepareDispatch.

Validation
==========

  go test ./adapter/ -run 'ListPushDedup|ExecDedup|StandaloneSetDedup' -race -count=2 -> ok 1.2s
  gofmt + go vet + golangci-lint run -> 0 issues

This PR closes the explicitly-noted follow-up from PR #887's
"Note on pre-existing Clock().Next() callers" + the
prepareDispatch ctx-parent deviation the round-1 review marked
for symmetric treatment. No new functionality.
---
 adapter/redis.go | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/adapter/redis.go b/adapter/redis.go
index 170f0fa7..fa481e4f 100644
--- a/adapter/redis.go
+++ b/adapter/redis.go
@@ -2778,12 +2778,21 @@ func (t *txnContext) prepareDispatch() (preparedTxnDispatch, error) {
 	// non-string keys get a !redis|ttl| element written in the same transaction.
 	ttlElems := t.buildTTLElems()
 
-	// Derive a single redisDispatchTimeout-bounded context covering both the
-	// stream-deletion scans (paginated ScanAt/ExistsAt over StreamEntryScanPrefix)
-	// and the final Dispatch. Without this bound, buildStreamDeletionElems would
-	// run on the server-lifetime handlerContext, leaving its scans uncancellable
-	// from the request side on a slow disk or hot-key pathological commit.
-	ctx, cancel := context.WithTimeout(t.server.handlerContext(), redisDispatchTimeout)
+	// Derive a single redisDispatchTimeout-bounded context covering both
+	// the stream-deletion scans (paginated ScanAt/ExistsAt over
+	// StreamEntryScanPrefix) and the final Dispatch. The parent is the
+	// txnContext's own ctx (the caller's dispatchCtx), not the server-
+	// lifetime handlerContext, so an outer cancellation (client
+	// disconnect, retryRedisWrite timeout) interrupts the prepare+dispatch
+	// promptly instead of waiting the full redisDispatchTimeout — same
+	// rationale as runTransactionWithDedup's reuseCtx fix on PR #887. The
+	// nil-guard falls back to handlerContext for callers that construct a
+	// txnContext without setting ctx (test fixtures).
+	parentCtx := t.ctx
+	if parentCtx == nil {
+		parentCtx = t.server.handlerContext()
+	}
+	ctx, cancel := context.WithTimeout(parentCtx, redisDispatchTimeout)
 
 	streamElems, err := t.buildStreamDeletionElems(ctx)
 	if err != nil {
@@ -3607,7 +3616,16 @@ type reusableListPush struct {
 // fresh meta. Extracted from listPushCoreWithDedup to keep that closure
 // under the cyclop / gocognit / nestif limits.
 func (r *RedisServer) dispatchListPushReuse(ctx context.Context, key []byte, pending *reusableListPush) (newLen int64, drop bool, err error) {
-	commitTS := r.coordinator.Clock().Next()
+	// HLC-4 parity: persistence-grade commit_ts allocation must honor
+	// the physical-ceiling fence so a stale-leader window cannot mint a
+	// timestamp that collides with the successor's. dispatchExecReuse
+	// already uses NextFenced (PR #887 round 1); the two listPush dedup
+	// sites shipped before the HLC-4 migration and were missed in that
+	// pass — see PR #887 verdict for the noted gap.
+	commitTS, allocErr := r.coordinator.Clock().NextFenced()
+	if allocErr != nil {
+		return 0, false, errors.Wrap(allocErr, "redis list-push reuse: allocate commitTS")
+	}
 	_, dispErr := r.coordinator.Dispatch(ctx, &kv.OperationGroup[kv.OP]{
 		IsTxn:        true,
 		StartTS:      pending.startTS,
@@ -3778,7 +3796,12 @@ func (r *RedisServer) listPushCoreWithDedup(ctx context.Context, key []byte, val
 			return err
 		}
 
-		commitTS := r.coordinator.Clock().Next()
+		// HLC-4 parity with prepareDispatch / dispatchExecReuse —
+		// see dispatchListPushReuse above for the rationale.
+		commitTS, allocErr := r.coordinator.Clock().NextFenced()
+		if allocErr != nil {
+			return errors.Wrap(allocErr, "redis list-push first-attempt: allocate commitTS")
+		}
 		ops, updatedMeta, err := buildFn(meta, key, values, commitTS, 0)
 		if err != nil {
 			return err

From 216ca94bdc3cf92ade3332f6bd8e325533c3a5a2 Mon Sep 17 00:00:00 2001
From: "Yoshiaki Ueda (bootjp)" <contact@bootjp.me>
Date: Tue, 2 Jun 2026 13:35:10 +0900
Subject: [PATCH 2/2] fix(docs): strip PR-process refs from inline comments

claude[bot] PR #901 minor #2 (CLAUDE.md convention): inline comments
should state the underlying reason, not reference issue/PR numbers
that rot as the codebase evolves. Dropped "PR #887" / "PR #887
round 1" / "PR #887 verdict" / "PR #887 review" attributions from
the three doc blocks. The WHY content is kept (ctx-parent symmetry,
HLC-4 fence rationale, ErrCeilingExpired non-retryable shape).

The PR description is the durable home for the flake-fix lineage
and the related PR refs.

No behavior change. Comment-only.
---
 adapter/redis.go | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/adapter/redis.go b/adapter/redis.go
index fa481e4f..5f132733 100644
--- a/adapter/redis.go
+++ b/adapter/redis.go
@@ -2784,10 +2784,10 @@ func (t *txnContext) prepareDispatch() (preparedTxnDispatch, error) {
 	// txnContext's own ctx (the caller's dispatchCtx), not the server-
 	// lifetime handlerContext, so an outer cancellation (client
 	// disconnect, retryRedisWrite timeout) interrupts the prepare+dispatch
-	// promptly instead of waiting the full redisDispatchTimeout — same
-	// rationale as runTransactionWithDedup's reuseCtx fix on PR #887. The
-	// nil-guard falls back to handlerContext for callers that construct a
-	// txnContext without setting ctx (test fixtures).
+	// promptly instead of waiting the full redisDispatchTimeout. Symmetric
+	// with the reuseCtx threading in runTransactionWithDedup. The nil-guard
+	// falls back to handlerContext for callers that construct a txnContext
+	// without setting ctx (test fixtures).
 	parentCtx := t.ctx
 	if parentCtx == nil {
 		parentCtx = t.server.handlerContext()
@@ -3282,7 +3282,7 @@ func (r *RedisServer) runTransactionWithDedup(queue []redcon.Command) ([]redisRe
 			// fresh 10 s budget elapses. The earlier "fresh ctx from
 			// handlerContext" pattern (noted in design doc §M3) was strictly
 			// more conservative but wasted resources on a disconnected
-			// client — see PR #887 review.
+			// client.
 			reuseCtx, reuseCancel := context.WithTimeout(dispatchCtx, redisDispatchTimeout)
 			defer reuseCancel()
 			res, drop, dispErr := r.dispatchExecReuse(reuseCtx, pending)
@@ -3618,10 +3618,11 @@ type reusableListPush struct {
 func (r *RedisServer) dispatchListPushReuse(ctx context.Context, key []byte, pending *reusableListPush) (newLen int64, drop bool, err error) {
 	// HLC-4 parity: persistence-grade commit_ts allocation must honor
 	// the physical-ceiling fence so a stale-leader window cannot mint a
-	// timestamp that collides with the successor's. dispatchExecReuse
-	// already uses NextFenced (PR #887 round 1); the two listPush dedup
-	// sites shipped before the HLC-4 migration and were missed in that
-	// pass — see PR #887 verdict for the noted gap.
+	// timestamp that collides with the successor's. The error path
+	// returns ErrCeilingExpired which isRetryableRedisTxnErr classifies
+	// as non-retryable, so it exits retryRedisWrite directly to the
+	// client — same shape as the other persistence-grade Next call
+	// sites in this file.
 	commitTS, allocErr := r.coordinator.Clock().NextFenced()
 	if allocErr != nil {
 		return 0, false, errors.Wrap(allocErr, "redis list-push reuse: allocate commitTS")