@@ -90,7 +90,7 @@ func (c *ShardedCoordinator) Dispatch(ctx context.Context, reqs *OperationGroup[
9090 }
9191
9292 if reqs .IsTxn {
93- return c .dispatchTxn (reqs .StartTS , reqs .CommitTS , reqs .Elems )
93+ return c .dispatchTxn (ctx , reqs .StartTS , reqs .CommitTS , reqs .Elems , reqs . ReadKeys )
9494 }
9595
9696 logs , err := c .requestLogs (reqs )
@@ -193,7 +193,7 @@ func (c *ShardedCoordinator) broadcastToAllGroups(requests []*pb.Request) (*Coor
193193 return & CoordinateResponse {CommitIndex : maxIndex .Load ()}, nil
194194}
195195
196- func (c * ShardedCoordinator ) dispatchTxn (startTS uint64 , commitTS uint64 , elems []* Elem [OP ]) (* CoordinateResponse , error ) {
196+ func (c * ShardedCoordinator ) dispatchTxn (ctx context. Context , startTS uint64 , commitTS uint64 , elems []* Elem [OP ], readKeys [][] byte ) (* CoordinateResponse , error ) {
197197 grouped , gids , err := c .groupMutations (elems )
198198 if err != nil {
199199 return nil , err
@@ -212,7 +212,7 @@ func (c *ShardedCoordinator) dispatchTxn(startTS uint64, commitTS uint64, elems
212212 return c .dispatchSingleShardTxn (startTS , commitTS , primaryKey , gids [0 ], elems )
213213 }
214214
215- prepared , err := c .prewriteTxn (startTS , commitTS , primaryKey , grouped , gids )
215+ prepared , err := c .prewriteTxn (ctx , startTS , commitTS , primaryKey , grouped , gids , readKeys )
216216 if err != nil {
217217 return nil , err
218218 }
@@ -246,8 +246,9 @@ func (c *ShardedCoordinator) dispatchSingleShardTxn(startTS, commitTS uint64, pr
246246 if err != nil {
247247 return nil , err
248248 }
249+ // Single-shard: read-set validated pre-Raft by the adapter.
249250 resp , err := g .Txn .Commit ([]* pb.Request {
250- onePhaseTxnRequest (startTS , commitTS , primaryKey , elems ),
251+ onePhaseTxnRequest (startTS , commitTS , primaryKey , elems , nil ),
251252 })
252253 if err != nil {
253254 return nil , errors .WithStack (err )
@@ -263,10 +264,12 @@ type preparedGroup struct {
263264 keys []* pb.Mutation
264265}
265266
266- func (c * ShardedCoordinator ) prewriteTxn (startTS , commitTS uint64 , primaryKey []byte , grouped map [uint64 ][]* pb.Mutation , gids []uint64 ) ([]preparedGroup , error ) {
267+ func (c * ShardedCoordinator ) prewriteTxn (ctx context. Context , startTS , commitTS uint64 , primaryKey []byte , grouped map [uint64 ][]* pb.Mutation , gids []uint64 , readKeys [][] byte ) ([]preparedGroup , error ) {
267268 prepareMeta := txnMetaMutation (primaryKey , defaultTxnLockTTLms , 0 )
268269 prepared := make ([]preparedGroup , 0 , len (gids ))
269270
271+ groupedReadKeys := c .groupReadKeysByShardID (readKeys )
272+
270273 for _ , gid := range gids {
271274 g , err := c .txnGroupForID (gid )
272275 if err != nil {
@@ -277,6 +280,7 @@ func (c *ShardedCoordinator) prewriteTxn(startTS, commitTS uint64, primaryKey []
277280 Phase : pb .Phase_PREPARE ,
278281 Ts : startTS ,
279282 Mutations : append ([]* pb.Mutation {prepareMeta }, grouped [gid ]... ),
283+ ReadKeys : groupedReadKeys [gid ],
280284 }
281285 if _ , err := g .Txn .Commit ([]* pb.Request {req }); err != nil {
282286 c .abortPreparedTxn (startTS , primaryKey , prepared , abortTSFrom (startTS , commitTS ))
@@ -285,6 +289,14 @@ func (c *ShardedCoordinator) prewriteTxn(startTS, commitTS uint64, primaryKey []
285289 prepared = append (prepared , preparedGroup {gid : gid , keys : keyMutations (grouped [gid ])})
286290 }
287291
292+ // Validate read keys on read-only shards (shards that have read keys
293+ // but no mutations in this transaction). Without this, a concurrent
294+ // write to a read-only shard would go undetected.
295+ if err := c .validateReadOnlyShards (ctx , groupedReadKeys , gids , startTS ); err != nil {
296+ c .abortPreparedTxn (startTS , primaryKey , prepared , abortTSFrom (startTS , commitTS ))
297+ return nil , err
298+ }
299+
288300 return prepared , nil
289301}
290302
@@ -586,6 +598,81 @@ func (c *ShardedCoordinator) engineGroupIDForKey(key []byte) uint64 {
586598 return route .GroupID
587599}
588600
601+ func (c * ShardedCoordinator ) groupReadKeysByShardID (readKeys [][]byte ) map [uint64 ][][]byte {
602+ if len (readKeys ) == 0 {
603+ return nil
604+ }
605+ grouped := make (map [uint64 ][][]byte )
606+ for _ , key := range readKeys {
607+ gid := c .engineGroupIDForKey (key )
608+ if gid == 0 {
609+ continue
610+ }
611+ grouped [gid ] = append (grouped [gid ], key )
612+ }
613+ return grouped
614+ }
615+
616+ // validateReadOnlyShards checks read-write conflicts on shards that have
617+ // read keys but no mutations in this transaction. writeGIDs is the set of
618+ // shards that already received a PREPARE with their readKeys attached.
619+ //
620+ // Because these shards have no mutations, we cannot send a PREPARE request
621+ // (the FSM rejects empty mutation lists). Instead we issue a linearizable
622+ // read barrier on each read-only shard's Raft group (ensuring the local
623+ // FSM has applied all committed log entries) and then check LatestCommitTS
624+ // against the local store.
625+ //
626+ // NOTE: This check is performed outside the FSM's applyMu lock, so there
627+ // is a small TOCTOU window between the linearizable read barrier and the
628+ // LatestCommitTS check. A concurrent write that commits in this window may
629+ // go undetected. Full SSI for read-only shards in multi-shard transactions
630+ // would require a dedicated "read-validate" FSM request phase. For
631+ // single-shard transactions and write-shard read keys, validation is fully
632+ // atomic under applyMu.
633+ func (c * ShardedCoordinator ) validateReadOnlyShards (ctx context.Context , groupedReadKeys map [uint64 ][][]byte , writeGIDs []uint64 , startTS uint64 ) error {
634+ if len (groupedReadKeys ) == 0 {
635+ return nil
636+ }
637+ writeSet := make (map [uint64 ]struct {}, len (writeGIDs ))
638+ for _ , gid := range writeGIDs {
639+ writeSet [gid ] = struct {}{}
640+ }
641+ for gid , keys := range groupedReadKeys {
642+ if _ , isWrite := writeSet [gid ]; isWrite {
643+ continue
644+ }
645+ if err := c .validateReadKeysOnShard (ctx , gid , keys , startTS ); err != nil {
646+ return err
647+ }
648+ }
649+ return nil
650+ }
651+
652+ func (c * ShardedCoordinator ) validateReadKeysOnShard (ctx context.Context , gid uint64 , keys [][]byte , startTS uint64 ) error {
653+ g , ok := c .groups [gid ]
654+ if ! ok {
655+ return nil
656+ }
657+ // Linearizable read barrier: wait until the shard's FSM has applied
658+ // all Raft-committed entries so LatestCommitTS reflects the latest
659+ // committed state. Without this, a concurrent write that is committed
660+ // in Raft but not yet applied locally would be invisible.
661+ if _ , err := linearizableReadEngineCtx (ctx , engineForGroup (g )); err != nil {
662+ return errors .WithStack (err )
663+ }
664+ for _ , key := range keys {
665+ ts , exists , err := g .Store .LatestCommitTS (ctx , key )
666+ if err != nil {
667+ return errors .WithStack (err )
668+ }
669+ if exists && ts > startTS {
670+ return errors .WithStack (store .NewWriteConflictError (key ))
671+ }
672+ }
673+ return nil
674+ }
675+
589676var _ Coordinator = (* ShardedCoordinator )(nil )
590677
591678func validateOperationGroup (reqs * OperationGroup [OP ]) error {
0 commit comments