Skip to content

Commit ef9b810

Browse files
committed
Fix WriteSet use-after-recycle in sequence reads
Sequence read completion recycled the WriteSet before registering slow bookies. When a speculative read completed the entry, the subclass could still access the recycled WriteSet. Late error callbacks could also enter reattempt logic and call indexOf() after the request had already completed. Snapshot slow bookie addresses before delegating to the shared complete path, and ignore late errors after sequence read requests have completed. Apply the same guard to ReadLAC sequence reads, where orderedEnsemble is also recycled on completion. Add regression coverage for normal sequence reads and ReadLAC covering both completion and late-error paths.
1 parent 34ec6a4 commit ef9b810

3 files changed

Lines changed: 432 additions & 14 deletions

File tree

bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingReadOp.java

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,12 @@ synchronized BookieId sendNextRead() {
386386

387387
@Override
388388
synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) {
389+
// A late error response from a bookie whose request was already superseded by
390+
// a faster (e.g. speculative) read must not flow through the reattempt logic:
391+
// writeSet has been recycled at the moment this entry transitioned to complete.
392+
if (isComplete()) {
393+
return;
394+
}
389395
super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc);
390396

391397
int replica = writeSet.indexOf(bookieIndex);
@@ -412,15 +418,29 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, Strin
412418

413419
@Override
414420
boolean complete(int bookieIndex, BookieId host, ByteBuf buffer) {
421+
if (isComplete()) {
422+
return false;
423+
}
424+
// Common case: the very first replica responded successfully; no
425+
// speculative retry happened, so there are no slow bookies to mark.
426+
// Skip the snapshot allocation entirely.
427+
final int numReplicasTried = getNextReplicaIndexToReadFrom();
428+
if (numReplicasTried <= 1) {
429+
return super.complete(bookieIndex, host, buffer);
430+
}
431+
// Speculative retry happened: snapshot the addresses of the replicas tried
432+
// before this one BEFORE calling super.complete(), which recycles writeSet
433+
// (see issue #4680). The WriteSet keeps its normal pooled lifecycle.
434+
final BookieId[] slowBookies = new BookieId[numReplicasTried - 1];
435+
for (int i = 0; i < slowBookies.length; i++) {
436+
slowBookies[i] = ensemble.get(writeSet.get(i));
437+
}
438+
415439
boolean completed = super.complete(bookieIndex, host, buffer);
416440
if (completed) {
417-
int numReplicasTried = getNextReplicaIndexToReadFrom();
418-
// Check if any speculative reads were issued and mark any slow bookies before
419-
// the first successful speculative read as "slow"
420-
for (int i = 0; i < numReplicasTried - 1; i++) {
421-
int slowBookieIndex = writeSet.get(i);
422-
BookieId slowBookieSocketAddress = ensemble.get(slowBookieIndex);
423-
clientCtx.getPlacementPolicy().registerSlowBookie(slowBookieSocketAddress, eId);
441+
// Mark replicas tried before the first successful response as "slow".
442+
for (BookieId slowBookie : slowBookies) {
443+
clientCtx.getPlacementPolicy().registerSlowBookie(slowBookie, eId);
424444
}
425445
}
426446
return completed;

bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ReadLastConfirmedAndEntryOp.java

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,12 @@ private synchronized void translateAndSetFirstError(int rc) {
189189
* read result code
190190
*/
191191
synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) {
192+
// Late error response after this request was already completed (e.g. by a
193+
// faster bookie's success): writeSet/orderedEnsemble have been recycled, so
194+
// do not enter logic that would access them.
195+
if (isComplete()) {
196+
return;
197+
}
192198
translateAndSetFirstError(rc);
193199

194200
if (BKException.Code.NoSuchEntryException == rc || BKException.Code.NoSuchLedgerExistsException == rc) {
@@ -398,6 +404,9 @@ synchronized BookieId sendNextRead() {
398404

399405
@Override
400406
synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, String errMsg, int rc) {
407+
if (isComplete()) {
408+
return;
409+
}
401410
super.logErrorAndReattemptRead(bookieIndex, host, errMsg, rc);
402411

403412
int replica = getReplicaIndex(bookieIndex);
@@ -422,15 +431,22 @@ synchronized void logErrorAndReattemptRead(int bookieIndex, BookieId host, Strin
422431

423432
@Override
424433
boolean complete(int bookieIndex, BookieId host, ByteBuf buffer, long entryId) {
434+
if (isComplete()) {
435+
return false;
436+
}
437+
// Snapshot the slow-bookie addresses BEFORE super.complete() recycles
438+
// orderedEnsemble (see issue #4680). The loop runs for every replica tried
439+
// (including the successful one), so numReplicasTried is at least 1 here.
440+
final int numReplicasTried = getNextReplicaIndexToReadFrom();
441+
final BookieId[] slowBookies = new BookieId[numReplicasTried];
442+
for (int i = 0; i < slowBookies.length; i++) {
443+
slowBookies[i] = ensemble.get(orderedEnsemble.get(i));
444+
}
445+
425446
boolean completed = super.complete(bookieIndex, host, buffer, entryId);
426447
if (completed) {
427-
int numReplicasTried = getNextReplicaIndexToReadFrom();
428-
// Check if any speculative reads were issued and mark any bookies before the
429-
// first speculative read as slow
430-
for (int i = 0; i < numReplicasTried; i++) {
431-
int slowBookieIndex = orderedEnsemble.get(i);
432-
BookieId slowBookieSocketAddress = ensemble.get(slowBookieIndex);
433-
clientCtx.getPlacementPolicy().registerSlowBookie(slowBookieSocketAddress, entryId);
448+
for (BookieId slowBookie : slowBookies) {
449+
clientCtx.getPlacementPolicy().registerSlowBookie(slowBookie, entryId);
434450
}
435451
}
436452
return completed;

0 commit comments

Comments
 (0)