Skip to content

Commit d0b848b

Browse files
committed
ZOOKEEPER-5010: purge orphaned ephemerals after DIFF sync
1 parent 5127900 commit d0b848b

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import java.nio.ByteBuffer;
3232
import java.util.ArrayDeque;
3333
import java.util.Deque;
34+
import java.util.HashSet;
3435
import java.util.Map;
3536
import java.util.Map.Entry;
3637
import java.util.Set;
@@ -54,7 +55,9 @@
5455
import org.apache.zookeeper.server.Request;
5556
import org.apache.zookeeper.server.ServerCnxn;
5657
import org.apache.zookeeper.server.ServerMetrics;
58+
import org.apache.zookeeper.server.SessionTracker;
5759
import org.apache.zookeeper.server.TxnLogEntry;
60+
import org.apache.zookeeper.server.ZKDatabase;
5861
import org.apache.zookeeper.server.ZooTrace;
5962
import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
6063
import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier;
@@ -558,6 +561,7 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
558561
boolean snapshotNeeded = true;
559562
boolean syncSnapshot = false;
560563
readPacket(qp);
564+
boolean diffSync = qp.getType() == Leader.DIFF;
561565
Deque<Long> packetsCommitted = new ArrayDeque<>();
562566
Deque<PacketInFlight> packetsNotCommitted = new ArrayDeque<>();
563567
Deque<Request> requestsToAck = new ArrayDeque<>();
@@ -613,6 +617,10 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
613617
}
614618
zk.getZKDatabase().initConfigInZKDatabase(self.getQuorumVerifier());
615619
zk.createSessionTracker();
620+
// DIFF keeps the local tree; clear ephemerals without sessions before applying new transactions.
621+
if (diffSync) {
622+
purgeOrphanedEphemerals();
623+
}
616624

617625
long lastQueued = 0;
618626

@@ -845,6 +853,43 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
845853
// New server type need to handle in-flight packets
846854
throw new UnsupportedOperationException("Unknown server type");
847855
}
856+
857+
}
858+
859+
void purgeOrphanedEphemerals() {
860+
if (zk == null) {
861+
return;
862+
}
863+
SessionTracker sessionTracker = zk.getSessionTracker();
864+
if (sessionTracker == null) {
865+
return;
866+
}
867+
ZKDatabase zkDatabase = zk.getZKDatabase();
868+
if (zkDatabase == null) {
869+
return;
870+
}
871+
872+
Set<Long> globalSessions = sessionTracker.globalSessions();
873+
Set<Long> localSessions = sessionTracker.localSessions();
874+
Set<Long> sessionsWithEphemerals = new HashSet<>(zkDatabase.getSessions());
875+
if (sessionsWithEphemerals.isEmpty()) {
876+
return;
877+
}
878+
879+
long zxid = zkDatabase.getDataTreeLastProcessedZxid();
880+
for (Long sessionId : sessionsWithEphemerals) {
881+
if (globalSessions.contains(sessionId)
882+
|| localSessions.contains(sessionId)
883+
|| (sessionTracker instanceof UpgradeableSessionTracker
884+
&& ((UpgradeableSessionTracker) sessionTracker).isUpgradingSession(sessionId))) {
885+
continue;
886+
}
887+
LOG.warn(
888+
"Removing ephemeral nodes for unknown session 0x{} after DIFF sync",
889+
Long.toHexString(sessionId));
890+
zkDatabase.killSession(sessionId, zxid);
891+
sessionTracker.removeSession(sessionId);
892+
}
848893
}
849894

850895
protected void revalidate(QuorumPacket qp) throws IOException {

zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/LearnerTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import static org.hamcrest.CoreMatchers.is;
2525
import static org.hamcrest.MatcherAssert.assertThat;
2626
import static org.junit.jupiter.api.Assertions.assertEquals;
27+
import static org.junit.jupiter.api.Assertions.assertNotNull;
28+
import static org.junit.jupiter.api.Assertions.assertNull;
2729
import static org.junit.jupiter.api.Assertions.assertThrows;
2830
import static org.junit.jupiter.api.Assertions.assertTrue;
2931
import static org.junit.jupiter.api.Assertions.fail;
@@ -317,6 +319,39 @@ public void syncTest() throws Exception {
317319
}
318320
}
319321

322+
@Test
323+
public void testPurgeOrphanedEphemerals() throws Exception {
324+
File tmpFile = File.createTempFile("test", ".dir", testData);
325+
tmpFile.delete();
326+
SimpleLearner sl = null;
327+
try {
328+
FileTxnSnapLog ftsl = new FileTxnSnapLog(tmpFile, tmpFile);
329+
sl = new SimpleLearner(ftsl);
330+
331+
long sessionId = 0x1234L;
332+
TxnHeader hdr = new TxnHeader(sessionId, 1, 1L, 1L, ZooDefs.OpCode.create);
333+
CreateTxn txn = new CreateTxn(
334+
"/eph",
335+
new byte[0],
336+
new ArrayList<ACL>(),
337+
true,
338+
sl.zk.getZKDatabase().getNode("/").stat.getCversion());
339+
sl.zk.getZKDatabase().processTxn(hdr, txn, null);
340+
341+
assertNotNull(sl.zk.getZKDatabase().getNode("/eph"), "Ephemeral node should exist before cleanup");
342+
343+
sl.zk.startupWithoutServing();
344+
sl.purgeOrphanedEphemerals();
345+
346+
assertNull(sl.zk.getZKDatabase().getNode("/eph"), "Ephemeral node should be removed for unknown session");
347+
} finally {
348+
if (sl != null) {
349+
sl.zk.shutdown();
350+
}
351+
TestUtils.deleteFileRecursively(tmpFile);
352+
}
353+
}
354+
320355
@Test
321356
public void truncFailTest() throws Exception {
322357
final boolean[] exitProcCalled = {false};

0 commit comments

Comments
 (0)