From 5b1bbe89c7283bdcd2c73d0b73a08ebc15746f8c Mon Sep 17 00:00:00 2001 From: ashishk Date: Fri, 31 Oct 2025 12:53:00 +0530 Subject: [PATCH 1/7] HDDS-13846. Handle sequenceid synch during close container in SCM. --- .../scm/container/ContainerStateManager.java | 15 +++++++- .../container/ContainerStateManagerImpl.java | 37 ++++++++++++++++--- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index 0194e65fe00a..6f974dc748d8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -160,13 +160,24 @@ void addContainer(ContainerInfoProto containerInfo) throws IOException; /** - * + * Updates container state. This method delegates to updateContainerStateWithSequenceId + * for HA consistency. The @Replicate annotation is removed to avoid double replication. */ - @Replicate void updateContainerState(HddsProtos.ContainerID id, HddsProtos.LifeCycleEvent event) throws IOException, InvalidStateTransitionException; + /** + * Updates container state with sequenceId synchronization for HA consistency. + * This method ensures that all SCM nodes have the same sequenceId when + * state transitions occur. + */ + @Replicate + void updateContainerStateWithSequenceId(HddsProtos.ContainerID id, + HddsProtos.LifeCycleEvent event, + Long sequenceId) + throws IOException, InvalidStateTransitionException; + /** * Bypasses the container state machine to change a container's state from DELETING or DELETED to CLOSED. This API was diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 4b4578894a61..ef3c139cc961 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -356,26 +356,53 @@ public boolean contains(ContainerID id) { public void updateContainerState(final HddsProtos.ContainerID containerID, final LifeCycleEvent event) throws IOException, InvalidStateTransitionException { + final ContainerID id = ContainerID.getFromProtobuf(containerID); + try (AutoCloseableLock ignored = readLock(id)) { + if (containers.contains(id)) { + final ContainerInfo containerInfo = containers.getContainerInfo(id); + Long currentSequenceId = containerInfo.getSequenceId(); + // Delegate to @Replicate method with current sequenceId + updateContainerStateWithSequenceId(containerID, event, currentSequenceId); + } + } + } + + @Override + public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID containerID, + final LifeCycleEvent event, + final Long sequenceId) + throws IOException, InvalidStateTransitionException { // TODO: Remove the protobuf conversion after fixing ContainerStateMap. final ContainerID id = ContainerID.getFromProtobuf(containerID); try (AutoCloseableLock ignored = writeLock(id)) { if (containers.contains(id)) { - final ContainerInfo oldInfo = containers.getContainerInfo(id); - final LifeCycleState oldState = oldInfo.getState(); + final ContainerInfo containerInfo = containers.getContainerInfo(id); + + // Synchronize sequenceId first + LOG.info("Testing Sequence id change, SeqID from raft: {}, SeqId from container{}", + sequenceId, containerInfo.getSequenceId()); + if (containerInfo.getSequenceId() < sequenceId) { + containerInfo.updateSequenceId(sequenceId); + } + + final LifeCycleState oldState = containerInfo.getState(); final LifeCycleState newState = stateMachine.getNextState( - oldInfo.getState(), event); + containerInfo.getState(), event); if (newState.getNumber() > oldState.getNumber()) { ExecutionUtil.create(() -> { containers.updateState(id, oldState, newState); transactionBuffer.addToBuffer(containerStore, id, containers.getContainerInfo(id)); }).onException(() -> { - transactionBuffer.addToBuffer(containerStore, id, oldInfo); + // Get current info instead of stale oldInfo + ContainerInfo currentInfo = containers.getContainerInfo(id); + currentInfo.setState(oldState); // Revert only the state + transactionBuffer.addToBuffer(containerStore, id, currentInfo); containers.updateState(id, newState, oldState); }).execute(); containerStateChangeActions.getOrDefault(event, info -> { }) - .accept(oldInfo); + .accept(containerInfo); } } } From 1cf4cbc746d182a856b551b0e7412da8fa0b87cd Mon Sep 17 00:00:00 2001 From: ashishk Date: Fri, 31 Oct 2025 12:58:17 +0530 Subject: [PATCH 2/7] HDDS-13846. Remove unwanted logs. --- .../hadoop/hdds/scm/container/ContainerStateManagerImpl.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index ef3c139cc961..7de317d92e7e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -380,8 +380,6 @@ public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID cont final ContainerInfo containerInfo = containers.getContainerInfo(id); // Synchronize sequenceId first - LOG.info("Testing Sequence id change, SeqID from raft: {}, SeqId from container{}", - sequenceId, containerInfo.getSequenceId()); if (containerInfo.getSequenceId() < sequenceId) { containerInfo.updateSequenceId(sequenceId); } From b48bed8489c82d5a078e32c58df8cdb7237c8f4f Mon Sep 17 00:00:00 2001 From: ashishk Date: Mon, 24 Nov 2025 13:00:21 +0530 Subject: [PATCH 3/7] Fix review comments --- .../scm/container/ContainerManagerImpl.java | 6 ++++- .../scm/container/ContainerStateManager.java | 8 ------- .../container/ContainerStateManagerImpl.java | 15 ------------ .../container/TestContainerReportHandler.java | 4 ++-- ...TestIncrementalContainerReportHandler.java | 4 ++-- .../scm/storage/TestContainerCommandsEC.java | 24 +++++++++---------- 6 files changed, 21 insertions(+), 40 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index d255bc9a672d..c93383e40db6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -281,7 +281,11 @@ public void updateContainerState(final ContainerID cid, lock.lock(); try { if (containerExist(cid)) { - containerStateManager.updateContainerState(protoId, event); + final ContainerInfo info = containerStateManager.getContainer(cid); + if (info != null) { + // Delegate to @Replicate method with current sequenceId + containerStateManager.updateContainerStateWithSequenceId(protoId, event, info.getSequenceId()); + } } else { throw new ContainerNotFoundException(cid); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index 6f974dc748d8..f5a2334b7cd2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -159,14 +159,6 @@ public interface ContainerStateManager { void addContainer(ContainerInfoProto containerInfo) throws IOException; - /** - * Updates container state. This method delegates to updateContainerStateWithSequenceId - * for HA consistency. The @Replicate annotation is removed to avoid double replication. - */ - void updateContainerState(HddsProtos.ContainerID id, - HddsProtos.LifeCycleEvent event) - throws IOException, InvalidStateTransitionException; - /** * Updates container state with sequenceId synchronization for HA consistency. * This method ensures that all SCM nodes have the same sequenceId when diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 7de317d92e7e..8d641f103d60 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -352,21 +352,6 @@ public boolean contains(ContainerID id) { } } - @Override - public void updateContainerState(final HddsProtos.ContainerID containerID, - final LifeCycleEvent event) - throws IOException, InvalidStateTransitionException { - final ContainerID id = ContainerID.getFromProtobuf(containerID); - try (AutoCloseableLock ignored = readLock(id)) { - if (containers.contains(id)) { - final ContainerInfo containerInfo = containers.getContainerInfo(id); - Long currentSequenceId = containerInfo.getSequenceId(); - // Delegate to @Replicate method with current sequenceId - updateContainerStateWithSequenceId(containerID, event, currentSequenceId); - } - } - } - @Override public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID containerID, final LifeCycleEvent event, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java index 61001b9d38fc..8e5d9b161a4d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java @@ -132,9 +132,9 @@ void setup() throws IOException, InvalidStateTransitionException { doAnswer(invocation -> { containerStateManager - .updateContainerState(((ContainerID)invocation + .updateContainerStateWithSequenceId(((ContainerID)invocation .getArguments()[0]).getProtobuf(), - (HddsProtos.LifeCycleEvent)invocation.getArguments()[1]); + (HddsProtos.LifeCycleEvent)invocation.getArguments()[1], 0L); return null; }).when(containerManager).updateContainerState( any(ContainerID.class), diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java index d9ecaba4935e..188783b95903 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java @@ -165,9 +165,9 @@ public void setup() throws IOException, InvalidStateTransitionException, doAnswer(invocation -> { containerStateManager - .updateContainerState(((ContainerID)invocation + .updateContainerStateWithSequenceId(((ContainerID)invocation .getArguments()[0]).getProtobuf(), - (HddsProtos.LifeCycleEvent)invocation.getArguments()[1]); + (HddsProtos.LifeCycleEvent)invocation.getArguments()[1], 0L); return null; }).when(containerManager).updateContainerState( any(ContainerID.class), diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index 7e6047744707..7ce4f9319db2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -465,11 +465,11 @@ public void testCreateRecoveryContainer() throws Exception { // To create the actual situation, container would have been in closed // state at SCM. scm.getContainerManager().getContainerStateManager() - .updateContainerState(container.containerID().getProtobuf(), - HddsProtos.LifeCycleEvent.FINALIZE); + .updateContainerStateWithSequenceId(container.containerID().getProtobuf(), + HddsProtos.LifeCycleEvent.FINALIZE, 0L); scm.getContainerManager().getContainerStateManager() - .updateContainerState(container.containerID().getProtobuf(), - HddsProtos.LifeCycleEvent.CLOSE); + .updateContainerStateWithSequenceId(container.containerID().getProtobuf(), + HddsProtos.LifeCycleEvent.CLOSE, 0L); //Create the recovering container in DN. String encodedToken = cToken.encodeToUrlString(); @@ -555,11 +555,11 @@ public void testCreateRecoveryContainerAfterDNRestart() throws Exception { // To create the actual situation, container would have been in closed // state at SCM. scm.getContainerManager().getContainerStateManager() - .updateContainerState(container.containerID().getProtobuf(), - HddsProtos.LifeCycleEvent.FINALIZE); + .updateContainerStateWithSequenceId(container.containerID().getProtobuf(), + HddsProtos.LifeCycleEvent.FINALIZE, 0L); scm.getContainerManager().getContainerStateManager() - .updateContainerState(container.containerID().getProtobuf(), - HddsProtos.LifeCycleEvent.CLOSE); + .updateContainerStateWithSequenceId(container.containerID().getProtobuf(), + HddsProtos.LifeCycleEvent.CLOSE, 0L); //Create the recovering container in target DN. String encodedToken = cToken.encodeToUrlString(); @@ -936,12 +936,12 @@ public void testECReconstructionCoordinatorShouldCleanupContainersOnFailure() private void closeContainer(long conID) throws IOException, InvalidStateTransitionException { //Close the container first. - scm.getContainerManager().getContainerStateManager().updateContainerState( + scm.getContainerManager().getContainerStateManager().updateContainerStateWithSequenceId( HddsProtos.ContainerID.newBuilder().setId(conID).build(), - HddsProtos.LifeCycleEvent.FINALIZE); - scm.getContainerManager().getContainerStateManager().updateContainerState( + HddsProtos.LifeCycleEvent.FINALIZE, 0L); + scm.getContainerManager().getContainerStateManager().updateContainerStateWithSequenceId( HddsProtos.ContainerID.newBuilder().setId(conID).build(), - HddsProtos.LifeCycleEvent.CLOSE); + HddsProtos.LifeCycleEvent.CLOSE, 0L); } private void checkBlockDataWithRetry( From 01dce42a0f6e12d148e15d820f978a22539baa52 Mon Sep 17 00:00:00 2001 From: ashishk Date: Tue, 9 Dec 2025 14:50:14 +0530 Subject: [PATCH 4/7] Fix review comments --- .../hadoop/hdds/scm/container/ContainerStateManagerImpl.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 8d641f103d60..225f0bbc4d1c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -367,11 +367,14 @@ public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID cont // Synchronize sequenceId first if (containerInfo.getSequenceId() < sequenceId) { containerInfo.updateSequenceId(sequenceId); + } else { + LOG.warn("Container sequenceId is {} greater than the leader container sequenceId {}", + containerInfo.getSequenceId(), sequenceId); } final LifeCycleState oldState = containerInfo.getState(); final LifeCycleState newState = stateMachine.getNextState( - containerInfo.getState(), event); + oldState, event); if (newState.getNumber() > oldState.getNumber()) { ExecutionUtil.create(() -> { containers.updateState(id, oldState, newState); From 7f7f8a55dc6bee15d6a9a96f34d56da93f1a151a Mon Sep 17 00:00:00 2001 From: ashishk Date: Tue, 9 Dec 2025 16:27:45 +0530 Subject: [PATCH 5/7] Add test case --- .../container/TestContainerStateManager.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java index 0f5b3b6adcd5..51c459f5575b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java @@ -32,6 +32,7 @@ import java.util.Set; import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -196,6 +197,46 @@ public void testTransitionContainerToClosedStateAllowOnlyDeletingOrDeletedContai } } + @Test + public void testSequenceIdOnStateUpdate() throws Exception { + ContainerID containerID = ContainerID.valueOf(3L); + long sequenceId = 100L; + + ContainerInfo containerInfo = new ContainerInfo.Builder() + .setContainerID(containerID.getId()) + .setState(HddsProtos.LifeCycleState.OPEN) + .setSequenceId(sequenceId) + .setOwner("scm") + .setPipelineID(PipelineID.randomId()) + .setReplicationConfig( + RatisReplicationConfig + .getInstance(ReplicationFactor.THREE)) + .build(); + + containerStateManager.addContainer(containerInfo.getProtobuf()); + + // Try to update with a higher sequenceId + containerStateManager.updateContainerStateWithSequenceId( + containerID.getProtobuf(), + HddsProtos.LifeCycleEvent.FINALIZE, + sequenceId + 1); + + ContainerInfo afterFirst = containerStateManager.getContainer(containerID); + long currentSequenceId = afterFirst.getSequenceId(); + // Sequence id should be updated with latest sequence id + assertEquals(sequenceId + 1, currentSequenceId); + + // Try updating with older sequenceId + containerStateManager.updateContainerStateWithSequenceId( + containerID.getProtobuf(), + HddsProtos.LifeCycleEvent.CLOSE, + sequenceId - 10); // Older sequenceId + + // Assert - SequenceId should not change + ContainerInfo finalInfo = containerStateManager.getContainer(containerID); + assertEquals(finalInfo.getSequenceId(), currentSequenceId); + } + private void addReplica(ContainerInfo cont, DatanodeDetails node) { ContainerReplica replica = ContainerReplica.newBuilder() .setContainerID(cont.containerID()) From ef87b2310f1867114fa2c31f0ac1f4524eb75cb3 Mon Sep 17 00:00:00 2001 From: ashishk Date: Mon, 15 Dec 2025 14:27:55 +0530 Subject: [PATCH 6/7] Fix logging issue --- .../hadoop/hdds/scm/container/ContainerStateManagerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 225f0bbc4d1c..0c0f687a459d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -367,7 +367,7 @@ public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID cont // Synchronize sequenceId first if (containerInfo.getSequenceId() < sequenceId) { containerInfo.updateSequenceId(sequenceId); - } else { + } else if (containerInfo.getSequenceId() > sequenceId) { LOG.warn("Container sequenceId is {} greater than the leader container sequenceId {}", containerInfo.getSequenceId(), sequenceId); } From 33db9bc246c8572319e9f306da6933aecc2d2993 Mon Sep 17 00:00:00 2001 From: ashishk Date: Fri, 2 Jan 2026 13:31:03 +0530 Subject: [PATCH 7/7] Fix comment --- .../hadoop/hdds/scm/container/ContainerStateManagerImpl.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 0c0f687a459d..d971b19c406c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -381,11 +381,10 @@ public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID cont transactionBuffer.addToBuffer(containerStore, id, containers.getContainerInfo(id)); }).onException(() -> { - // Get current info instead of stale oldInfo + containers.updateState(id, newState, oldState); ContainerInfo currentInfo = containers.getContainerInfo(id); - currentInfo.setState(oldState); // Revert only the state transactionBuffer.addToBuffer(containerStore, id, currentInfo); - containers.updateState(id, newState, oldState); + }).execute(); containerStateChangeActions.getOrDefault(event, info -> { }) .accept(containerInfo);