Skip to content

Commit b104f09

Browse files
HDDS-13846. Handle sequenceid synch during close container in SCM. (#9228)
1 parent 58a9fb8 commit b104f09

7 files changed

Lines changed: 87 additions & 27 deletions

File tree

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,11 @@ public void updateContainerState(final ContainerID cid,
281281
lock.lock();
282282
try {
283283
if (containerExist(cid)) {
284-
containerStateManager.updateContainerState(protoId, event);
284+
final ContainerInfo info = containerStateManager.getContainer(cid);
285+
if (info != null) {
286+
// Delegate to @Replicate method with current sequenceId
287+
containerStateManager.updateContainerStateWithSequenceId(protoId, event, info.getSequenceId());
288+
}
285289
} else {
286290
throw new ContainerNotFoundException(cid);
287291
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,14 @@ void addContainer(ContainerInfoProto containerInfo)
160160
throws IOException;
161161

162162
/**
163-
*
163+
* Updates container state with sequenceId synchronization for HA consistency.
164+
* This method ensures that all SCM nodes have the same sequenceId when
165+
* state transitions occur.
164166
*/
165167
@Replicate
166-
void updateContainerState(HddsProtos.ContainerID id,
167-
HddsProtos.LifeCycleEvent event)
168+
void updateContainerStateWithSequenceId(HddsProtos.ContainerID id,
169+
HddsProtos.LifeCycleEvent event,
170+
Long sequenceId)
168171
throws IOException, InvalidStateTransitionException;
169172

170173

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -353,29 +353,41 @@ public boolean contains(ContainerID id) {
353353
}
354354

355355
@Override
356-
public void updateContainerState(final HddsProtos.ContainerID containerID,
357-
final LifeCycleEvent event)
356+
public void updateContainerStateWithSequenceId(final HddsProtos.ContainerID containerID,
357+
final LifeCycleEvent event,
358+
final Long sequenceId)
358359
throws IOException, InvalidStateTransitionException {
359360
// TODO: Remove the protobuf conversion after fixing ContainerStateMap.
360361
final ContainerID id = ContainerID.getFromProtobuf(containerID);
361362

362363
try (AutoCloseableLock ignored = writeLock(id)) {
363364
if (containers.contains(id)) {
364-
final ContainerInfo oldInfo = containers.getContainerInfo(id);
365-
final LifeCycleState oldState = oldInfo.getState();
365+
final ContainerInfo containerInfo = containers.getContainerInfo(id);
366+
367+
// Synchronize sequenceId first
368+
if (containerInfo.getSequenceId() < sequenceId) {
369+
containerInfo.updateSequenceId(sequenceId);
370+
} else if (containerInfo.getSequenceId() > sequenceId) {
371+
LOG.warn("Container sequenceId is {} greater than the leader container sequenceId {}",
372+
containerInfo.getSequenceId(), sequenceId);
373+
}
374+
375+
final LifeCycleState oldState = containerInfo.getState();
366376
final LifeCycleState newState = stateMachine.getNextState(
367-
oldInfo.getState(), event);
377+
oldState, event);
368378
if (newState.getNumber() > oldState.getNumber()) {
369379
ExecutionUtil.create(() -> {
370380
containers.updateState(id, oldState, newState);
371381
transactionBuffer.addToBuffer(containerStore, id,
372382
containers.getContainerInfo(id));
373383
}).onException(() -> {
374-
transactionBuffer.addToBuffer(containerStore, id, oldInfo);
375384
containers.updateState(id, newState, oldState);
385+
ContainerInfo currentInfo = containers.getContainerInfo(id);
386+
transactionBuffer.addToBuffer(containerStore, id, currentInfo);
387+
376388
}).execute();
377389
containerStateChangeActions.getOrDefault(event, info -> { })
378-
.accept(oldInfo);
390+
.accept(containerInfo);
379391
}
380392
}
381393
}

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ void setup() throws IOException, InvalidStateTransitionException {
132132

133133
doAnswer(invocation -> {
134134
containerStateManager
135-
.updateContainerState(((ContainerID)invocation
135+
.updateContainerStateWithSequenceId(((ContainerID)invocation
136136
.getArguments()[0]).getProtobuf(),
137-
(HddsProtos.LifeCycleEvent)invocation.getArguments()[1]);
137+
(HddsProtos.LifeCycleEvent)invocation.getArguments()[1], 0L);
138138
return null;
139139
}).when(containerManager).updateContainerState(
140140
any(ContainerID.class),

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import java.util.Set;
3333
import java.util.concurrent.TimeoutException;
3434
import org.apache.hadoop.hdds.HddsConfigKeys;
35+
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
3536
import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
3637
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
3738
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -196,6 +197,46 @@ public void testTransitionContainerToClosedStateAllowOnlyDeletingOrDeletedContai
196197
}
197198
}
198199

200+
@Test
201+
public void testSequenceIdOnStateUpdate() throws Exception {
202+
ContainerID containerID = ContainerID.valueOf(3L);
203+
long sequenceId = 100L;
204+
205+
ContainerInfo containerInfo = new ContainerInfo.Builder()
206+
.setContainerID(containerID.getId())
207+
.setState(HddsProtos.LifeCycleState.OPEN)
208+
.setSequenceId(sequenceId)
209+
.setOwner("scm")
210+
.setPipelineID(PipelineID.randomId())
211+
.setReplicationConfig(
212+
RatisReplicationConfig
213+
.getInstance(ReplicationFactor.THREE))
214+
.build();
215+
216+
containerStateManager.addContainer(containerInfo.getProtobuf());
217+
218+
// Try to update with a higher sequenceId
219+
containerStateManager.updateContainerStateWithSequenceId(
220+
containerID.getProtobuf(),
221+
HddsProtos.LifeCycleEvent.FINALIZE,
222+
sequenceId + 1);
223+
224+
ContainerInfo afterFirst = containerStateManager.getContainer(containerID);
225+
long currentSequenceId = afterFirst.getSequenceId();
226+
// Sequence id should be updated with latest sequence id
227+
assertEquals(sequenceId + 1, currentSequenceId);
228+
229+
// Try updating with older sequenceId
230+
containerStateManager.updateContainerStateWithSequenceId(
231+
containerID.getProtobuf(),
232+
HddsProtos.LifeCycleEvent.CLOSE,
233+
sequenceId - 10); // Older sequenceId
234+
235+
// Assert - SequenceId should not change
236+
ContainerInfo finalInfo = containerStateManager.getContainer(containerID);
237+
assertEquals(finalInfo.getSequenceId(), currentSequenceId);
238+
}
239+
199240
private void addReplica(ContainerInfo cont, DatanodeDetails node) {
200241
ContainerReplica replica = ContainerReplica.newBuilder()
201242
.setContainerID(cont.containerID())

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestIncrementalContainerReportHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ public void setup() throws IOException, InvalidStateTransitionException,
165165

166166
doAnswer(invocation -> {
167167
containerStateManager
168-
.updateContainerState(((ContainerID)invocation
168+
.updateContainerStateWithSequenceId(((ContainerID)invocation
169169
.getArguments()[0]).getProtobuf(),
170-
(HddsProtos.LifeCycleEvent)invocation.getArguments()[1]);
170+
(HddsProtos.LifeCycleEvent)invocation.getArguments()[1], 0L);
171171
return null;
172172
}).when(containerManager).updateContainerState(
173173
any(ContainerID.class),

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -465,11 +465,11 @@ public void testCreateRecoveryContainer() throws Exception {
465465
// To create the actual situation, container would have been in closed
466466
// state at SCM.
467467
scm.getContainerManager().getContainerStateManager()
468-
.updateContainerState(container.containerID().getProtobuf(),
469-
HddsProtos.LifeCycleEvent.FINALIZE);
468+
.updateContainerStateWithSequenceId(container.containerID().getProtobuf(),
469+
HddsProtos.LifeCycleEvent.FINALIZE, 0L);
470470
scm.getContainerManager().getContainerStateManager()
471-
.updateContainerState(container.containerID().getProtobuf(),
472-
HddsProtos.LifeCycleEvent.CLOSE);
471+
.updateContainerStateWithSequenceId(container.containerID().getProtobuf(),
472+
HddsProtos.LifeCycleEvent.CLOSE, 0L);
473473

474474
//Create the recovering container in DN.
475475
String encodedToken = cToken.encodeToUrlString();
@@ -555,11 +555,11 @@ public void testCreateRecoveryContainerAfterDNRestart() throws Exception {
555555
// To create the actual situation, container would have been in closed
556556
// state at SCM.
557557
scm.getContainerManager().getContainerStateManager()
558-
.updateContainerState(container.containerID().getProtobuf(),
559-
HddsProtos.LifeCycleEvent.FINALIZE);
558+
.updateContainerStateWithSequenceId(container.containerID().getProtobuf(),
559+
HddsProtos.LifeCycleEvent.FINALIZE, 0L);
560560
scm.getContainerManager().getContainerStateManager()
561-
.updateContainerState(container.containerID().getProtobuf(),
562-
HddsProtos.LifeCycleEvent.CLOSE);
561+
.updateContainerStateWithSequenceId(container.containerID().getProtobuf(),
562+
HddsProtos.LifeCycleEvent.CLOSE, 0L);
563563

564564
//Create the recovering container in target DN.
565565
String encodedToken = cToken.encodeToUrlString();
@@ -936,12 +936,12 @@ public void testECReconstructionCoordinatorShouldCleanupContainersOnFailure()
936936
private void closeContainer(long conID)
937937
throws IOException, InvalidStateTransitionException {
938938
//Close the container first.
939-
scm.getContainerManager().getContainerStateManager().updateContainerState(
939+
scm.getContainerManager().getContainerStateManager().updateContainerStateWithSequenceId(
940940
HddsProtos.ContainerID.newBuilder().setId(conID).build(),
941-
HddsProtos.LifeCycleEvent.FINALIZE);
942-
scm.getContainerManager().getContainerStateManager().updateContainerState(
941+
HddsProtos.LifeCycleEvent.FINALIZE, 0L);
942+
scm.getContainerManager().getContainerStateManager().updateContainerStateWithSequenceId(
943943
HddsProtos.ContainerID.newBuilder().setId(conID).build(),
944-
HddsProtos.LifeCycleEvent.CLOSE);
944+
HddsProtos.LifeCycleEvent.CLOSE, 0L);
945945
}
946946

947947
private void checkBlockDataWithRetry(

0 commit comments

Comments
 (0)