From 2533cd496d348ee4a70b9d11436adde422552d1c Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Thu, 16 Apr 2026 16:48:48 +0200 Subject: [PATCH 01/10] HDDS-15034. Query SCM status for ozone admin upgrade status command --- .../hadoop/hdds/scm/node/NodeManager.java | 19 +++++ .../hadoop/hdds/scm/node/SCMNodeManager.java | 38 +++++++++ .../scm/server/SCMClientProtocolServer.java | 36 ++++++-- .../hdds/scm/node/TestSCMNodeManager.java | 31 +++++++ .../server/TestSCMClientProtocolServer.java | 63 ++++++++++++++ .../ozone/admin/upgrade/StatusSubCommand.java | 3 +- .../admin/upgrade/TestStatusSubCommand.java | 83 +++++++++++++++++++ 7 files changed, 264 insertions(+), 9 deletions(-) create mode 100644 hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index e1cf9a45bf50..98e27a955964 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -144,6 +144,25 @@ default int getAllNodeCount() { return getAllNodes().size(); } + /** + * Returns the number of datanodes currently marked finalized by SCM layout version + * metadata/software comparison. + */ + default int getNumDatanodesFinalized() { + return (int) getAllNodes().stream() + .filter(DatanodeInfo.class::isInstance) + .map(DatanodeInfo.class::cast) + .filter(datanodeInfo -> { + LayoutVersionProto layoutVersion = datanodeInfo.getLastKnownLayoutVersion(); + if (layoutVersion == null) { + return false; + } + return layoutVersion.getMetadataLayoutVersion() + >= layoutVersion.getSoftwareLayoutVersion(); + }) + .count(); + } + /** * Returns the aggregated node stats. * @return the aggregated node stats. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index f7432ad47e53..37ee774aa83b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -44,6 +44,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.BiConsumer; import java.util.function.Function; @@ -140,6 +141,7 @@ public class SCMNodeManager implements NodeManager { BiConsumer>> sendCommandNotifyMap; private final NonWritableNodeFilter nonWritableNodeFilter; private final int numContainerPerVolume; + private final AtomicInteger datanodesFinalized; /** * Lock used to synchronize some operation in Node manager to ensure a @@ -202,6 +204,7 @@ public SCMNodeManager( this.scmContext = scmContext; this.sendCommandNotifyMap = new HashMap<>(); this.nonWritableNodeFilter = new NonWritableNodeFilter(conf); + this.datanodesFinalized = new AtomicInteger(0); } @Override @@ -422,6 +425,7 @@ public RegisteredCommand register( try { clusterMap.add(datanodeDetails); nodeStateManager.addNode(datanodeDetails, layoutInfo); + datanodesFinalized.addAndGet(isDatanodeFinalized(layoutInfo) ? 1 : 0); // Check that datanode in nodeStateManager has topology parent set DatanodeDetails dn = nodeStateManager.getNode(datanodeDetails); Preconditions.checkState(dn.getParent() != null); @@ -447,7 +451,9 @@ public RegisteredCommand register( hostName, ipAddress, dnId)) { LOG.info("Updating datanode from {} to {}", oldNode, datanodeDetails); clusterMap.update(oldNode, datanodeDetails); + LayoutVersionProto oldLayoutVersion = oldNode.getLastKnownLayoutVersion(); nodeStateManager.updateNode(datanodeDetails, layoutInfo); + updateDatanodesFinalizedCount(oldLayoutVersion, layoutInfo); DatanodeDetails dn = nodeStateManager.getNode(datanodeDetails); Preconditions.checkState(dn.getParent() != null); processNodeReport(datanodeDetails, nodeReport); @@ -457,7 +463,9 @@ public RegisteredCommand register( LOG.info("Update the version for registered datanode {}, " + "oldVersion = {}, newVersion = {}.", datanodeDetails, oldNode.getVersion(), datanodeDetails.getVersion()); + LayoutVersionProto oldLayoutVersion = oldNode.getLastKnownLayoutVersion(); nodeStateManager.updateNode(datanodeDetails, layoutInfo); + updateDatanodesFinalizedCount(oldLayoutVersion, layoutInfo); } } catch (NodeNotFoundException e) { LOG.error("Cannot find datanode {} from nodeStateManager", @@ -732,8 +740,11 @@ public void processLayoutVersionReport(DatanodeDetails datanodeDetails, } try { + DatanodeInfo datanodeInfo = nodeStateManager.getNode(datanodeDetails); + LayoutVersionProto oldLayoutVersion = datanodeInfo.getLastKnownLayoutVersion(); nodeStateManager.updateLastKnownLayoutVersion(datanodeDetails, layoutVersionReport); + updateDatanodesFinalizedCount(oldLayoutVersion, layoutVersionReport); } catch (NodeNotFoundException e) { LOG.error("SCM trying to process Layout Version from an " + "unregistered node {}.", datanodeDetails); @@ -1053,6 +1064,28 @@ public DatanodeInfo getDatanodeInfo(DatanodeDetails dn) { } } + @Override + public int getNumDatanodesFinalized() { + return datanodesFinalized.get(); + } + + private void updateDatanodesFinalizedCount( + LayoutVersionProto previous, LayoutVersionProto current) { + int previousValue = isDatanodeFinalized(previous) ? 1 : 0; + int currentValue = isDatanodeFinalized(current) ? 1 : 0; + if (previousValue != currentValue) { + datanodesFinalized.addAndGet(currentValue - previousValue); + } + } + + private static boolean isDatanodeFinalized(LayoutVersionProto layoutVersion) { + if (layoutVersion == null) { + return false; + } + return layoutVersion.getMetadataLayoutVersion() + >= layoutVersion.getSoftwareLayoutVersion(); + } + /** * Return the node stat of the specified datanode. * @@ -1979,6 +2012,11 @@ public void removeNode(DatanodeDetails datanodeDetails) throws NodeNotFoundExcep if (clusterMap.contains(datanodeDetails)) { clusterMap.remove(datanodeDetails); } + DatanodeInfo datanodeInfo = nodeStateManager.getNode(datanodeDetails); + if (datanodeInfo != null) { + updateDatanodesFinalizedCount( + datanodeInfo.getLastKnownLayoutVersion(), null); + } nodeStateManager.removeNode(datanodeDetails.getID()); removeFromDnsToDnIdMap(datanodeDetails.getID(), datanodeDetails.getIpAddress()); final List> cmdList = getCommandQueue(datanodeDetails.getID()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index ca29b8c0c350..c8377f783fea 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -116,6 +116,7 @@ import org.apache.hadoop.ozone.audit.AuditMessage; import org.apache.hadoop.ozone.audit.Auditor; import org.apache.hadoop.ozone.audit.SCMAction; +import org.apache.hadoop.ozone.upgrade.UpgradeFinalization; import org.apache.hadoop.ozone.upgrade.UpgradeFinalization.StatusAndMessages; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; @@ -1153,13 +1154,12 @@ public HddsProtos.UpgradeStatus queryUpgradeStatus() throws IOException { try { getScm().checkAdminAccess(getRemoteUser(), true); - // Returning a placeholder for now. - HddsProtos.UpgradeStatus result = HddsProtos.UpgradeStatus.newBuilder() - .setScmFinalized(true) - .setNumDatanodesFinalized(10) - .setNumDatanodesTotal(10) - .setShouldFinalize(true) - .build(); + UpgradeFinalization.Status scmUpgradeStatus = + scm.getLayoutVersionManager().getUpgradeState(); + int totalDatanodes = scm.getScmNodeManager().getAllNodeCount(); + int finalizedDatanodes = scm.getScmNodeManager().getNumDatanodesFinalized(); + HddsProtos.UpgradeStatus result = buildUpgradeStatus( + scmUpgradeStatus, finalizedDatanodes, totalDatanodes); AUDIT.logReadSuccess(buildAuditMessageForSuccess(SCMAction.QUERY_UPGRADE_STATUS, null)); return result; } catch (IOException ex) { @@ -1168,6 +1168,28 @@ public HddsProtos.UpgradeStatus queryUpgradeStatus() throws IOException { } } + static HddsProtos.UpgradeStatus buildUpgradeStatus( + UpgradeFinalization.Status scmUpgradeStatus, + int finalizedDatanodes, + int totalDatanodes) { + return HddsProtos.UpgradeStatus.newBuilder() + .setScmFinalized(isScmFinalized(scmUpgradeStatus)) + .setNumDatanodesFinalized(finalizedDatanodes) + .setNumDatanodesTotal(totalDatanodes) + .setShouldFinalize(shouldFinalize(scmUpgradeStatus)) + .build(); + } + + static boolean isScmFinalized(UpgradeFinalization.Status scmUpgradeStatus) { + return UpgradeFinalization.isFinalized(scmUpgradeStatus) + || UpgradeFinalization.isDone(scmUpgradeStatus); + } + + static boolean shouldFinalize(UpgradeFinalization.Status scmUpgradeStatus) { + return UpgradeFinalization.Status.FINALIZATION_REQUIRED.equals( + scmUpgradeStatus); + } + @Override public StartContainerBalancerResponseProto startContainerBalancer( Optional threshold, Optional iterations, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index a93fbd4aa99b..0ea7087e9352 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -737,6 +737,37 @@ public void testProcessLayoutVersion() throws IOException { testProcessLayoutVersionReportHigherMlv(); } + @Test + public void testDatanodeFinalizedCounterTracksLayoutVersionReports() + throws IOException, AuthenticationException { + try (SCMNodeManager nodeManager = createNodeManager(getConf())) { + DatanodeDetails node = + HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); + assertEquals(1, nodeManager.getNumDatanodesFinalized(), + "Initial datanode should be counted as finalized"); + + int softwareVersion = + nodeManager.getLayoutVersionManager().getSoftwareLayoutVersion(); + int metadataVersion = + nodeManager.getLayoutVersionManager().getMetadataLayoutVersion(); + nodeManager.processLayoutVersionReport(node, + LayoutVersionProto.newBuilder() + .setMetadataLayoutVersion(metadataVersion - 1) + .setSoftwareLayoutVersion(softwareVersion) + .build()); + assertEquals(0, nodeManager.getNumDatanodesFinalized(), + "Lower metadata layout version should decrement finalized count"); + + nodeManager.processLayoutVersionReport(node, + LayoutVersionProto.newBuilder() + .setMetadataLayoutVersion(metadataVersion) + .setSoftwareLayoutVersion(softwareVersion) + .build()); + assertEquals(1, nodeManager.getNumDatanodesFinalized(), + "Restored metadata layout version should restore finalized count"); + } + } + // Currently invoked by testProcessLayoutVersion. public void testProcessLayoutVersionReportHigherMlv() throws IOException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java index 9402218014ce..4bdf4cbbd1a4 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java @@ -19,6 +19,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_READONLY_ADMINISTRATORS; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; @@ -45,6 +46,7 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocolServerSideTranslatorPB; import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics; import org.apache.hadoop.ozone.container.common.SCMTestUtils; +import org.apache.hadoop.ozone.upgrade.UpgradeFinalization; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.junit.jupiter.api.AfterEach; @@ -138,6 +140,67 @@ public void testScmListContainer() throws Exception { HddsProtos.ReplicationFactor.THREE).getContainerInfoList().size()); } + @Test + public void testBuildUpgradeStatusMapsFinalizationRequired() { + HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( + UpgradeFinalization.Status.FINALIZATION_REQUIRED, 1, 2); + + assertFalse(status.getScmFinalized()); + assertTrue(status.getShouldFinalize()); + assertEquals(1, status.getNumDatanodesFinalized()); + assertEquals(2, status.getNumDatanodesTotal()); + } + + @Test + public void testBuildUpgradeStatusMapsFinalizationInProgress() { + HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( + UpgradeFinalization.Status.FINALIZATION_IN_PROGRESS, 1, 2); + + assertFalse(status.getScmFinalized()); + assertFalse(status.getShouldFinalize()); + } + + @Test + public void testBuildUpgradeStatusMapsStartingFinalization() { + HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( + UpgradeFinalization.Status.STARTING_FINALIZATION, 1, 2); + + assertFalse(status.getScmFinalized()); + assertFalse(status.getShouldFinalize()); + assertEquals(1, status.getNumDatanodesFinalized()); + assertEquals(2, status.getNumDatanodesTotal()); + } + + @Test + public void testBuildUpgradeStatusMapsCompletedStatesToFinalized() { + HddsProtos.UpgradeStatus doneStatus = + SCMClientProtocolServer.buildUpgradeStatus( + UpgradeFinalization.Status.FINALIZATION_DONE, 2, 2); + HddsProtos.UpgradeStatus finalizedStatus = + SCMClientProtocolServer.buildUpgradeStatus( + UpgradeFinalization.Status.ALREADY_FINALIZED, 2, 2); + + assertTrue(doneStatus.getScmFinalized()); + assertFalse(doneStatus.getShouldFinalize()); + assertTrue(finalizedStatus.getScmFinalized()); + assertFalse(finalizedStatus.getShouldFinalize()); + } + + @Test + public void testBuildUpgradeStatusFromVersionManagerState() { + HddsProtos.UpgradeStatus needsFinalization = + SCMClientProtocolServer.buildUpgradeStatus(UpgradeFinalization.Status.FINALIZATION_REQUIRED, 1, 3); + assertFalse(needsFinalization.getScmFinalized()); + assertTrue(needsFinalization.getShouldFinalize()); + assertEquals(1, needsFinalization.getNumDatanodesFinalized()); + assertEquals(3, needsFinalization.getNumDatanodesTotal()); + + HddsProtos.UpgradeStatus alreadyFinalized = + SCMClientProtocolServer.buildUpgradeStatus(UpgradeFinalization.Status.ALREADY_FINALIZED, 3, 3); + assertTrue(alreadyFinalized.getScmFinalized()); + assertFalse(alreadyFinalized.getShouldFinalize()); + } + private StorageContainerManager mockStorageContainerManager() { List infos = new ArrayList<>(); for (int i = 0; i < 10; i++) { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/upgrade/StatusSubCommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/upgrade/StatusSubCommand.java index 246887edde9c..b2654e5203d7 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/upgrade/StatusSubCommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/upgrade/StatusSubCommand.java @@ -39,8 +39,7 @@ public class StatusSubCommand extends ScmSubcommand { public void execute(ScmClient client) throws IOException { HddsProtos.UpgradeStatus status = client.queryUpgradeStatus(); - // Temporary output to validate the command is working. - out().println("Update status:"); + out().println("Upgrade status:"); out().println(" SCM Finalized: " + status.getScmFinalized()); out().println(" Datanodes finalized: " + status.getNumDatanodesFinalized()); out().println(" Total Datanodes: " + status.getNumDatanodesTotal()); diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java new file mode 100644 index 000000000000..5d7c80a01903 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.upgrade; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import picocli.CommandLine; + +/** + * Unit tests for {@link StatusSubCommand}. + */ +public class TestStatusSubCommand { + + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final PrintStream originalOut = System.out; + private StatusSubCommand cmd; + + @BeforeEach + public void setup() throws UnsupportedEncodingException { + cmd = new StatusSubCommand(); + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + } + + @AfterEach + public void tearDown() { + System.setOut(originalOut); + } + + @Test + public void testStatusCommandPrintsUpgradeStatus() throws IOException { + ScmClient scmClient = mock(ScmClient.class); + HddsProtos.UpgradeStatus status = HddsProtos.UpgradeStatus.newBuilder() + .setScmFinalized(false) + .setNumDatanodesFinalized(1) + .setNumDatanodesTotal(3) + .setShouldFinalize(true) + .build(); + when(scmClient.queryUpgradeStatus()).thenReturn(status); + + new CommandLine(cmd).parseArgs(); + cmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Upgrade status:")); + assertTrue(output.contains("SCM Finalized: false")); + assertTrue(output.contains("Datanodes finalized: 1")); + assertTrue(output.contains("Total Datanodes: 3")); + assertTrue(output.contains("Should Finalize: true")); + verify(scmClient).queryUpgradeStatus(); + } +} From bd49bba5c5f86cb230b0af3554e5683646ce4c71 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Thu, 16 Apr 2026 16:49:36 +0200 Subject: [PATCH 02/10] Remove unused import --- .../apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java index 5d7c80a01903..d0e0a439d1a0 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/upgrade/TestStatusSubCommand.java @@ -18,8 +18,6 @@ package org.apache.hadoop.ozone.admin.upgrade; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; From 0a88f9be48592b76991b81845aef7b0ea67f755a Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Mon, 20 Apr 2026 13:21:55 +0200 Subject: [PATCH 03/10] Address review comments --- .../hadoop/hdds/scm/node/NodeManager.java | 10 ++----- .../hadoop/hdds/scm/node/SCMNodeManager.java | 2 +- .../scm/server/SCMClientProtocolServer.java | 9 +++--- .../hdds/scm/node/TestSCMNodeManager.java | 28 +++++++++++++++++++ .../server/TestSCMClientProtocolServer.java | 15 ++++++++-- 5 files changed, 49 insertions(+), 15 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index 98e27a955964..fc2ee8a049ca 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -152,14 +152,8 @@ default int getNumDatanodesFinalized() { return (int) getAllNodes().stream() .filter(DatanodeInfo.class::isInstance) .map(DatanodeInfo.class::cast) - .filter(datanodeInfo -> { - LayoutVersionProto layoutVersion = datanodeInfo.getLastKnownLayoutVersion(); - if (layoutVersion == null) { - return false; - } - return layoutVersion.getMetadataLayoutVersion() - >= layoutVersion.getSoftwareLayoutVersion(); - }) + .filter(datanodeInfo -> + SCMNodeManager.isDatanodeFinalized(datanodeInfo.getLastKnownLayoutVersion())) .count(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 37ee774aa83b..3ef2ddcd3553 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -1078,7 +1078,7 @@ private void updateDatanodesFinalizedCount( } } - private static boolean isDatanodeFinalized(LayoutVersionProto layoutVersion) { + public static boolean isDatanodeFinalized(LayoutVersionProto layoutVersion) { if (layoutVersion == null) { return false; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index c8377f783fea..a9c34cedafb3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1176,7 +1176,7 @@ static HddsProtos.UpgradeStatus buildUpgradeStatus( .setScmFinalized(isScmFinalized(scmUpgradeStatus)) .setNumDatanodesFinalized(finalizedDatanodes) .setNumDatanodesTotal(totalDatanodes) - .setShouldFinalize(shouldFinalize(scmUpgradeStatus)) + .setShouldFinalize(shouldFinalize(scmUpgradeStatus, finalizedDatanodes, totalDatanodes)) .build(); } @@ -1185,9 +1185,10 @@ static boolean isScmFinalized(UpgradeFinalization.Status scmUpgradeStatus) { || UpgradeFinalization.isDone(scmUpgradeStatus); } - static boolean shouldFinalize(UpgradeFinalization.Status scmUpgradeStatus) { - return UpgradeFinalization.Status.FINALIZATION_REQUIRED.equals( - scmUpgradeStatus); + static boolean shouldFinalize(UpgradeFinalization.Status scmUpgradeStatus, + int finalizedDatanodes, int totalDatanodes) { + return UpgradeFinalization.Status.FINALIZATION_REQUIRED.equals(scmUpgradeStatus) + && finalizedDatanodes == totalDatanodes; } @Override diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 0ea7087e9352..547094fe2858 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -768,6 +768,34 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() } } + @Test + public void testDatanodeFinalizedCounterTracksRegistrationAndRemoveNode() + throws IOException, AuthenticationException, NodeNotFoundException { + try (SCMNodeManager nodeManager = createNodeManager(getConf())) { + DatanodeDetails finalizedNode = + registerWithCapacity(nodeManager, CORRECT_LAYOUT_PROTO, success); + assertEquals(1, nodeManager.getNumDatanodesFinalized(), + "Finalized registration should increment finalized count"); + + DatanodeDetails nonFinalizedNode = + registerWithCapacity(nodeManager, SMALLER_MLV_LAYOUT_PROTO, success); + assertEquals(1, nodeManager.getNumDatanodesFinalized(), + "Non-finalized registration should not increment finalized count"); + + nonFinalizedNode.setPersistedOpState( + HddsProtos.NodeOperationalState.DECOMMISSIONED); + nodeManager.removeNode(nonFinalizedNode); + assertEquals(1, nodeManager.getNumDatanodesFinalized(), + "Removing a non-finalized node should not change finalized count"); + + finalizedNode.setPersistedOpState( + HddsProtos.NodeOperationalState.DECOMMISSIONED); + nodeManager.removeNode(finalizedNode); + assertEquals(0, nodeManager.getNumDatanodesFinalized(), + "Removing a finalized node should decrement finalized count"); + } + } + // Currently invoked by testProcessLayoutVersion. public void testProcessLayoutVersionReportHigherMlv() throws IOException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java index 4bdf4cbbd1a4..a6d24362c6c9 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java @@ -146,11 +146,22 @@ public void testBuildUpgradeStatusMapsFinalizationRequired() { UpgradeFinalization.Status.FINALIZATION_REQUIRED, 1, 2); assertFalse(status.getScmFinalized()); - assertTrue(status.getShouldFinalize()); + assertFalse(status.getShouldFinalize()); assertEquals(1, status.getNumDatanodesFinalized()); assertEquals(2, status.getNumDatanodesTotal()); } + @Test + public void testBuildUpgradeStatusMapsFinalizationRequiredAllNodesFinalized() { + HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( + UpgradeFinalization.Status.FINALIZATION_REQUIRED, 3, 3); + + assertFalse(status.getScmFinalized()); + assertTrue(status.getShouldFinalize()); + assertEquals(3, status.getNumDatanodesFinalized()); + assertEquals(3, status.getNumDatanodesTotal()); + } + @Test public void testBuildUpgradeStatusMapsFinalizationInProgress() { HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( @@ -191,7 +202,7 @@ public void testBuildUpgradeStatusFromVersionManagerState() { HddsProtos.UpgradeStatus needsFinalization = SCMClientProtocolServer.buildUpgradeStatus(UpgradeFinalization.Status.FINALIZATION_REQUIRED, 1, 3); assertFalse(needsFinalization.getScmFinalized()); - assertTrue(needsFinalization.getShouldFinalize()); + assertFalse(needsFinalization.getShouldFinalize()); assertEquals(1, needsFinalization.getNumDatanodesFinalized()); assertEquals(3, needsFinalization.getNumDatanodesTotal()); From 1e73007beb157143004d2d07fca26df9d82983ab Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Tue, 21 Apr 2026 11:26:03 +0200 Subject: [PATCH 04/10] Iterate over nodes each time, instead of counter --- .../hadoop/hdds/scm/node/NodeManager.java | 13 -- .../hadoop/hdds/scm/node/SCMNodeManager.java | 38 ------ .../scm/server/SCMClientProtocolServer.java | 26 ++-- .../server/upgrade/SCMUpgradeFinalizer.java | 112 ++++++++++++------ .../hdds/scm/node/TestSCMNodeManager.java | 21 ++-- 5 files changed, 105 insertions(+), 105 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index fc2ee8a049ca..e1cf9a45bf50 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -144,19 +144,6 @@ default int getAllNodeCount() { return getAllNodes().size(); } - /** - * Returns the number of datanodes currently marked finalized by SCM layout version - * metadata/software comparison. - */ - default int getNumDatanodesFinalized() { - return (int) getAllNodes().stream() - .filter(DatanodeInfo.class::isInstance) - .map(DatanodeInfo.class::cast) - .filter(datanodeInfo -> - SCMNodeManager.isDatanodeFinalized(datanodeInfo.getLastKnownLayoutVersion())) - .count(); - } - /** * Returns the aggregated node stats. * @return the aggregated node stats. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 3ef2ddcd3553..f7432ad47e53 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -44,7 +44,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.BiConsumer; import java.util.function.Function; @@ -141,7 +140,6 @@ public class SCMNodeManager implements NodeManager { BiConsumer>> sendCommandNotifyMap; private final NonWritableNodeFilter nonWritableNodeFilter; private final int numContainerPerVolume; - private final AtomicInteger datanodesFinalized; /** * Lock used to synchronize some operation in Node manager to ensure a @@ -204,7 +202,6 @@ public SCMNodeManager( this.scmContext = scmContext; this.sendCommandNotifyMap = new HashMap<>(); this.nonWritableNodeFilter = new NonWritableNodeFilter(conf); - this.datanodesFinalized = new AtomicInteger(0); } @Override @@ -425,7 +422,6 @@ public RegisteredCommand register( try { clusterMap.add(datanodeDetails); nodeStateManager.addNode(datanodeDetails, layoutInfo); - datanodesFinalized.addAndGet(isDatanodeFinalized(layoutInfo) ? 1 : 0); // Check that datanode in nodeStateManager has topology parent set DatanodeDetails dn = nodeStateManager.getNode(datanodeDetails); Preconditions.checkState(dn.getParent() != null); @@ -451,9 +447,7 @@ public RegisteredCommand register( hostName, ipAddress, dnId)) { LOG.info("Updating datanode from {} to {}", oldNode, datanodeDetails); clusterMap.update(oldNode, datanodeDetails); - LayoutVersionProto oldLayoutVersion = oldNode.getLastKnownLayoutVersion(); nodeStateManager.updateNode(datanodeDetails, layoutInfo); - updateDatanodesFinalizedCount(oldLayoutVersion, layoutInfo); DatanodeDetails dn = nodeStateManager.getNode(datanodeDetails); Preconditions.checkState(dn.getParent() != null); processNodeReport(datanodeDetails, nodeReport); @@ -463,9 +457,7 @@ public RegisteredCommand register( LOG.info("Update the version for registered datanode {}, " + "oldVersion = {}, newVersion = {}.", datanodeDetails, oldNode.getVersion(), datanodeDetails.getVersion()); - LayoutVersionProto oldLayoutVersion = oldNode.getLastKnownLayoutVersion(); nodeStateManager.updateNode(datanodeDetails, layoutInfo); - updateDatanodesFinalizedCount(oldLayoutVersion, layoutInfo); } } catch (NodeNotFoundException e) { LOG.error("Cannot find datanode {} from nodeStateManager", @@ -740,11 +732,8 @@ public void processLayoutVersionReport(DatanodeDetails datanodeDetails, } try { - DatanodeInfo datanodeInfo = nodeStateManager.getNode(datanodeDetails); - LayoutVersionProto oldLayoutVersion = datanodeInfo.getLastKnownLayoutVersion(); nodeStateManager.updateLastKnownLayoutVersion(datanodeDetails, layoutVersionReport); - updateDatanodesFinalizedCount(oldLayoutVersion, layoutVersionReport); } catch (NodeNotFoundException e) { LOG.error("SCM trying to process Layout Version from an " + "unregistered node {}.", datanodeDetails); @@ -1064,28 +1053,6 @@ public DatanodeInfo getDatanodeInfo(DatanodeDetails dn) { } } - @Override - public int getNumDatanodesFinalized() { - return datanodesFinalized.get(); - } - - private void updateDatanodesFinalizedCount( - LayoutVersionProto previous, LayoutVersionProto current) { - int previousValue = isDatanodeFinalized(previous) ? 1 : 0; - int currentValue = isDatanodeFinalized(current) ? 1 : 0; - if (previousValue != currentValue) { - datanodesFinalized.addAndGet(currentValue - previousValue); - } - } - - public static boolean isDatanodeFinalized(LayoutVersionProto layoutVersion) { - if (layoutVersion == null) { - return false; - } - return layoutVersion.getMetadataLayoutVersion() - >= layoutVersion.getSoftwareLayoutVersion(); - } - /** * Return the node stat of the specified datanode. * @@ -2012,11 +1979,6 @@ public void removeNode(DatanodeDetails datanodeDetails) throws NodeNotFoundExcep if (clusterMap.contains(datanodeDetails)) { clusterMap.remove(datanodeDetails); } - DatanodeInfo datanodeInfo = nodeStateManager.getNode(datanodeDetails); - if (datanodeInfo != null) { - updateDatanodesFinalizedCount( - datanodeInfo.getLastKnownLayoutVersion(), null); - } nodeStateManager.removeNode(datanodeDetails.getID()); removeFromDnsToDnIdMap(datanodeDetails.getID(), datanodeDetails.getIpAddress()); final List> cmdList = getCommandQueue(datanodeDetails.getID()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index a9c34cedafb3..a74c8dcc50f2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -101,6 +101,7 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocolServerSideTranslatorPB; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; +import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics; @@ -1154,12 +1155,16 @@ public HddsProtos.UpgradeStatus queryUpgradeStatus() throws IOException { try { getScm().checkAdminAccess(getRemoteUser(), true); - UpgradeFinalization.Status scmUpgradeStatus = - scm.getLayoutVersionManager().getUpgradeState(); - int totalDatanodes = scm.getScmNodeManager().getAllNodeCount(); - int finalizedDatanodes = scm.getScmNodeManager().getNumDatanodesFinalized(); + UpgradeFinalization.Status scmUpgradeStatus = scm.getLayoutVersionManager().getUpgradeState(); + SCMUpgradeFinalizer.DatanodeFinalizationCounts datanodeFinalizationCounts = + SCMUpgradeFinalizer.getNumFinalizedDatanodes(scm.getScmNodeManager()); + int finalizedDatanodes = datanodeFinalizationCounts.getNumFinalizedDatanodes(); + int healthyDatanodes = datanodeFinalizationCounts.getTotalHealthyDatanodes(); + HddsProtos.UpgradeStatus result = buildUpgradeStatus( - scmUpgradeStatus, finalizedDatanodes, totalDatanodes); + scmUpgradeStatus, + finalizedDatanodes, + healthyDatanodes); AUDIT.logReadSuccess(buildAuditMessageForSuccess(SCMAction.QUERY_UPGRADE_STATUS, null)); return result; } catch (IOException ex) { @@ -1171,12 +1176,13 @@ public HddsProtos.UpgradeStatus queryUpgradeStatus() throws IOException { static HddsProtos.UpgradeStatus buildUpgradeStatus( UpgradeFinalization.Status scmUpgradeStatus, int finalizedDatanodes, - int totalDatanodes) { + int healthyDatanodes) { return HddsProtos.UpgradeStatus.newBuilder() .setScmFinalized(isScmFinalized(scmUpgradeStatus)) .setNumDatanodesFinalized(finalizedDatanodes) - .setNumDatanodesTotal(totalDatanodes) - .setShouldFinalize(shouldFinalize(scmUpgradeStatus, finalizedDatanodes, totalDatanodes)) + .setNumDatanodesTotal(healthyDatanodes) + .setShouldFinalize( + shouldFinalize(scmUpgradeStatus, finalizedDatanodes, healthyDatanodes)) .build(); } @@ -1186,9 +1192,9 @@ static boolean isScmFinalized(UpgradeFinalization.Status scmUpgradeStatus) { } static boolean shouldFinalize(UpgradeFinalization.Status scmUpgradeStatus, - int finalizedDatanodes, int totalDatanodes) { + int finalizedDatanodes, int healthyDatanodes) { return UpgradeFinalization.Status.FINALIZATION_REQUIRED.equals(scmUpgradeStatus) - && finalizedDatanodes == totalDatanodes; + && finalizedDatanodes == healthyDatanodes; } @Override diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java index 93f2c7dace88..88bc60e9d453 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java @@ -121,44 +121,13 @@ private void waitForDatanodesToFinalize(SCMUpgradeFinalizationContext context) // SCM is no longer the leader by throwing NotLeaderException. context.getSCMContext().getTermOfLeader(); - allDatanodesFinalized = true; - int totalHealthyNodes = 0; - int finalizedNodes = 0; - int unfinalizedNodes = 0; + DatanodeFinalizationCounts datanodeFinalizationCounts = + getNumFinalizedDatanodes(nodeManager); + int finalizedNodes = datanodeFinalizationCounts.getNumFinalizedDatanodes(); + int totalHealthyNodes = datanodeFinalizationCounts.getTotalHealthyDatanodes(); + int unfinalizedNodes = datanodeFinalizationCounts.getNumUnfinalizedDatanodes(); - for (DatanodeDetails dn : nodeManager.getAllNodes()) { - try { - // Only check HEALTHY nodes. STALE/DEAD nodes will be told to - // finalize when they recover. - if (nodeManager.getNodeStatus(dn).isHealthy()) { - totalHealthyNodes++; - DatanodeInfo datanodeInfo = nodeManager.getDatanodeInfo(dn); - if (datanodeInfo == null) { - LOG.warn("Could not get DatanodeInfo for {}, skipping in " + - "finalization wait.", dn.getHostName()); - continue; - } - - LayoutVersionProto dnLayout = datanodeInfo.getLastKnownLayoutVersion(); - int dnMlv = dnLayout.getMetadataLayoutVersion(); - int dnSlv = dnLayout.getSoftwareLayoutVersion(); - - if (dnMlv < dnSlv) { - // Datanode has not yet finalized - allDatanodesFinalized = false; - unfinalizedNodes++; - LOG.debug("Datanode {} has not yet finalized: MLV={}, SLV={}", - dn.getHostName(), dnMlv, dnSlv); - } else { - finalizedNodes++; - } - } - } catch (NodeNotFoundException e) { - // Node was removed while we were iterating. This is OK, skip it. - LOG.debug("Node {} not found while waiting for finalization, " + - "skipping.", dn); - } - } + allDatanodesFinalized = unfinalizedNodes == 0; if (!allDatanodesFinalized) { LOG.info("Waiting for datanodes to finalize. Status: {}/{} healthy " + @@ -177,4 +146,73 @@ private void waitForDatanodesToFinalize(SCMUpgradeFinalizationContext context) } } } + + public static DatanodeFinalizationCounts getNumFinalizedDatanodes( + NodeManager nodeManager) { + int finalizedNodes = 0; + int totalHealthyNodes = 0; + int unfinalizedNodes = 0; + + for (DatanodeDetails dn : nodeManager.getAllNodes()) { + try { + // Only check HEALTHY nodes. STALE/DEAD nodes will be told to + // finalize when they recover. + if (!nodeManager.getNodeStatus(dn).isHealthy()) { + continue; + } + totalHealthyNodes++; + DatanodeInfo datanodeInfo = nodeManager.getDatanodeInfo(dn); + if (datanodeInfo == null) { + LOG.warn("Could not get DatanodeInfo for {}, skipping in " + + "finalization wait.", dn.getHostName()); + continue; + } + + LayoutVersionProto dnLayout = datanodeInfo.getLastKnownLayoutVersion(); + int dnMlv = dnLayout.getMetadataLayoutVersion(); + int dnSlv = dnLayout.getSoftwareLayoutVersion(); + + if (dnMlv < dnSlv) { + // Datanode has not yet finalized + unfinalizedNodes++; + LOG.debug("Datanode {} has not yet finalized: MLV={}, SLV={}", + dn.getHostName(), dnMlv, dnSlv); + } else { + finalizedNodes++; + } + } catch (NodeNotFoundException e) { + // Node was removed while we were iterating. This is OK, skip it. + LOG.debug("Node {} not found while waiting for finalization, " + + "skipping.", dn); + } + } + + return new DatanodeFinalizationCounts(finalizedNodes, totalHealthyNodes, unfinalizedNodes); + } + + public static final class DatanodeFinalizationCounts { + private final int numFinalizedDatanodes; + private final int totalHealthyDatanodes; + private final int numUnfinalizedDatanodes; + + public DatanodeFinalizationCounts(int numFinalizedDatanodes, + int totalHealthyDatanodes, + int numUnfinalizedDatanodes) { + this.numFinalizedDatanodes = numFinalizedDatanodes; + this.totalHealthyDatanodes = totalHealthyDatanodes; + this.numUnfinalizedDatanodes = numUnfinalizedDatanodes; + } + + public int getNumFinalizedDatanodes() { + return numFinalizedDatanodes; + } + + public int getTotalHealthyDatanodes() { + return totalHealthyDatanodes; + } + + public int getNumUnfinalizedDatanodes() { + return numUnfinalizedDatanodes; + } + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 547094fe2858..91eae6d2fae0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -96,6 +96,7 @@ import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.NodeReportFromDatanode; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; +import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; @@ -743,7 +744,8 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() try (SCMNodeManager nodeManager = createNodeManager(getConf())) { DatanodeDetails node = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); - assertEquals(1, nodeManager.getNumDatanodesFinalized(), + assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + .getNumFinalizedDatanodes(), "Initial datanode should be counted as finalized"); int softwareVersion = @@ -755,7 +757,8 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() .setMetadataLayoutVersion(metadataVersion - 1) .setSoftwareLayoutVersion(softwareVersion) .build()); - assertEquals(0, nodeManager.getNumDatanodesFinalized(), + assertEquals(0, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + .getNumFinalizedDatanodes(), "Lower metadata layout version should decrement finalized count"); nodeManager.processLayoutVersionReport(node, @@ -763,7 +766,8 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() .setMetadataLayoutVersion(metadataVersion) .setSoftwareLayoutVersion(softwareVersion) .build()); - assertEquals(1, nodeManager.getNumDatanodesFinalized(), + assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + .getNumFinalizedDatanodes(), "Restored metadata layout version should restore finalized count"); } } @@ -774,24 +778,27 @@ public void testDatanodeFinalizedCounterTracksRegistrationAndRemoveNode() try (SCMNodeManager nodeManager = createNodeManager(getConf())) { DatanodeDetails finalizedNode = registerWithCapacity(nodeManager, CORRECT_LAYOUT_PROTO, success); - assertEquals(1, nodeManager.getNumDatanodesFinalized(), + assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + .getNumFinalizedDatanodes(), "Finalized registration should increment finalized count"); DatanodeDetails nonFinalizedNode = registerWithCapacity(nodeManager, SMALLER_MLV_LAYOUT_PROTO, success); - assertEquals(1, nodeManager.getNumDatanodesFinalized(), + assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + .getNumFinalizedDatanodes(), "Non-finalized registration should not increment finalized count"); nonFinalizedNode.setPersistedOpState( HddsProtos.NodeOperationalState.DECOMMISSIONED); nodeManager.removeNode(nonFinalizedNode); - assertEquals(1, nodeManager.getNumDatanodesFinalized(), + assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + .getNumFinalizedDatanodes(), "Removing a non-finalized node should not change finalized count"); finalizedNode.setPersistedOpState( HddsProtos.NodeOperationalState.DECOMMISSIONED); nodeManager.removeNode(finalizedNode); - assertEquals(0, nodeManager.getNumDatanodesFinalized(), + assertEquals(0, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager).getNumFinalizedDatanodes(), "Removing a finalized node should decrement finalized count"); } } From 425441cfc59d5ce4e1492903017d0a214bbcfd1c Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Tue, 21 Apr 2026 11:32:32 +0200 Subject: [PATCH 05/10] Fix checkstyle --- .../hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java | 3 +++ .../org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java index 88bc60e9d453..10f035146ea6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java @@ -190,6 +190,9 @@ public static DatanodeFinalizationCounts getNumFinalizedDatanodes( return new DatanodeFinalizationCounts(finalizedNodes, totalHealthyNodes, unfinalizedNodes); } + /** + * Class to store the number finalized, unfinalized and healthy datanodes + */ public static final class DatanodeFinalizationCounts { private final int numFinalizedDatanodes; private final int totalHealthyDatanodes; diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 91eae6d2fae0..7df42c3123f0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -96,8 +96,8 @@ import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.NodeReportFromDatanode; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; -import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; From c39b79b2b285cc3c52cd7159cfe1cdbc7f49ad7b Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Tue, 21 Apr 2026 12:01:31 +0200 Subject: [PATCH 06/10] Fix checkstyle --- .../hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java index 10f035146ea6..5ce428738344 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java @@ -191,7 +191,7 @@ public static DatanodeFinalizationCounts getNumFinalizedDatanodes( } /** - * Class to store the number finalized, unfinalized and healthy datanodes + * Class to store the number finalized, unfinalized and healthy datanodes. */ public static final class DatanodeFinalizationCounts { private final int numFinalizedDatanodes; From 2858c7666388da66f05880fa256f5316e76a1bbb Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Tue, 21 Apr 2026 12:55:15 +0200 Subject: [PATCH 07/10] Fix shouldFinalize --- .../apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index a74c8dcc50f2..ad612600d2e3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1193,8 +1193,7 @@ static boolean isScmFinalized(UpgradeFinalization.Status scmUpgradeStatus) { static boolean shouldFinalize(UpgradeFinalization.Status scmUpgradeStatus, int finalizedDatanodes, int healthyDatanodes) { - return UpgradeFinalization.Status.FINALIZATION_REQUIRED.equals(scmUpgradeStatus) - && finalizedDatanodes == healthyDatanodes; + return isScmFinalized(scmUpgradeStatus) && finalizedDatanodes == healthyDatanodes; } @Override From 71047b7872d5cf9b0ec6ee75807f4bfbe024a5e3 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Tue, 21 Apr 2026 15:24:54 +0200 Subject: [PATCH 08/10] Fix wrong assertation TestSCMClientProtocolServer --- .../hdds/scm/server/TestSCMClientProtocolServer.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java index a6d24362c6c9..f9cab1b1ae5e 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java @@ -157,7 +157,9 @@ public void testBuildUpgradeStatusMapsFinalizationRequiredAllNodesFinalized() { UpgradeFinalization.Status.FINALIZATION_REQUIRED, 3, 3); assertFalse(status.getScmFinalized()); - assertTrue(status.getShouldFinalize()); + assertEquals(SCMClientProtocolServer.shouldFinalize( + UpgradeFinalization.Status.FINALIZATION_REQUIRED, 3, 3), + status.getShouldFinalize()); assertEquals(3, status.getNumDatanodesFinalized()); assertEquals(3, status.getNumDatanodesTotal()); } @@ -192,9 +194,9 @@ public void testBuildUpgradeStatusMapsCompletedStatesToFinalized() { UpgradeFinalization.Status.ALREADY_FINALIZED, 2, 2); assertTrue(doneStatus.getScmFinalized()); - assertFalse(doneStatus.getShouldFinalize()); + assertTrue(doneStatus.getShouldFinalize()); assertTrue(finalizedStatus.getScmFinalized()); - assertFalse(finalizedStatus.getShouldFinalize()); + assertTrue(finalizedStatus.getShouldFinalize()); } @Test @@ -209,7 +211,7 @@ public void testBuildUpgradeStatusFromVersionManagerState() { HddsProtos.UpgradeStatus alreadyFinalized = SCMClientProtocolServer.buildUpgradeStatus(UpgradeFinalization.Status.ALREADY_FINALIZED, 3, 3); assertTrue(alreadyFinalized.getScmFinalized()); - assertFalse(alreadyFinalized.getShouldFinalize()); + assertTrue(alreadyFinalized.getShouldFinalize()); } private StorageContainerManager mockStorageContainerManager() { From 82b9b5abe4ea3d13aa715c454a738033caf8edf1 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Fri, 24 Apr 2026 14:29:55 +0200 Subject: [PATCH 09/10] Address review comments --- .../hadoop/hdds/scm/node/NodeManager.java | 81 +++++++++++++++++ .../scm/server/SCMClientProtocolServer.java | 44 +++------ .../server/upgrade/SCMUpgradeFinalizer.java | 80 +---------------- .../hdds/scm/node/TestSCMNodeManager.java | 90 +++++++++++++++++-- .../server/TestSCMClientProtocolServer.java | 88 +++--------------- 5 files changed, 190 insertions(+), 193 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index e1cf9a45bf50..8a84ef180459 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -48,6 +48,8 @@ import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A node manager supports a simple interface for managing a datanode. @@ -74,6 +76,8 @@ public interface NodeManager extends StorageContainerNodeProtocol, EventHandler, NodeManagerMXBean, Closeable { + Logger LOG = LoggerFactory.getLogger(NodeManager.class); + /** * Register API without a layout version info object passed in. Useful for * tests. @@ -144,6 +148,57 @@ default int getAllNodeCount() { return getAllNodes().size(); } + /** + * @return DatanodeFinalizationCounts, finalized and total healthy node counts + */ + default DatanodeFinalizationCounts getDatanodeFinalizationCounts() { + int finalizedNodes = 0; + int totalHealthyNodes = 0; + + for (DatanodeDetails dn : getAllNodes()) { + try { + // Only count HEALTHY nodes. STALE/DEAD nodes are intentionally excluded + // for the following reasons: + // - When a node goes STALE, its write pipelines are closed, so it + // cannot be involved in writes regardless of finalization state. + // - The ZDU write path is designed to handle datanodes at different + // layout versions, so an unfinalized STALE node does not block + // correctness if it later returns to HEALTHY. + // - If it recovers to HEALTHY, it will receive a finalize command on + // its next heartbeat and finalize quickly. If it is in bad shape, + // it will likely go DEAD and can be ignored. + if (!getNodeStatus(dn).isHealthy()) { + continue; + } + totalHealthyNodes++; + DatanodeInfo datanodeInfo = getDatanodeInfo(dn); + if (datanodeInfo == null) { + LOG.warn("Could not get DatanodeInfo for {}, skipping in " + + "finalization wait.", dn.getHostName()); + continue; + } + + LayoutVersionProto dnLayout = datanodeInfo.getLastKnownLayoutVersion(); + int dnMlv = dnLayout.getMetadataLayoutVersion(); + int dnSlv = dnLayout.getSoftwareLayoutVersion(); + + if (dnMlv < dnSlv) { + // Datanode has not yet finalized + LOG.debug("Datanode {} has not yet finalized: MLV={}, SLV={}", + dn.getHostName(), dnMlv, dnSlv); + } else { + finalizedNodes++; + } + } catch (NodeNotFoundException e) { + // Node was removed while we were iterating. This is OK, skip it. + LOG.debug("Node {} not found while waiting for finalization, " + + "skipping.", dn); + } + } + + return new DatanodeFinalizationCounts(finalizedNodes, totalHealthyNodes); + } + /** * Returns the aggregated node stats. * @return the aggregated node stats. @@ -420,4 +475,30 @@ default void removeNode(DatanodeDetails datanodeDetails) throws NodeNotFoundExce } int openContainerLimit(List datanodes); + + /** + * Class to store the number finalized and healthy datanodes. + */ + final class DatanodeFinalizationCounts { + private final int numFinalizedDatanodes; + private final int totalHealthyDatanodes; + + public DatanodeFinalizationCounts(int numFinalizedDatanodes, + int totalHealthyDatanodes) { + this.numFinalizedDatanodes = numFinalizedDatanodes; + this.totalHealthyDatanodes = totalHealthyDatanodes; + } + + public int getNumFinalizedDatanodes() { + return numFinalizedDatanodes; + } + + public int getTotalHealthyDatanodes() { + return totalHealthyDatanodes; + } + + public boolean allNodesFinalized() { + return numFinalizedDatanodes == totalHealthyDatanodes; + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index ad612600d2e3..483d60c182af 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -92,6 +92,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.DatanodeInfo; import org.apache.hadoop.hdds.scm.node.DatanodeUsageInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -101,7 +102,6 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocolServerSideTranslatorPB; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; -import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics; @@ -117,7 +117,6 @@ import org.apache.hadoop.ozone.audit.AuditMessage; import org.apache.hadoop.ozone.audit.Auditor; import org.apache.hadoop.ozone.audit.SCMAction; -import org.apache.hadoop.ozone.upgrade.UpgradeFinalization; import org.apache.hadoop.ozone.upgrade.UpgradeFinalization.StatusAndMessages; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; @@ -1155,16 +1154,20 @@ public HddsProtos.UpgradeStatus queryUpgradeStatus() throws IOException { try { getScm().checkAdminAccess(getRemoteUser(), true); - UpgradeFinalization.Status scmUpgradeStatus = scm.getLayoutVersionManager().getUpgradeState(); - SCMUpgradeFinalizer.DatanodeFinalizationCounts datanodeFinalizationCounts = - SCMUpgradeFinalizer.getNumFinalizedDatanodes(scm.getScmNodeManager()); + boolean scmFinalized = !scm.getLayoutVersionManager().needsFinalization(); + NodeManager.DatanodeFinalizationCounts datanodeFinalizationCounts = + scm.getScmNodeManager().getDatanodeFinalizationCounts(); int finalizedDatanodes = datanodeFinalizationCounts.getNumFinalizedDatanodes(); int healthyDatanodes = datanodeFinalizationCounts.getTotalHealthyDatanodes(); + boolean shouldFinalize = scmFinalized && datanodeFinalizationCounts.allNodesFinalized(); + + HddsProtos.UpgradeStatus result = HddsProtos.UpgradeStatus.newBuilder() + .setScmFinalized(scmFinalized) + .setNumDatanodesFinalized(finalizedDatanodes) + .setNumDatanodesTotal(healthyDatanodes) + .setShouldFinalize(shouldFinalize) + .build(); - HddsProtos.UpgradeStatus result = buildUpgradeStatus( - scmUpgradeStatus, - finalizedDatanodes, - healthyDatanodes); AUDIT.logReadSuccess(buildAuditMessageForSuccess(SCMAction.QUERY_UPGRADE_STATUS, null)); return result; } catch (IOException ex) { @@ -1173,29 +1176,6 @@ public HddsProtos.UpgradeStatus queryUpgradeStatus() throws IOException { } } - static HddsProtos.UpgradeStatus buildUpgradeStatus( - UpgradeFinalization.Status scmUpgradeStatus, - int finalizedDatanodes, - int healthyDatanodes) { - return HddsProtos.UpgradeStatus.newBuilder() - .setScmFinalized(isScmFinalized(scmUpgradeStatus)) - .setNumDatanodesFinalized(finalizedDatanodes) - .setNumDatanodesTotal(healthyDatanodes) - .setShouldFinalize( - shouldFinalize(scmUpgradeStatus, finalizedDatanodes, healthyDatanodes)) - .build(); - } - - static boolean isScmFinalized(UpgradeFinalization.Status scmUpgradeStatus) { - return UpgradeFinalization.isFinalized(scmUpgradeStatus) - || UpgradeFinalization.isDone(scmUpgradeStatus); - } - - static boolean shouldFinalize(UpgradeFinalization.Status scmUpgradeStatus, - int finalizedDatanodes, int healthyDatanodes) { - return isScmFinalized(scmUpgradeStatus) && finalizedDatanodes == healthyDatanodes; - } - @Override public StartContainerBalancerResponseProto startContainerBalancer( Optional threshold, Optional iterations, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java index 5ce428738344..9ce563a2c975 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java @@ -121,15 +121,13 @@ private void waitForDatanodesToFinalize(SCMUpgradeFinalizationContext context) // SCM is no longer the leader by throwing NotLeaderException. context.getSCMContext().getTermOfLeader(); - DatanodeFinalizationCounts datanodeFinalizationCounts = - getNumFinalizedDatanodes(nodeManager); + NodeManager.DatanodeFinalizationCounts datanodeFinalizationCounts = nodeManager.getDatanodeFinalizationCounts(); int finalizedNodes = datanodeFinalizationCounts.getNumFinalizedDatanodes(); int totalHealthyNodes = datanodeFinalizationCounts.getTotalHealthyDatanodes(); - int unfinalizedNodes = datanodeFinalizationCounts.getNumUnfinalizedDatanodes(); - - allDatanodesFinalized = unfinalizedNodes == 0; + allDatanodesFinalized = datanodeFinalizationCounts.allNodesFinalized(); if (!allDatanodesFinalized) { + int unfinalizedNodes = totalHealthyNodes - finalizedNodes; LOG.info("Waiting for datanodes to finalize. Status: {}/{} healthy " + "datanodes have finalized ({} remaining).", finalizedNodes, totalHealthyNodes, unfinalizedNodes); @@ -146,76 +144,4 @@ private void waitForDatanodesToFinalize(SCMUpgradeFinalizationContext context) } } } - - public static DatanodeFinalizationCounts getNumFinalizedDatanodes( - NodeManager nodeManager) { - int finalizedNodes = 0; - int totalHealthyNodes = 0; - int unfinalizedNodes = 0; - - for (DatanodeDetails dn : nodeManager.getAllNodes()) { - try { - // Only check HEALTHY nodes. STALE/DEAD nodes will be told to - // finalize when they recover. - if (!nodeManager.getNodeStatus(dn).isHealthy()) { - continue; - } - totalHealthyNodes++; - DatanodeInfo datanodeInfo = nodeManager.getDatanodeInfo(dn); - if (datanodeInfo == null) { - LOG.warn("Could not get DatanodeInfo for {}, skipping in " + - "finalization wait.", dn.getHostName()); - continue; - } - - LayoutVersionProto dnLayout = datanodeInfo.getLastKnownLayoutVersion(); - int dnMlv = dnLayout.getMetadataLayoutVersion(); - int dnSlv = dnLayout.getSoftwareLayoutVersion(); - - if (dnMlv < dnSlv) { - // Datanode has not yet finalized - unfinalizedNodes++; - LOG.debug("Datanode {} has not yet finalized: MLV={}, SLV={}", - dn.getHostName(), dnMlv, dnSlv); - } else { - finalizedNodes++; - } - } catch (NodeNotFoundException e) { - // Node was removed while we were iterating. This is OK, skip it. - LOG.debug("Node {} not found while waiting for finalization, " + - "skipping.", dn); - } - } - - return new DatanodeFinalizationCounts(finalizedNodes, totalHealthyNodes, unfinalizedNodes); - } - - /** - * Class to store the number finalized, unfinalized and healthy datanodes. - */ - public static final class DatanodeFinalizationCounts { - private final int numFinalizedDatanodes; - private final int totalHealthyDatanodes; - private final int numUnfinalizedDatanodes; - - public DatanodeFinalizationCounts(int numFinalizedDatanodes, - int totalHealthyDatanodes, - int numUnfinalizedDatanodes) { - this.numFinalizedDatanodes = numFinalizedDatanodes; - this.totalHealthyDatanodes = totalHealthyDatanodes; - this.numUnfinalizedDatanodes = numUnfinalizedDatanodes; - } - - public int getNumFinalizedDatanodes() { - return numFinalizedDatanodes; - } - - public int getTotalHealthyDatanodes() { - return totalHealthyDatanodes; - } - - public int getNumUnfinalizedDatanodes() { - return numUnfinalizedDatanodes; - } - } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 7df42c3123f0..51c024599a07 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -97,7 +97,6 @@ import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.NodeReportFromDatanode; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; -import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; @@ -744,7 +743,7 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() try (SCMNodeManager nodeManager = createNodeManager(getConf())) { DatanodeDetails node = HddsTestUtils.createRandomDatanodeAndRegister(nodeManager); - assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + assertEquals(1, nodeManager.getDatanodeFinalizationCounts() .getNumFinalizedDatanodes(), "Initial datanode should be counted as finalized"); @@ -757,7 +756,7 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() .setMetadataLayoutVersion(metadataVersion - 1) .setSoftwareLayoutVersion(softwareVersion) .build()); - assertEquals(0, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + assertEquals(0, nodeManager.getDatanodeFinalizationCounts() .getNumFinalizedDatanodes(), "Lower metadata layout version should decrement finalized count"); @@ -766,7 +765,7 @@ public void testDatanodeFinalizedCounterTracksLayoutVersionReports() .setMetadataLayoutVersion(metadataVersion) .setSoftwareLayoutVersion(softwareVersion) .build()); - assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + assertEquals(1, nodeManager.getDatanodeFinalizationCounts() .getNumFinalizedDatanodes(), "Restored metadata layout version should restore finalized count"); } @@ -778,31 +777,106 @@ public void testDatanodeFinalizedCounterTracksRegistrationAndRemoveNode() try (SCMNodeManager nodeManager = createNodeManager(getConf())) { DatanodeDetails finalizedNode = registerWithCapacity(nodeManager, CORRECT_LAYOUT_PROTO, success); - assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + assertEquals(1, nodeManager.getDatanodeFinalizationCounts() .getNumFinalizedDatanodes(), "Finalized registration should increment finalized count"); DatanodeDetails nonFinalizedNode = registerWithCapacity(nodeManager, SMALLER_MLV_LAYOUT_PROTO, success); - assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + assertEquals(1, nodeManager.getDatanodeFinalizationCounts() .getNumFinalizedDatanodes(), "Non-finalized registration should not increment finalized count"); nonFinalizedNode.setPersistedOpState( HddsProtos.NodeOperationalState.DECOMMISSIONED); nodeManager.removeNode(nonFinalizedNode); - assertEquals(1, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager) + assertEquals(1, nodeManager.getDatanodeFinalizationCounts() .getNumFinalizedDatanodes(), "Removing a non-finalized node should not change finalized count"); finalizedNode.setPersistedOpState( HddsProtos.NodeOperationalState.DECOMMISSIONED); nodeManager.removeNode(finalizedNode); - assertEquals(0, SCMUpgradeFinalizer.getNumFinalizedDatanodes(nodeManager).getNumFinalizedDatanodes(), + assertEquals(0, nodeManager.getDatanodeFinalizationCounts().getNumFinalizedDatanodes(), "Removing a finalized node should decrement finalized count"); } } + private static Stream ineligibleHealthStates() { + return Stream.of( + Arguments.of(NodeStatus.inServiceStale()), + Arguments.of(NodeStatus.inServiceDead()) + ); + } + + @ParameterizedTest + @MethodSource("ineligibleHealthStates") + public void testDatanodeFinalizedCounterExcludesNonHealthyNodes(NodeStatus expectedStatus) + throws IOException, AuthenticationException, NodeNotFoundException, InterruptedException { + OzoneConfiguration conf = getConf(); + conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, MILLISECONDS); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); + conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); + conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); + + try (SCMNodeManager nodeManager = createNodeManager(conf)) { + // transitionNode stops heartbeating and will become STALE or DEAD + DatanodeDetails transitionNode = + registerWithCapacity(nodeManager, CORRECT_LAYOUT_PROTO, success); + // heartbeatingNode keeps heartbeating as a healthy baseline + DatanodeDetails heartbeatingNode = + registerWithCapacity(nodeManager, CORRECT_LAYOUT_PROTO, success); + + nodeManager.processHeartbeat(transitionNode); + nodeManager.processHeartbeat(heartbeatingNode); + + assertEquals(2, nodeManager.getDatanodeFinalizationCounts().getTotalHealthyDatanodes(), + "Both nodes should start as healthy"); + + // Only heartbeat the baseline node until transitionNode reaches the expected state. + // STALE requires > 3s (wait 4s), DEAD requires > 6s (wait 7s total). + boolean waitForDead = expectedStatus.equals(NodeStatus.inServiceDead()); + Thread.sleep(2000); + nodeManager.processHeartbeat(heartbeatingNode); + Thread.sleep(2000); + if (waitForDead) { + nodeManager.processHeartbeat(heartbeatingNode); + Thread.sleep(3000); + } + + assertEquals(expectedStatus, nodeManager.getNodeStatus(transitionNode), + "Node should have transitioned to " + expectedStatus); + + assertEquals(1, nodeManager.getDatanodeFinalizationCounts().getTotalHealthyDatanodes(), + expectedStatus + " node should be excluded from total count"); + assertEquals(1, nodeManager.getDatanodeFinalizationCounts().getNumFinalizedDatanodes(), + expectedStatus + " node should be excluded from finalized count"); + } + } + + private static Stream allOperationalStates() { + return Stream.of(HddsProtos.NodeOperationalState.values()) + .map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("allOperationalStates") + public void testDatanodeFinalizedCounterIncludesAllHealthyOpStates( + HddsProtos.NodeOperationalState opState) + throws IOException, AuthenticationException, NodeNotFoundException { + try (SCMNodeManager nodeManager = createNodeManager(getConf())) { + DatanodeDetails node = + registerWithCapacity(nodeManager, CORRECT_LAYOUT_PROTO, success); + nodeManager.setNodeOperationalState(node, opState); + + // All HEALTHY nodes should be counted regardless of operational state + assertEquals(1, nodeManager.getDatanodeFinalizationCounts().getTotalHealthyDatanodes(), + "HEALTHY node with op state " + opState + " should be counted in total"); + assertEquals(1, nodeManager.getDatanodeFinalizationCounts().getNumFinalizedDatanodes(), + "HEALTHY finalized node with op state " + opState + " should be counted as finalized"); + } + } + // Currently invoked by testProcessLayoutVersion. public void testProcessLayoutVersionReportHigherMlv() throws IOException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java index f9cab1b1ae5e..412dc635116a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java @@ -19,7 +19,6 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_READONLY_ADMINISTRATORS; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; @@ -46,7 +45,6 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocolServerSideTranslatorPB; import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics; import org.apache.hadoop.ozone.container.common.SCMTestUtils; -import org.apache.hadoop.ozone.upgrade.UpgradeFinalization; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.junit.jupiter.api.AfterEach; @@ -140,80 +138,6 @@ public void testScmListContainer() throws Exception { HddsProtos.ReplicationFactor.THREE).getContainerInfoList().size()); } - @Test - public void testBuildUpgradeStatusMapsFinalizationRequired() { - HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( - UpgradeFinalization.Status.FINALIZATION_REQUIRED, 1, 2); - - assertFalse(status.getScmFinalized()); - assertFalse(status.getShouldFinalize()); - assertEquals(1, status.getNumDatanodesFinalized()); - assertEquals(2, status.getNumDatanodesTotal()); - } - - @Test - public void testBuildUpgradeStatusMapsFinalizationRequiredAllNodesFinalized() { - HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( - UpgradeFinalization.Status.FINALIZATION_REQUIRED, 3, 3); - - assertFalse(status.getScmFinalized()); - assertEquals(SCMClientProtocolServer.shouldFinalize( - UpgradeFinalization.Status.FINALIZATION_REQUIRED, 3, 3), - status.getShouldFinalize()); - assertEquals(3, status.getNumDatanodesFinalized()); - assertEquals(3, status.getNumDatanodesTotal()); - } - - @Test - public void testBuildUpgradeStatusMapsFinalizationInProgress() { - HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( - UpgradeFinalization.Status.FINALIZATION_IN_PROGRESS, 1, 2); - - assertFalse(status.getScmFinalized()); - assertFalse(status.getShouldFinalize()); - } - - @Test - public void testBuildUpgradeStatusMapsStartingFinalization() { - HddsProtos.UpgradeStatus status = SCMClientProtocolServer.buildUpgradeStatus( - UpgradeFinalization.Status.STARTING_FINALIZATION, 1, 2); - - assertFalse(status.getScmFinalized()); - assertFalse(status.getShouldFinalize()); - assertEquals(1, status.getNumDatanodesFinalized()); - assertEquals(2, status.getNumDatanodesTotal()); - } - - @Test - public void testBuildUpgradeStatusMapsCompletedStatesToFinalized() { - HddsProtos.UpgradeStatus doneStatus = - SCMClientProtocolServer.buildUpgradeStatus( - UpgradeFinalization.Status.FINALIZATION_DONE, 2, 2); - HddsProtos.UpgradeStatus finalizedStatus = - SCMClientProtocolServer.buildUpgradeStatus( - UpgradeFinalization.Status.ALREADY_FINALIZED, 2, 2); - - assertTrue(doneStatus.getScmFinalized()); - assertTrue(doneStatus.getShouldFinalize()); - assertTrue(finalizedStatus.getScmFinalized()); - assertTrue(finalizedStatus.getShouldFinalize()); - } - - @Test - public void testBuildUpgradeStatusFromVersionManagerState() { - HddsProtos.UpgradeStatus needsFinalization = - SCMClientProtocolServer.buildUpgradeStatus(UpgradeFinalization.Status.FINALIZATION_REQUIRED, 1, 3); - assertFalse(needsFinalization.getScmFinalized()); - assertFalse(needsFinalization.getShouldFinalize()); - assertEquals(1, needsFinalization.getNumDatanodesFinalized()); - assertEquals(3, needsFinalization.getNumDatanodesTotal()); - - HddsProtos.UpgradeStatus alreadyFinalized = - SCMClientProtocolServer.buildUpgradeStatus(UpgradeFinalization.Status.ALREADY_FINALIZED, 3, 3); - assertTrue(alreadyFinalized.getScmFinalized()); - assertTrue(alreadyFinalized.getShouldFinalize()); - } - private StorageContainerManager mockStorageContainerManager() { List infos = new ArrayList<>(); for (int i = 0; i < 10; i++) { @@ -231,6 +155,18 @@ private StorageContainerManager mockStorageContainerManager() { return storageContainerManager; } + @Test + public void testQueryUpgradeStatus() throws Exception { + HddsProtos.UpgradeStatus status = server.queryUpgradeStatus(); + + // SCM starts already finalized in tests + assertTrue(status.getScmFinalized()); + // No datanodes registered + assertEquals(0, status.getNumDatanodesFinalized()); + assertEquals(0, status.getNumDatanodesTotal()); + assertTrue(status.getShouldFinalize()); + } + private ContainerInfo newContainerInfoForTest() { return new ContainerInfo.Builder() .setContainerID(1) From 1daadfc8b1fd680e64fac352379ae6e5c49bbf70 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Fri, 24 Apr 2026 14:38:24 +0200 Subject: [PATCH 10/10] Remove unused imports --- .../hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java index 9ce563a2c975..7f11adb606b8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java @@ -18,12 +18,8 @@ package org.apache.hadoop.hdds.scm.server.upgrade; import java.io.IOException; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto; import org.apache.hadoop.hdds.scm.exceptions.SCMException; -import org.apache.hadoop.hdds.scm.node.DatanodeInfo; import org.apache.hadoop.hdds.scm.node.NodeManager; -import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager; import org.apache.hadoop.ozone.upgrade.BasicUpgradeFinalizer;