Commit cfcfc39

HDDS-14188. NodesOutOfSpace renamed to NonWritableNodes, include DNs not accepting writes (#9518)
1 parent 6576773 commit cfcfc39

3 files changed

Lines changed: 60 additions & 41 deletions

File tree

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeMetrics.java

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java

Lines changed: 53 additions & 35 deletions
@@ -48,9 +48,11 @@
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.function.BiConsumer;
 import java.util.function.Function;
+import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import javax.management.ObjectName;
 import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -1279,8 +1281,8 @@ public Map<String, String> getNodeStatistics() {
     nodeStateStatistics(nodeStatistics);
     // Statistics node space
     nodeSpaceStatistics(nodeStatistics);
-    // Statistics node readOnly
-    nodeOutOfSpaceStatistics(nodeStatistics);
+    // Statistics node non-writable
+    nodeNonWritableStatistics(nodeStatistics);
     // todo: Statistics of other instances
     return nodeStatistics;
   }
@@ -1368,43 +1370,59 @@ private void nodeSpaceStatistics(Map<String, String> nodeStatics) {
     nodeStatics.put(SpaceStatistics.NON_SCM_USED.getLabel(), nonScmUsed);
   }
 
-  private void nodeOutOfSpaceStatistics(Map<String, String> nodeStatics) {
-    List<DatanodeInfo> allNodes = getAllNodes();
-    long blockSize = (long) conf.getStorageSize(
-        OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE,
-        OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE_DEFAULT,
-        StorageUnit.BYTES);
-    long minRatisVolumeSizeBytes = (long) conf.getStorageSize(
-        ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN,
-        ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN_DEFAULT,
-        StorageUnit.BYTES);
-    long containerSize = (long) conf.getStorageSize(
-        ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
-        ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT,
-        StorageUnit.BYTES);
-
-    int nodeOutOfSpaceCount = (int) allNodes.parallelStream()
-        .filter(dn -> !hasEnoughSpace(dn, minRatisVolumeSizeBytes, containerSize, conf)
-            && !hasEnoughCommittedVolumeSpace(dn, blockSize))
+  private void nodeNonWritableStatistics(Map<String, String> nodeStatics) {
+    int nonWritableNodesCount = (int) getAllNodes().parallelStream()
+        .filter(new NonWritableNodeFilter(conf))
         .count();
 
-    nodeStatics.put("NodesOutOfSpace", String.valueOf(nodeOutOfSpaceCount));
-  }
-
-  /**
-   * Check if any volume in the datanode has committed space >= blockSize.
-   *
-   * @return true if any volume has committed space >= blockSize, false otherwise
-   */
-  private boolean hasEnoughCommittedVolumeSpace(DatanodeInfo dnInfo, long blockSize) {
-    for (StorageReportProto reportProto : dnInfo.getStorageReports()) {
-      if (reportProto.getCommitted() >= blockSize) {
-        return true;
+    nodeStatics.put("NonWritableNodes", String.valueOf(nonWritableNodesCount));
+  }
+
+  static class NonWritableNodeFilter implements Predicate<DatanodeInfo> {
+
+    private final long blockSize;
+    private final long minRatisVolumeSizeBytes;
+    private final long containerSize;
+    private final ConfigurationSource conf;
+
+    NonWritableNodeFilter(ConfigurationSource conf) {
+      blockSize = (long) conf.getStorageSize(
+          OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE,
+          OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE_DEFAULT,
+          StorageUnit.BYTES);
+      minRatisVolumeSizeBytes = (long) conf.getStorageSize(
+          ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN,
+          ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN_DEFAULT,
+          StorageUnit.BYTES);
+      containerSize = (long) conf.getStorageSize(
+          ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
+          ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT,
+          StorageUnit.BYTES);
+      this.conf = conf;
+    }
+
+    @Override
+    public boolean test(DatanodeInfo dn) {
+      return !dn.getNodeStatus().isNodeWritable()
+          || (!hasEnoughSpace(dn, minRatisVolumeSizeBytes, containerSize, conf)
+              && !hasEnoughCommittedVolumeSpace(dn));
+    }
+
+    /**
+     * Check if any volume in the datanode has committed space >= blockSize.
+     *
+     * @return true if any volume has committed space >= blockSize, false otherwise
+     */
+    private boolean hasEnoughCommittedVolumeSpace(DatanodeInfo dnInfo) {
+      for (StorageReportProto reportProto : dnInfo.getStorageReports()) {
+        if (reportProto.getCommitted() >= blockSize) {
+          return true;
        }
      }
+      LOG.debug("Datanode {} has no volumes with committed space >= {} bytes",
+          dnInfo.getID(), blockSize);
+      return false;
    }
-    LOG.debug("Datanode {} has no volumes with committed space >= {} bytes",
-        dnInfo.getID(), blockSize);
-    return false;
  }
 
  /**
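The net effect of the filter: a datanode counts as non-writable if its NodeStatus is not writable (per the metric description, not in IN_SERVICE and HEALTHY state), or if it can neither allocate a new container nor write to an existing one. A condensed, standalone sketch of that boolean structure, where Node and its three methods are hypothetical stand-ins rather than Ozone APIs:

// A minimal sketch of the same decision logic; Node and its methods are
// hypothetical stand-ins, not Ozone APIs.
import java.util.function.Predicate;

final class NonWritableSketch {

  interface Node {
    boolean isInServiceAndHealthy();       // stands in for getNodeStatus().isNodeWritable()
    boolean canAllocateNewContainer();     // stands in for hasEnoughSpace(...)
    boolean canWriteToExistingContainer(); // stands in for hasEnoughCommittedVolumeSpace(...)
  }

  // Non-writable if the node is not IN_SERVICE and HEALTHY, or if it can
  // neither allocate a new container nor write to an existing one.
  static final Predicate<Node> NON_WRITABLE = n ->
      !n.isInServiceAndHealthy()
          || (!n.canAllocateNewContainer() && !n.canWriteToExistingContainer());

  private NonWritableSketch() { }
}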

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java

Lines changed: 6 additions & 5 deletions
@@ -157,12 +157,13 @@ public void getMetrics(MetricsCollector collector, boolean all) {
     metrics.addGauge(
         Interns.info("AllNodes", "Number of datanodes"), totalNodeCount);
 
-    String nodesOutOfSpace = nodeStatistics.get("NodesOutOfSpace");
-    if (nodesOutOfSpace != null) {
+    String nonWritableNodes = nodeStatistics.get("NonWritableNodes");
+    if (nonWritableNodes != null) {
       metrics.addGauge(
-          Interns.info("NodesOutOfSpace", "Number of datanodes that are out of space because " +
-              "they cannot allocate new containers or write to existing ones."),
-          Integer.parseInt(nodesOutOfSpace));
+          Interns.info("NonWritableNodes", "Number of datanodes that cannot accept new writes because " +
+              "they are either not in IN_SERVICE and HEALTHY state, cannot allocate new containers or " +
+              "cannot write to existing containers."),
+          Integer.parseInt(nonWritableNodes));
     }
 
     for (Map.Entry<String, Long> e : nodeInfo.entrySet()) {
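Because getNodeStatistics() now publishes the count under the key "NonWritableNodes", a consumer still reading "NodesOutOfSpace" silently gets null. A minimal sketch of a transition shim such a consumer might add; NodeStatsCompat and nonWritableNodes are hypothetical names, not part of this patch:

// A hypothetical consumer-side helper bridging the key rename.
import java.util.Map;

final class NodeStatsCompat {

  private NodeStatsCompat() { }

  /** Prefer the new key, fall back to the pre-HDDS-14188 key. */
  static int nonWritableNodes(Map<String, String> nodeStatistics) {
    String value = nodeStatistics.getOrDefault("NonWritableNodes",
        nodeStatistics.get("NodesOutOfSpace")); // legacy name before this commit
    return value == null ? 0 : Integer.parseInt(value);
  }
}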

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeMetrics.java

Lines changed: 1 addition & 1 deletion
@@ -227,7 +227,7 @@ public void testNodeCountAndInfoMetricsReported() throws Exception {
     assertGauge("AllNodes", 1,
         getMetrics(SCMNodeMetrics.class.getSimpleName()));
     // The DN has no metadata volumes, so hasEnoughSpace() returns false indicating the DN is out of space.
-    assertGauge("NodesOutOfSpace", 1,
+    assertGauge("NonWritableNodes", 1,
         getMetrics(SCMNodeMetrics.class.getSimpleName()));
     assertGauge("TotalCapacity", 100L,
         getMetrics(SCMNodeMetrics.class.getSimpleName()));
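External dashboards and alerts keyed on the old gauge name need the same rename. One way to check what a running SCM actually exposes is a plain JMX query; the sketch below uses only standard java.lang.management/javax.management calls, and the ObjectName pattern is an assumption for illustration that may not match the real registration:

// A hedged probe, runnable inside (or attached to) the SCM JVM; the ObjectName
// pattern is an assumption, not a documented name.
import java.lang.management.ManagementFactory;
import java.util.Set;
import javax.management.MBeanServer;
import javax.management.ObjectName;

public class NonWritableNodesProbe {
  public static void main(String[] args) throws Exception {
    MBeanServer server = ManagementFactory.getPlatformMBeanServer();
    // Hypothetical pattern; adjust the service key to whatever the metrics system registers.
    Set<ObjectName> names = server.queryNames(
        new ObjectName("Hadoop:service=*,name=SCMNodeMetrics"), null);
    for (ObjectName name : names) {
      // The gauge surfaces as an MBean attribute named after the metric.
      System.out.println(name + " NonWritableNodes="
          + server.getAttribute(name, "NonWritableNodes"));
    }
  }
}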
