Skip to content

Commit 203b856

Browse files
committed
Rebase and fix review comments
1 parent f12aba7 commit 203b856

16 files changed

Lines changed: 92 additions & 197 deletions

File tree

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfig.java

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -138,31 +138,10 @@ public class ScmConfig extends ReconfigurableConfig {
138138
)
139139
private int transactionToDNsCommitMapLimit = 5000000;
140140

141-
@Config(key = "hdds.scm.container.pending.allocation.roll.interval",
142-
defaultValue = "5m",
143-
type = ConfigType.TIME,
144-
tags = { ConfigTag.SCM, ConfigTag.CONTAINER },
145-
description =
146-
"Time interval for rolling the pending container allocation window. " +
147-
"Pending container allocations are tracked in a two-window tumbling bucket " +
148-
"pattern. Each window has this duration. " +
149-
"After 2x this interval, allocations that haven't been confirmed via " +
150-
"container reports will automatically age out. Default is 5 minutes."
151-
)
152-
private Duration pendingContainerAllocationRollInterval = Duration.ofMinutes(5);
153-
154141
public int getTransactionToDNsCommitMapLimit() {
155142
return transactionToDNsCommitMapLimit;
156143
}
157144

158-
public Duration getPendingContainerAllocationRollInterval() {
159-
return pendingContainerAllocationRollInterval;
160-
}
161-
162-
public void setPendingContainerAllocationRollInterval(Duration duration) {
163-
this.pendingContainerAllocationRollInterval = duration;
164-
}
165-
166145
public Duration getBlockDeletionInterval() {
167146
return blockDeletionInterval;
168147
}

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,11 @@ public final class ScmConfigKeys {
269269
public static final String OZONE_SCM_STALENODE_INTERVAL_DEFAULT =
270270
"5m";
271271

272+
public static final String OZONE_SCM_PENDING_CONTAINER_ROLL_INTERVAL =
273+
"ozone.scm.pending.container.roll.interval";
274+
public static final String OZONE_SCM_PENDING_CONTAINER_ROLL_INTERVAL_DEFAULT =
275+
"5m";
276+
272277
public static final String OZONE_SCM_HEARTBEAT_RPC_TIMEOUT =
273278
"ozone.scm.heartbeat.rpc-timeout";
274279
public static final String OZONE_SCM_HEARTBEAT_RPC_TIMEOUT_DEFAULT =

hadoop-hdds/common/src/main/resources/ozone-default.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,17 @@
10371037
balances the amount of metadata.
10381038
</description>
10391039
</property>
1040+
<property>
1041+
<name>ozone.scm.pending.container.roll.interval</name>
1042+
<value>5m</value>
1043+
<tag>OZONE, SCM, PERFORMANCE, MANAGEMENT</tag>
1044+
<description>
1045+
The interval at which the two-window tumbling bucket for pending
1046+
container allocations rolls over per DataNode. Pending containers
1047+
that have not been confirmed within two intervals are automatically
1048+
aged out. Default is 5 minutes.
1049+
</description>
1050+
</property>
10401051
<property>
10411052
<name>ozone.scm.container.lock.stripes</name>
10421053
<value>512</value>

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -136,15 +136,15 @@ public List<ContainerInfo> getContainers(ReplicationType type) {
136136

137137
@Override
138138
public List<ContainerID> getContainerIDs(final ContainerID startID,
139-
final int count,
140-
final LifeCycleState state) {
139+
final int count,
140+
final LifeCycleState state) {
141141
scmContainerManagerMetrics.incNumListContainersOps();
142142
return containerStateManager.getContainerIDs(state, startID, count);
143143
}
144144

145145
@Override
146146
public List<ContainerInfo> getContainers(final ContainerID startID,
147-
final int count) {
147+
final int count) {
148148
scmContainerManagerMetrics.incNumListContainersOps();
149149
return containerStateManager.getContainerInfos(startID, count);
150150
}
@@ -157,8 +157,8 @@ public List<ContainerInfo> getContainers(final LifeCycleState state) {
157157

158158
@Override
159159
public List<ContainerInfo> getContainers(final ContainerID startID,
160-
final int count,
161-
final LifeCycleState state) {
160+
final int count,
161+
final LifeCycleState state) {
162162
scmContainerManagerMetrics.incNumListContainersOps();
163163
return containerStateManager.getContainerInfos(state, startID, count);
164164
}
@@ -271,14 +271,12 @@ private ContainerInfo allocateContainer(final Pipeline pipeline,
271271
containerStateManager.addContainer(containerInfoBuilder.build());
272272
pipelineManager.recordPendingAllocation(pipeline, containerID);
273273
scmContainerManagerMetrics.incNumSuccessfulCreateContainers();
274-
pipelineManager.recordPendingAllocation(pipeline, containerID);
275-
276274
return containerStateManager.getContainer(containerID);
277275
}
278276

279277
@Override
280278
public void updateContainerState(final ContainerID cid,
281-
final LifeCycleEvent event)
279+
final LifeCycleEvent event)
282280
throws IOException, InvalidStateTransitionException {
283281
HddsProtos.ContainerID protoId = cid.getProtobuf();
284282
lock.lock();
@@ -314,7 +312,7 @@ public void updateContainerInfo(final ContainerID cid, ContainerInfoProto contai
314312

315313
@Override
316314
public void transitionDeletingOrDeletedToTargetState(ContainerID containerID, LifeCycleState targetState)
317-
throws IOException {
315+
throws IOException {
318316
HddsProtos.ContainerID proto = containerID.getProtobuf();
319317
lock.lock();
320318
try {
@@ -340,7 +338,7 @@ public Set<ContainerReplica> getContainerReplicas(final ContainerID id)
340338

341339
@Override
342340
public void updateContainerReplica(final ContainerID cid,
343-
final ContainerReplica replica)
341+
final ContainerReplica replica)
344342
throws ContainerNotFoundException {
345343
if (containerExist(cid)) {
346344
containerStateManager.updateContainerReplica(replica);
@@ -351,7 +349,7 @@ public void updateContainerReplica(final ContainerID cid,
351349

352350
@Override
353351
public void removeContainerReplica(final ContainerID cid,
354-
final ContainerReplica replica)
352+
final ContainerReplica replica)
355353
throws ContainerNotFoundException, ContainerReplicaNotFoundException {
356354
if (containerExist(cid)) {
357355
containerStateManager.removeContainerReplica(replica);
@@ -422,7 +420,7 @@ private NavigableSet<ContainerID> getContainersForOwner(
422420

423421
@Override
424422
public void notifyContainerReportProcessing(final boolean isFullReport,
425-
final boolean success) {
423+
final boolean success) {
426424
if (isFullReport) {
427425
if (success) {
428426
scmContainerManagerMetrics.incNumContainerReportsProcessedSuccessful();

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,17 +176,11 @@ public void onMessage(final ContainerReportFromDatanode reportFromDatanode,
176176
if (!alreadyInDn) {
177177
// This is a new Container not in the nodeManager -> dn map yet
178178
getNodeManager().addContainer(datanodeDetails, cid);
179-
180179
// Remove from pending tracker when container is added to DN
181180
// This container was just confirmed for the first time on this DN
182-
// No need to remove on subsequent reports (it's already been removed)
183-
if (container != null) {
184-
PendingContainerTracker tracker =
185-
getNodeManager().getPendingContainerTracker();
186-
if (tracker != null) {
187-
tracker.removePendingAllocation(datanodeDetails, cid);
188-
}
189-
}
181+
PendingContainerTracker tracker = getNodeManager().getPendingContainerTracker();
182+
tracker.removePendingAllocation(getNodeManager().getDatanodeInfo(datanodeDetails).
183+
getPendingContainerAllocations(), cid);
190184
}
191185
if (container == null || ContainerReportValidator
192186
.validate(container, datanodeDetails, replica)) {

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/IncrementalContainerReportHandler.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,8 @@ protected void processICR(IncrementalContainerReportFromDatanode report,
104104
}
105105
if (ContainerReportValidator.validate(container, dd, replicaProto)) {
106106
processContainerReplica(dd, container, replicaProto, publisher, detailsForLogging);
107-
108-
PendingContainerTracker tracker =
109-
getNodeManager().getPendingContainerTracker();
110-
if (tracker != null) {
111-
tracker.removePendingAllocation(dd, id);
112-
}
107+
PendingContainerTracker tracker = getNodeManager().getPendingContainerTracker();
108+
tracker.removePendingAllocation(getNodeManager().getDatanodeInfo(dd).getPendingContainerAllocations(), id);
113109
}
114110
success = true;
115111
} catch (ContainerNotFoundException e) {

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -440,23 +440,4 @@ default void removeNode(DatanodeDetails datanodeDetails) throws NodeNotFoundExce
440440
* SCM-side tracker for container allocations not yet reported by datanodes.
441441
*/
442442
PendingContainerTracker getPendingContainerTracker();
443-
444-
/**
445-
* True if the node can accept another container of the given size, accounting for
446-
* {@link #getPendingContainerTracker()}.
447-
*/
448-
boolean hasSpaceForNewContainerAllocation(DatanodeDetails node);
449-
450-
/**
451-
* Records a pending container allocation for {@code node} so that subsequent
452-
* space checks via {@link #hasSpaceForNewContainerAllocation} account for the
453-
* in-flight allocation before the datanode sends an ICR.
454-
*
455-
* @param node the datanode that will receive the new container replica
456-
* @param containerID the container being allocated
457-
*/
458-
default void recordPendingAllocationForDatanode(DatanodeDetails node,
459-
ContainerID containerID) {
460-
getPendingContainerTracker().recordPendingAllocationForDatanode(node, containerID);
461-
}
462443
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
4848
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
4949
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
50+
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
5051
import org.apache.hadoop.hdds.scm.container.ContainerID;
5152
import org.apache.hadoop.hdds.scm.events.SCMEvents;
5253
import org.apache.hadoop.hdds.scm.ha.SCMContext;
@@ -124,7 +125,7 @@ public class NodeStateManager implements Runnable, Closeable {
124125
*/
125126
private final long deadNodeIntervalMs;
126127

127-
private final long containerRollIntervalMs = 5 * 60 * 1000; //TODO
128+
private final long containerRollIntervalMs; //TODO
128129

129130
/**
130131
* The future is used to pause/unpause the scheduled checks.
@@ -214,6 +215,11 @@ public NodeStateManager(ConfigurationSource conf,
214215
scmContext.getFinalizationCheckpoint()) &&
215216
!layoutMatchCondition.test(layout);
216217

218+
containerRollIntervalMs = conf.getTimeDuration(
219+
ScmConfigKeys.OZONE_SCM_PENDING_CONTAINER_ROLL_INTERVAL,
220+
ScmConfigKeys.OZONE_SCM_PENDING_CONTAINER_ROLL_INTERVAL_DEFAULT,
221+
TimeUnit.MILLISECONDS);
222+
217223
scheduleNextHealthCheck();
218224
}
219225

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/PendingContainerTracker.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.hadoop.hdds.scm.node;
1919

20+
import com.google.common.annotations.VisibleForTesting;
2021
import java.util.HashSet;
2122
import java.util.List;
2223
import java.util.Objects;
@@ -222,4 +223,9 @@ public void removePendingAllocation(TwoWindowBucket bucket, ContainerID containe
222223
metrics.incNumPendingContainersRemoved();
223224
}
224225
}
226+
227+
@VisibleForTesting
228+
public SCMNodeMetrics getMetrics() {
229+
return metrics;
230+
}
225231
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java

Lines changed: 4 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@
6969
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode;
7070
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
7171
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
72-
import org.apache.hadoop.hdds.scm.ScmConfig;
7372
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
7473
import org.apache.hadoop.hdds.scm.VersionInfo;
7574
import org.apache.hadoop.hdds.scm.container.ContainerID;
@@ -145,13 +144,6 @@ public class SCMNodeManager implements NodeManager {
145144
private final NonWritableNodeFilter nonWritableNodeFilter;
146145
private final int numContainerPerVolume;
147146

148-
/**
149-
* SCM-side pending container allocations per datanode (not yet in container reports).
150-
*/
151-
private final PendingContainerTracker pendingContainerTracker;
152-
153-
private final long maxContainerSizeBytes;
154-
155147
/**
156148
* Lock used to synchronize some operation in Node manager to ensure a
157149
* consistent view of the node state.
@@ -200,7 +192,10 @@ public SCMNodeManager(
200192
this.pendingContainerTracker = new PendingContainerTracker(
201193
(long) conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
202194
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES),
203-
5 * 60 * 1000, // TODO
195+
conf.getTimeDuration(
196+
ScmConfigKeys.OZONE_SCM_PENDING_CONTAINER_ROLL_INTERVAL,
197+
ScmConfigKeys.OZONE_SCM_PENDING_CONTAINER_ROLL_INTERVAL_DEFAULT,
198+
TimeUnit.MILLISECONDS),
204199
this.metrics);
205200
this.clusterMap = networkTopology;
206201
this.nodeResolver = nodeResolver;
@@ -219,14 +214,6 @@ public SCMNodeManager(
219214
this.scmContext = scmContext;
220215
this.sendCommandNotifyMap = new HashMap<>();
221216
this.nonWritableNodeFilter = new NonWritableNodeFilter(conf);
222-
223-
this.maxContainerSizeBytes = (long) conf.getStorageSize(
224-
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
225-
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
226-
ScmConfig scmConfig = conf.getObject(ScmConfig.class);
227-
long rollIntervalMs = scmConfig.getPendingContainerAllocationRollInterval().toMillis();
228-
this.pendingContainerTracker = new PendingContainerTracker(
229-
maxContainerSizeBytes, rollIntervalMs, this.metrics);
230217
}
231218

232219
@Override
@@ -252,35 +239,6 @@ public PendingContainerTracker getPendingContainerTracker() {
252239
return pendingContainerTracker;
253240
}
254241

255-
/**
256-
* Effective space check aligned with container allocation: per-disk slot model minus
257-
* SCM pending allocations.
258-
*/
259-
@Override
260-
public boolean hasSpaceForNewContainerAllocation(DatanodeDetails node) {
261-
Objects.requireNonNull(node, "node==null");
262-
try {
263-
DatanodeInfo datanodeInfo = getDatanodeInfo(node);
264-
if (datanodeInfo == null) {
265-
LOG.warn("DatanodeInfo not found for node {}", node.getUuidString());
266-
return false;
267-
}
268-
return pendingContainerTracker.hasEffectiveAllocatableSpaceForNewContainer(
269-
node, datanodeInfo);
270-
} catch (Exception e) {
271-
LOG.warn("Error checking allocatable space for node {}", node.getUuidString(), e);
272-
return false;
273-
}
274-
}
275-
276-
@Override
277-
public void recordPendingAllocationForDatanode(DatanodeDetails node,
278-
ContainerID containerID) {
279-
Objects.requireNonNull(node, "node==null");
280-
Objects.requireNonNull(containerID, "containerID==null");
281-
pendingContainerTracker.recordPendingAllocationForDatanode(node, containerID);
282-
}
283-
284242
protected NodeStateManager getNodeStateManager() {
285243
return nodeStateManager;
286244
}
@@ -762,7 +720,6 @@ public void processNodeReport(DatanodeDetails datanodeDetails,
762720
datanodeInfo.updateStorageReports(nodeReport.getStorageReportList());
763721
datanodeInfo.updateMetaDataStorageReports(nodeReport.
764722
getMetadataStorageReportList());
765-
pendingContainerTracker.rollWindowsIfNeeded(datanodeDetails);
766723
metrics.incNumNodeReportProcessed();
767724
}
768725
} catch (NodeNotFoundException e) {

0 commit comments

Comments
 (0)