Skip to content

Commit e215b48

Browse files
HDDS-10767. Reducing DatanodeDetails in the ContainerLocationCache (#10158)
Co-authored-by: guohao1 <guohao1@360.cn>
1 parent 99b53f4 commit e215b48

5 files changed

Lines changed: 118 additions & 11 deletions

File tree

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -758,7 +758,7 @@ public Builder setDatanodeDetails(DatanodeDetails details) {
758758
this.id = details.id;
759759
this.ipAddress = details.getIpAddressAsByteString();
760760
this.hostName = details.getHostNameAsByteString();
761-
this.networkName = details.getHostNameAsByteString();
761+
this.networkName = details.getNetworkNameAsByteString();
762762
this.networkLocation = details.getNetworkLocationAsByteString();
763763
this.level = details.getLevel();
764764
this.ports = details.getPorts();

hadoop-hdds/common/src/main/resources/ozone-default.xml

Lines changed: 10 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -4385,6 +4385,16 @@
43854385
</description>
43864386
</property>
43874387

4388+
<property>
4389+
<name>ozone.om.container.location.datanode.cache.size</name>
4390+
<value>10000</value>
4391+
<tag>OZONE, OM</tag>
4392+
<description>
4393+
The size of the datanode details cache used to reduce duplicate datanode objects
4394+
retained by the container location cache in Ozone Manager.
4395+
</description>
4396+
</property>
4397+
43884398
<property>
43894399
<name>ozone.om.container.location.cache.ttl</name>
43904400
<value>360m</value>

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -550,6 +550,11 @@ public final class OMConfigKeys {
550550
public static final int OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT
551551
= 100_000;
552552

553+
public static final String OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE
554+
= "ozone.om.container.location.datanode.cache.size";
555+
public static final int
556+
OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT = 10_000;
557+
553558
public static final String OZONE_OM_CONTAINER_LOCATION_CACHE_TTL
554559
= "ozone.om.container.location.cache.ttl";
555560

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ScmClient.java

Lines changed: 52 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -21,13 +21,17 @@
2121
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT;
2222
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_TTL;
2323
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT;
24+
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE;
25+
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT;
2426

27+
import com.google.common.cache.Cache;
2528
import com.google.common.cache.CacheBuilder;
2629
import com.google.common.cache.CacheLoader;
2730
import com.google.common.cache.CacheLoader.InvalidCacheLoadException;
2831
import com.google.common.cache.LoadingCache;
2932
import jakarta.annotation.Nonnull;
3033
import java.io.IOException;
34+
import java.util.ArrayList;
3135
import java.util.HashMap;
3236
import java.util.List;
3337
import java.util.Map;
@@ -37,7 +41,8 @@
3741
import org.apache.hadoop.hdds.client.ECReplicationConfig;
3842
import org.apache.hadoop.hdds.client.ReplicationConfig;
3943
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
40-
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
44+
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
45+
import org.apache.hadoop.hdds.protocol.DatanodeID;
4146
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
4247
import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol;
4348
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
@@ -52,21 +57,28 @@ public class ScmClient {
5257
private final StorageContainerLocationProtocol containerClient;
5358
private final LoadingCache<Long, Pipeline> containerLocationCache;
5459
private final CacheMetrics containerCacheMetrics;
60+
private final CacheMetrics datanodeDetailsCacheMetrics;
5561

5662
ScmClient(ScmBlockLocationProtocol blockClient,
5763
StorageContainerLocationProtocol containerClient,
5864
OzoneConfiguration configuration) {
5965
this.containerClient = containerClient;
6066
this.blockClient = blockClient;
67+
Cache<DatanodeID, DatanodeDetails> datanodeDetailsCache =
68+
createDatanodeDetailsCache(configuration);
6169
this.containerLocationCache =
62-
createContainerLocationCache(configuration, containerClient);
70+
createContainerLocationCache(configuration, containerClient,
71+
datanodeDetailsCache);
6372
this.containerCacheMetrics = CacheMetrics.create(containerLocationCache,
6473
"ContainerInfo");
74+
this.datanodeDetailsCacheMetrics = CacheMetrics.create(
75+
datanodeDetailsCache, "DatanodeDetails");
6576
}
6677

6778
static LoadingCache<Long, Pipeline> createContainerLocationCache(
6879
OzoneConfiguration configuration,
69-
StorageContainerLocationProtocol containerClient) {
80+
StorageContainerLocationProtocol containerClient,
81+
Cache<DatanodeID, DatanodeDetails> datanodeDetailsCache) {
7082
int maxSize = configuration.getInt(OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE,
7183
OZONE_OM_CONTAINER_LOCATION_CACHE_SIZE_DEFAULT);
7284
TimeUnit unit = OZONE_OM_CONTAINER_LOCATION_CACHE_TTL_DEFAULT.getUnit();
@@ -81,7 +93,9 @@ static LoadingCache<Long, Pipeline> createContainerLocationCache(
8193
@Nonnull
8294
@Override
8395
public Pipeline load(@Nonnull Long key) throws Exception {
84-
return containerClient.getContainerWithPipeline(key).getPipeline();
96+
Pipeline pipeline =
97+
containerClient.getContainerWithPipeline(key).getPipeline();
98+
return newPipelineWithDNCache(pipeline, datanodeDetailsCache);
8599
}
86100

87101
@Nonnull
@@ -92,12 +106,44 @@ public Map<Long, Pipeline> loadAll(
92106
.stream()
93107
.collect(Collectors.toMap(
94108
x -> x.getContainerInfo().getContainerID(),
95-
ContainerWithPipeline::getPipeline
109+
x -> newPipelineWithDNCache(x.getPipeline(),
110+
datanodeDetailsCache)
96111
));
97112
}
98113
});
99114
}
100115

116+
static Cache<DatanodeID, DatanodeDetails> createDatanodeDetailsCache(
117+
OzoneConfiguration configuration) {
118+
int datanodeCacheMaxSize = configuration.getInt(
119+
OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE,
120+
OZONE_OM_CONTAINER_LOCATION_DATANODE_CACHE_SIZE_DEFAULT);
121+
122+
return CacheBuilder.newBuilder()
123+
.maximumSize(datanodeCacheMaxSize)
124+
.recordStats()
125+
.build();
126+
}
127+
128+
static Pipeline newPipelineWithDNCache(Pipeline pipeline,
129+
Cache<DatanodeID, DatanodeDetails> datanodeDetailsCache) {
130+
Pipeline.Builder builder = pipeline.toBuilder();
131+
List<DatanodeDetails> nodes = new ArrayList<>();
132+
for (DatanodeDetails node : pipeline.getNodes()) {
133+
DatanodeDetails datanodeDetails =
134+
datanodeDetailsCache.getIfPresent(node.getID());
135+
// Call compareNodeValues to handle IP / hostname changes
136+
if (datanodeDetails != null && node.compareNodeValues(datanodeDetails)) {
137+
nodes.add(datanodeDetails);
138+
} else {
139+
datanodeDetailsCache.put(node.getID(), node);
140+
nodes.add(node);
141+
}
142+
}
143+
builder.setNodes(nodes);
144+
return builder.build();
145+
}
146+
101147
public ScmBlockLocationProtocol getBlockClient() {
102148
return this.blockClient;
103149
}
@@ -166,6 +212,7 @@ private <T> T handleCacheExecutionException(ExecutionException e)
166212

167213
public void close() {
168214
containerCacheMetrics.unregister();
215+
datanodeDetailsCacheMetrics.unregister();
169216
}
170217

171218
}

hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestScmClient.java

Lines changed: 50 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -21,13 +21,16 @@
2121
import static java.util.Arrays.asList;
2222
import static org.apache.hadoop.hdds.client.ReplicationConfig.fromTypeAndFactor;
2323
import static org.junit.jupiter.api.Assertions.assertEquals;
24+
import static org.junit.jupiter.api.Assertions.assertSame;
2425
import static org.junit.jupiter.api.Assertions.assertThrows;
2526
import static org.mockito.Mockito.eq;
2627
import static org.mockito.Mockito.mock;
2728
import static org.mockito.Mockito.times;
2829
import static org.mockito.Mockito.verify;
2930
import static org.mockito.Mockito.when;
3031

32+
import com.google.common.cache.Cache;
33+
import com.google.common.cache.CacheBuilder;
3134
import java.io.IOException;
3235
import java.util.ArrayList;
3336
import java.util.HashMap;
@@ -41,6 +44,7 @@
4144
import org.apache.hadoop.hdds.client.ReplicationType;
4245
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
4346
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
47+
import org.apache.hadoop.hdds.protocol.DatanodeID;
4448
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
4549
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
4650
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
@@ -106,9 +110,12 @@ public void testGetContainerLocations(String testCaseName,
106110
throws IOException {
107111

108112
Map<Long, ContainerWithPipeline> actualLocations = new HashMap<>();
109-
113+
List<DatanodeDetails> dnList = new ArrayList<>();
114+
for (int i = 0; i < 3; i++) {
115+
dnList.add(randomDatanode());
116+
}
110117
for (long containerId : prepopulatedIds) {
111-
ContainerWithPipeline pipeline = createPipeline(containerId);
118+
ContainerWithPipeline pipeline = createPipeline(containerId, dnList);
112119
actualLocations.put(containerId, pipeline);
113120
}
114121

@@ -128,7 +135,7 @@ public void testGetContainerLocations(String testCaseName,
128135
if (!expectedScmCallIds.isEmpty()) {
129136
List<ContainerWithPipeline> scmLocations = new ArrayList<>();
130137
for (long containerId : expectedScmCallIds) {
131-
ContainerWithPipeline pipeline = createPipeline(containerId);
138+
ContainerWithPipeline pipeline = createPipeline(containerId, dnList);
132139
scmLocations.add(pipeline);
133140
actualLocations.put(containerId, pipeline);
134141
}
@@ -166,13 +173,51 @@ public void testGetContainerLocationsWithScmFailures() throws IOException {
166173
assertEquals(runtimeException, actualRt.getCause());
167174
}
168175

169-
ContainerWithPipeline createPipeline(long containerId) {
176+
@Test
177+
public void testDatanodeDetailsCacheRecordsStats() {
178+
Cache<DatanodeID, DatanodeDetails> datanodeDetailsCache =
179+
ScmClient.createDatanodeDetailsCache(new OzoneConfiguration());
180+
181+
datanodeDetailsCache.getIfPresent(DatanodeID.randomID());
182+
183+
assertEquals(1, datanodeDetailsCache.stats().missCount());
184+
}
185+
186+
@Test
187+
public void testDatanodeDetailsCacheUpdatesIpAddressChange() {
188+
Cache<DatanodeID, DatanodeDetails> datanodeDetailsCache =
189+
CacheBuilder.newBuilder().build();
190+
DatanodeDetails original = randomDatanode();
191+
String originalIp = original.getIpAddress();
192+
DatanodeDetails updated = DatanodeDetails.newBuilder()
193+
.setDatanodeDetails(original)
194+
.setIpAddress("updated-ip")
195+
.build();
196+
197+
Pipeline originalPipeline = ScmClient.newPipelineWithDNCache(
198+
createPipeline(1L, asList(original)).getPipeline(),
199+
datanodeDetailsCache);
200+
Pipeline refreshed = ScmClient.newPipelineWithDNCache(
201+
createPipeline(2L, asList(updated)).getPipeline(),
202+
datanodeDetailsCache);
203+
204+
assertSame(original, originalPipeline.getNodes().get(0));
205+
assertEquals(originalIp, original.getIpAddress());
206+
assertEquals(originalIp, originalPipeline.getNodes().get(0).getIpAddress());
207+
DatanodeDetails refreshedNode = refreshed.getNodes().get(0);
208+
assertSame(updated, refreshedNode);
209+
assertEquals("updated-ip", refreshedNode.getIpAddress());
210+
assertSame(updated, datanodeDetailsCache.getIfPresent(original.getID()));
211+
}
212+
213+
ContainerWithPipeline createPipeline(long containerId,
214+
List<DatanodeDetails> dnList) {
170215
ContainerInfo containerInfo = new ContainerInfo.Builder()
171216
.setContainerID(containerId)
172217
.build();
173218
Pipeline pipeline = Pipeline.newBuilder()
174219
.setId(PipelineID.randomId())
175-
.setNodes(asList(randomDatanode(), randomDatanode()))
220+
.setNodes(dnList)
176221
.setReplicationConfig(fromTypeAndFactor(
177222
ReplicationType.RATIS, ReplicationFactor.THREE))
178223
.setState(Pipeline.PipelineState.OPEN)

0 commit comments

Comments (0)