Skip to content

Commit 4710ac9

Browse files
authored
HDDS-13866. Use component-specific default directory for Ratis (#9318)
1 parent fd89481 commit 4710ac9

10 files changed

Lines changed: 331 additions & 51 deletions

File tree

hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,15 @@
1717

1818
package org.apache.hadoop.hdds.scm.ha;
1919

20-
import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS;
21-
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_RATIS_SNAPSHOT_DIR;
22-
2320
import com.google.common.base.Strings;
2421
import com.google.common.collect.ImmutableList;
25-
import java.io.File;
2622
import java.io.IOException;
27-
import java.nio.file.Paths;
2823
import java.util.ArrayList;
2924
import java.util.List;
3025
import org.apache.hadoop.hdds.HddsUtils;
3126
import org.apache.hadoop.hdds.conf.ConfigurationSource;
3227
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
28+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
3329
import org.apache.hadoop.hdds.ratis.ServerNotLeaderException;
3430
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
3531
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
@@ -46,15 +42,11 @@
4642
import org.apache.ratis.protocol.exceptions.ReconfigurationTimeoutException;
4743
import org.apache.ratis.protocol.exceptions.ResourceUnavailableException;
4844
import org.apache.ratis.protocol.exceptions.StateMachineException;
49-
import org.slf4j.Logger;
50-
import org.slf4j.LoggerFactory;
5145

5246
/**
5347
* Utility class used by SCM HA.
5448
*/
5549
public final class SCMHAUtils {
56-
private static final Logger LOG =
57-
LoggerFactory.getLogger(SCMHAUtils.class);
5850

5951
private static final ImmutableList<Class<? extends Exception>>
6052
RETRIABLE_WITH_NO_FAILOVER_EXCEPTION_LIST =
@@ -97,31 +89,18 @@ public static String getSCMRatisDirectory(ConfigurationSource conf) {
9789
conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_STORAGE_DIR);
9890

9991
if (Strings.isNullOrEmpty(scmRatisDirectory)) {
100-
scmRatisDirectory = ServerUtils.getDefaultRatisDirectory(conf);
92+
scmRatisDirectory = ServerUtils.getDefaultRatisDirectory(conf, NodeType.SCM);
10193
}
10294
return scmRatisDirectory;
10395
}
10496

105-
public static String getRatisStorageDir(final ConfigurationSource conf) {
106-
String storageDir = conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_STORAGE_DIR);
107-
if (Strings.isNullOrEmpty(storageDir)) {
108-
File metaDirPath = ServerUtils.getOzoneMetaDirPath(conf);
109-
storageDir = (new File(metaDirPath, "scm-ha")).getPath();
110-
}
111-
return storageDir;
112-
}
113-
11497
public static String getSCMRatisSnapshotDirectory(ConfigurationSource conf) {
11598
String snapshotDir =
11699
conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR);
117100

118-
// If ratis snapshot directory is not set, fall back to ozone.metadata.dir.
101+
// If the Ratis snapshot directory is not set, fall back to a component-specific location under ozone.metadata.dirs.
119102
if (Strings.isNullOrEmpty(snapshotDir)) {
120-
LOG.warn("SCM snapshot dir is not configured. Falling back to {} config",
121-
OZONE_METADATA_DIRS);
122-
File metaDirPath = ServerUtils.getOzoneMetaDirPath(conf);
123-
snapshotDir =
124-
Paths.get(metaDirPath.getPath(), OZONE_RATIS_SNAPSHOT_DIR).toString();
103+
snapshotDir = ServerUtils.getDefaultRatisSnapshotDirectory(conf, NodeType.SCM);
125104
}
126105
return snapshotDir;
127106
}

hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java

Lines changed: 130 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,20 @@
2121
import java.net.InetSocketAddress;
2222
import java.nio.file.Files;
2323
import java.nio.file.Path;
24+
import java.nio.file.Paths;
2425
import java.nio.file.attribute.PosixFilePermissions;
2526
import java.util.Collection;
2627
import org.apache.hadoop.fs.permission.FsPermission;
2728
import org.apache.hadoop.hdds.HddsConfigKeys;
2829
import org.apache.hadoop.hdds.conf.ConfigurationSource;
2930
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
31+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
3032
import org.apache.hadoop.hdds.recon.ReconConfigKeys;
3133
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
3234
import org.apache.hadoop.ipc_.RPC;
3335
import org.apache.hadoop.ipc_.Server;
3436
import org.apache.hadoop.ozone.OzoneConfigKeys;
37+
import org.apache.hadoop.ozone.OzoneConsts;
3538
import org.apache.hadoop.security.UserGroupInformation;
3639
import org.slf4j.Logger;
3740
import org.slf4j.LoggerFactory;
@@ -300,11 +303,136 @@ public static String getRemoteUserName() {
300303
return remoteUser != null ? remoteUser.getUserName() : null;
301304
}
302305

303-
public static String getDefaultRatisDirectory(ConfigurationSource conf) {
306+
/**
307+
* Get the default Ratis directory for a component when the specific
308+
* configuration is not set. This resolves to a component-specific subdirectory
309+
* under ozone.metadata.dirs to avoid conflicts when multiple components
310+
* are colocated on the same host.
311+
*
312+
* <p>For backward compatibility during upgrades, this method checks for
313+
* existing Ratis data in old locations before using the new component-specific
314+
* location. See {@link #findExistingRatisDirectory} for details on old locations.
315+
*
316+
* @param conf Configuration source
317+
* @param nodeType Type of the node component
318+
* @return Path to an existing legacy Ratis directory if one is found, otherwise the component-specific Ratis directory
319+
*/
320+
public static String getDefaultRatisDirectory(ConfigurationSource conf,
321+
NodeType nodeType) {
304322
LOG.warn("Storage directory for Ratis is not configured. It is a good " +
305323
"idea to map this to an SSD disk. Falling back to {}",
306324
HddsConfigKeys.OZONE_METADATA_DIRS);
307325
File metaDirPath = ServerUtils.getOzoneMetaDirPath(conf);
308-
return (new File(metaDirPath, "ratis")).getPath();
326+
327+
// Check for existing Ratis data from old versions for backward compatibility
328+
String existingDir = findExistingRatisDirectory(metaDirPath, nodeType);
329+
if (existingDir != null) {
330+
return existingDir;
331+
}
332+
333+
// Use new component-specific location for new installations
334+
String componentName = getComponentName(nodeType);
335+
return Paths.get(metaDirPath.getPath(), componentName + ".ratis").toString();
336+
}
337+
338+
/**
339+
* Checks for existing Ratis directories from previous versions for backward
340+
* compatibility during upgrades.
341+
*
342+
* <p>Older versions of Ozone used different directory structures:
343+
* <ul>
344+
* <li>Versions up to 2.0.0: Shared {@code <ozone.metadata.dirs>/ratis} for all components</li>
345+
* <li>Some SCM versions: Used {@code <ozone.metadata.dirs>/scm-ha}</li>
346+
* </ul>
347+
*
348+
* @param metaDirPath The ozone metadata directory path
349+
* @param nodeType Type of the node component
350+
* @return Path to existing old Ratis directory if found, null otherwise
351+
*/
352+
private static String findExistingRatisDirectory(File metaDirPath,
353+
NodeType nodeType) {
354+
// Check component-specific old location (SCM used scm-ha in some versions)
355+
if ("scm".equals(getComponentName(nodeType))) {
356+
File oldScmRatisDir = new File(metaDirPath, "scm-ha");
357+
if (isNonEmptyDirectory(oldScmRatisDir)) {
358+
LOG.info("Found existing SCM Ratis directory at old location: {}. " +
359+
"Using it for backward compatibility during upgrade.",
360+
oldScmRatisDir.getPath());
361+
return oldScmRatisDir.getPath();
362+
}
363+
}
364+
365+
// Check old shared Ratis location (used by version 2.0.0 and earlier)
366+
// All components (e.g. OM, SCM, Datanode) shared <ozone.metadata.dirs>/ratis
367+
File oldSharedRatisDir = new File(metaDirPath, "ratis");
368+
if (isNonEmptyDirectory(oldSharedRatisDir)) {
369+
LOG.info("Found existing Ratis directory at old shared location: {}. " +
370+
"Using it for backward compatibility during upgrade.",
371+
oldSharedRatisDir.getPath());
372+
return oldSharedRatisDir.getPath();
373+
}
374+
375+
return null;
376+
}
377+
378+
/**
379+
* Converts NodeType enum to the component name string used for directory naming.
380+
*
381+
* @param nodeType Type of the node component
382+
* @return Component name string (e.g., "om", "scm", "dn", "recon")
383+
*/
384+
private static String getComponentName(NodeType nodeType) {
385+
switch (nodeType) {
386+
case OM:
387+
return "om";
388+
case SCM:
389+
return "scm";
390+
case DATANODE:
391+
return "dn";
392+
case RECON:
393+
return "recon";
394+
default:
395+
throw new IllegalArgumentException("Unknown NodeType: " + nodeType);
396+
}
397+
}
398+
399+
/**
400+
* Get the default Ratis snapshot directory for a component when the specific
401+
* configuration is not set. This resolves to a component-specific subdirectory
402+
* under ozone.metadata.dirs to avoid conflicts when multiple components
403+
* are colocated on the same host.
404+
*
405+
* New path format: {ozone.metadata.dirs}/{component}.ratis.{OZONE_RATIS_SNAPSHOT_DIR}
406+
* e.g. /data/metadata/om.ratis.snapshot
407+
* /data/metadata/scm.ratis.snapshot
408+
*
409+
* @param conf Configuration source
410+
* @param nodeType Type of the node component
411+
* @return Path to the component-specific ratis snapshot directory
412+
*/
413+
public static String getDefaultRatisSnapshotDirectory(ConfigurationSource conf,
414+
NodeType nodeType) {
415+
LOG.warn("Snapshot directory for Ratis is not configured. Falling back to {}",
416+
HddsConfigKeys.OZONE_METADATA_DIRS);
417+
File metaDirPath = ServerUtils.getOzoneMetaDirPath(conf);
418+
419+
// Use component-specific location
420+
String componentName = getComponentName(nodeType);
421+
return Paths.get(metaDirPath.getPath(),
422+
componentName + ".ratis." + OzoneConsts.OZONE_RATIS_SNAPSHOT_DIR).toString();
423+
}
424+
425+
/**
426+
* Checks if a directory exists and is non-empty.
427+
*
428+
* @param dir Directory to check
429+
* @return true if the directory exists and contains at least one entry (file or subdirectory)
430+
*/
431+
private static boolean isNonEmptyDirectory(File dir) {
432+
if (dir != null && dir.exists() && dir.isDirectory()) {
433+
File[] files = dir.listFiles();
434+
return files != null && files.length > 0;
435+
}
436+
return false;
309437
}
310438
}

hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
7777
import org.apache.hadoop.hdds.protocol.SCMSecurityProtocol;
7878
import org.apache.hadoop.hdds.protocol.SecretKeyProtocolScm;
79+
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
7980
import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB;
8081
import org.apache.hadoop.hdds.protocolPB.SecretKeyProtocolClientSideTranslatorPB;
8182
import org.apache.hadoop.hdds.protocolPB.SecretKeyProtocolDatanodePB;
@@ -424,7 +425,7 @@ public static Collection<String> getOzoneDatanodeRatisDirectory(
424425

425426
if (rawLocations.isEmpty()) {
426427
rawLocations = new ArrayList<>(1);
427-
rawLocations.add(ServerUtils.getDefaultRatisDirectory(conf));
428+
rawLocations.add(ServerUtils.getDefaultRatisDirectory(conf, NodeType.DATANODE));
428429
}
429430
return rawLocations;
430431
}

0 commit comments

Comments
 (0)