From 9f50fba6fb64ae75dee79eb85560427c6e2b33a3 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Wed, 18 Mar 2026 13:03:00 -0700 Subject: [PATCH 1/7] HDDS-14859. Ignore transient errors while validating RocksDb on volumes. --- .../statemachine/DatanodeConfiguration.java | 31 ++++++++++++++++++ .../container/common/volume/HddsVolume.java | 32 +++++++++++++------ 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 41f6d36971ff..26be0cb2c42d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -61,6 +61,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final String FAILED_DB_VOLUMES_TOLERATED_KEY = "hdds.datanode.failed.db.volumes.tolerated"; public static final String DISK_CHECK_MIN_GAP_KEY = "hdds.datanode.disk.check.min.gap"; public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout"; + public static final String DISK_CHECK_RETRY_GAP_KEY = "hdds.datanode.disk.check.retry.gap"; // Minimum space should be left on volume. // Ex: If volume has 1000GB and minFreeSpace is configured as 10GB, @@ -99,6 +100,8 @@ public class DatanodeConfiguration extends ReconfigurableConfig { static final Duration DISK_CHECK_TIMEOUT_DEFAULT = Duration.ofMinutes(10); + static final Duration DISK_CHECK_RETRY_GAP_DEFAULT = Duration.ofMinutes(1); + static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true; static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024; static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64; @@ -404,6 +407,17 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private Duration diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT; + @Config(key = DISK_CHECK_RETRY_GAP_KEY, + defaultValue = "1m", + type = ConfigType.TIME, + tags = {DATANODE}, + description = "Time to wait between retries of disk checks." + + " To ignore transient issues, the RocksDb instance on a disk is validated multiple times before" + + " declaring failure. This configuration defines the time to wait between the retry attempts." + + " Unit could be defined with postfix (ns,ms,s,m,h,d)." + ) + private Duration diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + @Config(key = "hdds.datanode.chunk.data.validation.check", defaultValue = "false", type = ConfigType.BOOLEAN, @@ -688,6 +702,19 @@ public void validate() { diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT; } + if (diskCheckRetryGap.isNegative()) { + LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", + DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_RETRY_GAP_DEFAULT); + diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + } + + if (diskCheckRetryGap.compareTo(diskCheckTimeout) > 0) { + LOG.warn("{} was set to {}. It must be less than {} which is {}. Defaulting to {}", + DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckTimeout, + DISK_CHECK_RETRY_GAP_DEFAULT); + diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + } + if (blockDeleteCommandWorkerInterval.isNegative()) { LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL + " must be greater than zero and was set to {}. Defaulting to {}", @@ -903,6 +930,10 @@ public Duration getDiskCheckTimeout() { return diskCheckTimeout; } + public Duration getDiskCheckRetryGap() { + return diskCheckRetryGap; + } + public void setDiskCheckTimeout(Duration duration) { diskCheckTimeout = duration; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index f331db7defc3..695616b46d89 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -25,6 +25,7 @@ import jakarta.annotation.Nullable; import java.io.File; import java.io.IOException; +import java.time.Duration; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -326,17 +327,30 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException return VolumeCheckResult.HEALTHY; } + // We attempt to open RocksDb twice to ignore any transient errors + // and to confirm that we actually cannot open RocksDb in readonly mode. final boolean isVolumeTestResultHealthy = true; - try (ManagedOptions managedOptions = new ManagedOptions(); - ManagedRocksDB ignored = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) { - volumeTestResultQueue.add(isVolumeTestResultHealthy); - } catch (Exception e) { - if (Thread.currentThread().isInterrupted()) { - throw new InterruptedException("Check of database for volume " + this + " interrupted."); + final int maxAttempts = 2; + final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap(); + for (int attempt = 0; attempt < maxAttempts; attempt++) { + try (ManagedOptions managedOptions = new ManagedOptions(); + ManagedRocksDB ignored = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) { + volumeTestResultQueue.add(isVolumeTestResultHealthy); + break; + } catch (Exception e) { + if (Thread.currentThread().isInterrupted()) { + throw new InterruptedException("Check of database for volume " + this + " interrupted."); + } + + if (attempt == maxAttempts - 1) { + LOG.error("Could not open Volume DB located at {}", dbFile, e); + volumeTestResultQueue.add(!isVolumeTestResultHealthy); + volumeTestFailureCount.incrementAndGet(); + } else { + LOG.warn("Could not open Volume DB located at {}", dbFile, e); + Thread.sleep(maxRetryGap.toMillis()); + } } - LOG.warn("Could not open Volume DB located at {}", dbFile, e); - volumeTestResultQueue.add(!isVolumeTestResultHealthy); - volumeTestFailureCount.incrementAndGet(); } if (volumeTestResultQueue.size() > volumeTestCount From c2ffea9233772089bd52304ddf2176f64ea56658 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Tue, 24 Mar 2026 13:58:45 -0700 Subject: [PATCH 2/7] HDDS-14859. Add support for opening RocksDB as a secondary for volume validation. --- .../hadoop/ozone/container/common/volume/HddsVolume.java | 3 ++- .../hadoop/hdds/utils/db/managed/ManagedRocksDB.java | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 695616b46d89..1055590a62b1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -334,7 +334,8 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap(); for (int attempt = 0; attempt < maxAttempts; attempt++) { try (ManagedOptions managedOptions = new ManagedOptions(); - ManagedRocksDB ignored = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) { + ManagedRocksDB ignored = + ManagedRocksDB.openAsSecondary(managedOptions, dbFile.toString(), getTmpDir().getPath())) { volumeTestResultQueue.add(isVolumeTestResultHealthy); break; } catch (Exception e) { diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java index 3401469f6824..420da42a0be0 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java @@ -75,6 +75,14 @@ public static ManagedRocksDB openReadOnly( ); } + public static ManagedRocksDB openAsSecondary( + final ManagedOptions options, + final String dbPath, + final String secondaryDbLogFilePath) + throws RocksDBException { + return new ManagedRocksDB(RocksDB.openAsSecondary(options, dbPath, secondaryDbLogFilePath)); + } + public static ManagedRocksDB open( final DBOptions options, final String path, final List columnFamilyDescriptors, From a2160c2f2e35eeee07d08f62c718b810152f17b4 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Sat, 18 Apr 2026 11:27:29 -0700 Subject: [PATCH 3/7] HDDS-14859. Add configurable retry gap for disk checks and validate its constraints. --- .../statemachine/DatanodeConfiguration.java | 31 +++++++++++++++++++ .../container/common/volume/HddsVolume.java | 1 - 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index a9242355aac2..562148ddff34 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -63,6 +63,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final String DISK_CHECK_MIN_GAP_KEY = "hdds.datanode.disk.check.min.gap"; public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout"; public static final String DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY = "hdds.datanode.disk.check.sliding.window.timeout"; + public static final String DISK_CHECK_RETRY_GAP_KEY = "hdds.datanode.disk.check.retry.gap"; // Minimum space should be left on volume. // Ex: If volume has 1000GB and minFreeSpace is configured as 10GB, @@ -104,6 +105,8 @@ public class DatanodeConfiguration extends ReconfigurableConfig { static final Duration DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT = Duration.ofMinutes(PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT).plus(DISK_CHECK_TIMEOUT_DEFAULT); + static final Duration DISK_CHECK_RETRY_GAP_DEFAULT = Duration.ofMinutes(1); + static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true; static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024; static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64; @@ -430,6 +433,17 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private Duration diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; + @Config(key = DISK_CHECK_RETRY_GAP_KEY, + defaultValue = "1m", + type = ConfigType.TIME, + tags = {DATANODE}, + description = "Time to wait between retries of disk checks." + + " To ignore transient issues, the RocksDb instance on a disk is validated multiple times before" + + " declaring failure. This configuration defines the time to wait between the retry attempts." + + " Unit could be defined with postfix (ns,ms,s,m,h,d)." + ) + private Duration diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + @Config(key = "hdds.datanode.chunk.data.validation.check", defaultValue = "false", type = ConfigType.BOOLEAN, @@ -709,6 +723,19 @@ public void validate() { diskCheckSlidingWindowTimeout = defaultTimeout; } + if (diskCheckRetryGap.isNegative()) { + LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", + DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_RETRY_GAP_DEFAULT); + diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + } + + if (diskCheckRetryGap.compareTo(diskCheckTimeout) > 0) { + LOG.warn("{} was set to {}. It must be less than {} which is {}. Defaulting to {}", + DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckTimeout, + DISK_CHECK_RETRY_GAP_DEFAULT); + diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + } + if (blockDeleteCommandWorkerInterval.isNegative()) { LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL + " must be greater than zero and was set to {}. Defaulting to {}", @@ -924,6 +951,10 @@ public Duration getDiskCheckTimeout() { return diskCheckTimeout; } + public Duration getDiskCheckRetryGap() { + return diskCheckRetryGap; + } + public void setDiskCheckTimeout(Duration duration) { diskCheckTimeout = duration; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 8729e986ac16..079ae7219200 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -313,7 +313,6 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException // We attempt to open RocksDb twice to ignore any transient errors // and to confirm that we actually cannot open RocksDb in readonly mode. - final boolean isVolumeTestResultHealthy = true; final int maxAttempts = 2; final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap(); for (int attempt = 0; attempt < maxAttempts; attempt++) { From 7dc2bfd7df86bac3ba107f69bbab544b88088814 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Sat, 18 Apr 2026 12:10:25 -0700 Subject: [PATCH 4/7] HDDS-14859. Add configurable retry attempts for RocksDb disk health checks. --- .../statemachine/DatanodeConfiguration.java | 40 +++++++++++++++++-- .../container/common/volume/HddsVolume.java | 5 +-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 562148ddff34..abda1e169c95 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -64,6 +64,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout"; public static final String DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY = "hdds.datanode.disk.check.sliding.window.timeout"; public static final String DISK_CHECK_RETRY_GAP_KEY = "hdds.datanode.disk.check.retry.gap"; + public static final String DISK_CHECK_RETRY_ATTEMPTS = "hdds.datanode.disk.check.retry.attempts"; // Minimum space should be left on volume. // Ex: If volume has 1000GB and minFreeSpace is configured as 10GB, @@ -106,6 +107,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { Duration.ofMinutes(PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT).plus(DISK_CHECK_TIMEOUT_DEFAULT); static final Duration DISK_CHECK_RETRY_GAP_DEFAULT = Duration.ofMinutes(1); + static final int DISK_CHECK_RETRY_ATTEMPTS_DEFAULT = 2; static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true; static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024; @@ -376,6 +378,14 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private boolean isDiskCheckEnabled = true; + @Config(key = "hdds.datanode.rocksdb.disk.check.io.test.enabled", + defaultValue = "true", + type = ConfigType.BOOLEAN, + tags = {DATANODE}, + description = "The configuration to enable or disable RocksDb disk IO checks." + ) + private boolean isRocksDbDiskCheckEnabled = true; + @Config(key = "hdds.datanode.disk.check.io.failures.tolerated", defaultValue = "1", type = ConfigType.INT, @@ -444,6 +454,14 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private Duration diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; + @Config(key = DISK_CHECK_RETRY_ATTEMPTS, + defaultValue = "2", + type = ConfigType.INT, + tags = {DATANODE}, + description = "Number of retry attempts for opening RocksDb before declaring failure." + ) + private int diskCheckRetryAttempts = DISK_CHECK_RETRY_ATTEMPTS_DEFAULT; + @Config(key = "hdds.datanode.chunk.data.validation.check", defaultValue = "false", type = ConfigType.BOOLEAN, @@ -723,16 +741,22 @@ public void validate() { diskCheckSlidingWindowTimeout = defaultTimeout; } + if (diskCheckRetryAttempts <= 0) { + LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", + DISK_CHECK_RETRY_ATTEMPTS, diskCheckRetryAttempts, DISK_CHECK_RETRY_ATTEMPTS_DEFAULT); + diskCheckRetryAttempts = DISK_CHECK_RETRY_ATTEMPTS_DEFAULT; + } + if (diskCheckRetryGap.isNegative()) { LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_RETRY_GAP_DEFAULT); diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; } - if (diskCheckRetryGap.compareTo(diskCheckTimeout) > 0) { - LOG.warn("{} was set to {}. It must be less than {} which is {}. Defaulting to {}", - DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckTimeout, - DISK_CHECK_RETRY_GAP_DEFAULT); + if (diskCheckRetryGap.compareTo(diskCheckTimeout.dividedBy(diskCheckRetryAttempts)) > 0) { + LOG.warn("{} was set to {}. It must be less than {} / {} which is {}. Defaulting to {}", + DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckRetryAttempts, + diskCheckTimeout.dividedBy(diskCheckRetryAttempts), DISK_CHECK_RETRY_GAP_DEFAULT); diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; } @@ -959,6 +983,10 @@ public void setDiskCheckTimeout(Duration duration) { diskCheckTimeout = duration; } + public int getDiskCheckRetryAttempts() { + return diskCheckRetryAttempts; + } + public void setDiskCheckEnabled(boolean diskCheckEnabled) { isDiskCheckEnabled = diskCheckEnabled; } @@ -967,6 +995,10 @@ public boolean isDiskCheckEnabled() { return isDiskCheckEnabled; } + public boolean isRocksDbDiskCheckEnabled() { + return isRocksDbDiskCheckEnabled; + } + public Duration getDiskCheckSlidingWindowTimeout() { return diskCheckSlidingWindowTimeout; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 079ae7219200..063d8cb3b0e0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -307,13 +307,13 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) @VisibleForTesting public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException { - if (!getDiskCheckEnabled()) { + if (!(getDiskCheckEnabled() && getDatanodeConfig().isRocksDbDiskCheckEnabled())) { return VolumeCheckResult.HEALTHY; } // We attempt to open RocksDb twice to ignore any transient errors // and to confirm that we actually cannot open RocksDb in readonly mode. - final int maxAttempts = 2; + final int maxAttempts = getDatanodeConfig().getDiskCheckRetryAttempts(); final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap(); for (int attempt = 0; attempt < maxAttempts; attempt++) { try (ManagedOptions managedOptions = new ManagedOptions(); @@ -337,7 +337,6 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException } } - if (getIoTestSlidingWindow().isExceeded()) { LOG.error("Failed to open the database at \"{}\" for HDDS volume {}: " + "encountered more than the {} tolerated failures.", From 02a69864acd61aedff8d8d8e6a7770a44b62ed48 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Thu, 23 Apr 2026 16:38:29 -0700 Subject: [PATCH 5/7] HDDS-14859. Remove disk check retry logic and related configurations. --- .../statemachine/DatanodeConfiguration.java | 55 ------------------- .../container/common/volume/HddsVolume.java | 36 ++++-------- 2 files changed, 12 insertions(+), 79 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index abda1e169c95..7de24fb8b184 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -63,8 +63,6 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final String DISK_CHECK_MIN_GAP_KEY = "hdds.datanode.disk.check.min.gap"; public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout"; public static final String DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY = "hdds.datanode.disk.check.sliding.window.timeout"; - public static final String DISK_CHECK_RETRY_GAP_KEY = "hdds.datanode.disk.check.retry.gap"; - public static final String DISK_CHECK_RETRY_ATTEMPTS = "hdds.datanode.disk.check.retry.attempts"; // Minimum space should be left on volume. // Ex: If volume has 1000GB and minFreeSpace is configured as 10GB, @@ -106,9 +104,6 @@ public class DatanodeConfiguration extends ReconfigurableConfig { static final Duration DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT = Duration.ofMinutes(PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT).plus(DISK_CHECK_TIMEOUT_DEFAULT); - static final Duration DISK_CHECK_RETRY_GAP_DEFAULT = Duration.ofMinutes(1); - static final int DISK_CHECK_RETRY_ATTEMPTS_DEFAULT = 2; - static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true; static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024; static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64; @@ -443,25 +438,6 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private Duration diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; - @Config(key = DISK_CHECK_RETRY_GAP_KEY, - defaultValue = "1m", - type = ConfigType.TIME, - tags = {DATANODE}, - description = "Time to wait between retries of disk checks." - + " To ignore transient issues, the RocksDb instance on a disk is validated multiple times before" - + " declaring failure. This configuration defines the time to wait between the retry attempts." - + " Unit could be defined with postfix (ns,ms,s,m,h,d)." - ) - private Duration diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; - - @Config(key = DISK_CHECK_RETRY_ATTEMPTS, - defaultValue = "2", - type = ConfigType.INT, - tags = {DATANODE}, - description = "Number of retry attempts for opening RocksDb before declaring failure." - ) - private int diskCheckRetryAttempts = DISK_CHECK_RETRY_ATTEMPTS_DEFAULT; - @Config(key = "hdds.datanode.chunk.data.validation.check", defaultValue = "false", type = ConfigType.BOOLEAN, @@ -741,25 +717,6 @@ public void validate() { diskCheckSlidingWindowTimeout = defaultTimeout; } - if (diskCheckRetryAttempts <= 0) { - LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", - DISK_CHECK_RETRY_ATTEMPTS, diskCheckRetryAttempts, DISK_CHECK_RETRY_ATTEMPTS_DEFAULT); - diskCheckRetryAttempts = DISK_CHECK_RETRY_ATTEMPTS_DEFAULT; - } - - if (diskCheckRetryGap.isNegative()) { - LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", - DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_RETRY_GAP_DEFAULT); - diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; - } - - if (diskCheckRetryGap.compareTo(diskCheckTimeout.dividedBy(diskCheckRetryAttempts)) > 0) { - LOG.warn("{} was set to {}. It must be less than {} / {} which is {}. Defaulting to {}", - DISK_CHECK_RETRY_GAP_KEY, diskCheckRetryGap, DISK_CHECK_TIMEOUT_KEY, diskCheckRetryAttempts, - diskCheckTimeout.dividedBy(diskCheckRetryAttempts), DISK_CHECK_RETRY_GAP_DEFAULT); - diskCheckRetryGap = DISK_CHECK_RETRY_GAP_DEFAULT; - } - if (blockDeleteCommandWorkerInterval.isNegative()) { LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL + " must be greater than zero and was set to {}. Defaulting to {}", @@ -975,18 +932,6 @@ public Duration getDiskCheckTimeout() { return diskCheckTimeout; } - public Duration getDiskCheckRetryGap() { - return diskCheckRetryGap; - } - - public void setDiskCheckTimeout(Duration duration) { - diskCheckTimeout = duration; - } - - public int getDiskCheckRetryAttempts() { - return diskCheckRetryAttempts; - } - public void setDiskCheckEnabled(boolean diskCheckEnabled) { isDiskCheckEnabled = diskCheckEnabled; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 063d8cb3b0e0..438e032a330f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -25,7 +25,6 @@ import jakarta.annotation.Nullable; import java.io.File; import java.io.IOException; -import java.time.Duration; import java.util.Iterator; import java.util.List; import java.util.concurrent.ConcurrentSkipListSet; @@ -311,32 +310,21 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException return VolumeCheckResult.HEALTHY; } - // We attempt to open RocksDb twice to ignore any transient errors - // and to confirm that we actually cannot open RocksDb in readonly mode. - final int maxAttempts = getDatanodeConfig().getDiskCheckRetryAttempts(); - final Duration maxRetryGap = getDatanodeConfig().getDiskCheckRetryGap(); - for (int attempt = 0; attempt < maxAttempts; attempt++) { - try (ManagedOptions managedOptions = new ManagedOptions(); - ManagedRocksDB ignored = - ManagedRocksDB.openAsSecondary(managedOptions, dbFile.toString(), getTmpDir().getPath())) { - // Do nothing. Only check if rocksdb is accessible. - LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}.", dbFile, getStorageDir()); - break; - } catch (Exception e) { - if (Thread.currentThread().isInterrupted()) { - throw new InterruptedException("Check of database for volume " + this + " interrupted."); - } - - if (attempt == maxAttempts - 1) { - LOG.error("Could not open Volume DB located at {}", dbFile, e); - getIoTestSlidingWindow().add(); - } else { - LOG.warn("Could not open Volume DB located at {}", dbFile, e); - Thread.sleep(maxRetryGap.toMillis()); - } + try (ManagedOptions managedOptions = new ManagedOptions(); + ManagedRocksDB ignored = + ManagedRocksDB.openAsSecondary(managedOptions, dbFile.toString(), getTmpDir().getPath())) { + // Do nothing. Only check if rocksdb is accessible. + LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}.", dbFile, getStorageDir()); + } catch (Exception e) { + if (Thread.currentThread().isInterrupted()) { + throw new InterruptedException("Check of database for volume " + this + " interrupted."); } + + LOG.error("Could not open Volume DB located at {}", dbFile, e); + getIoTestSlidingWindow().add(); } + if (getIoTestSlidingWindow().isExceeded()) { LOG.error("Failed to open the database at \"{}\" for HDDS volume {}: " + "encountered more than the {} tolerated failures.", From dafaf60dc56e83c1a549ad33885abd938668734c Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Thu, 23 Apr 2026 16:47:18 -0700 Subject: [PATCH 6/7] HDDS-14859. Add setter for diskCheckTimeout in DatanodeConfiguration. --- .../container/common/statemachine/DatanodeConfiguration.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 7de24fb8b184..eb5849e0c3bf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -932,6 +932,10 @@ public Duration getDiskCheckTimeout() { return diskCheckTimeout; } + public void setDiskCheckTimeout(Duration duration) { + diskCheckTimeout = duration; + } + public void setDiskCheckEnabled(boolean diskCheckEnabled) { isDiskCheckEnabled = diskCheckEnabled; } From 1986d60341cfb008a25f47e3c46b4051e9ace409 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Mon, 27 Apr 2026 11:15:59 -0700 Subject: [PATCH 7/7] HDDS-14859. Remove deprecated RocksDb disk IO health check configuration. --- .../common/statemachine/DatanodeConfiguration.java | 12 ------------ .../ozone/container/common/volume/HddsVolume.java | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index a4b119efe782..919777f35191 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -384,14 +384,6 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private boolean isDiskCheckEnabled = true; - @Config(key = "hdds.datanode.rocksdb.disk.check.io.test.enabled", - defaultValue = "true", - type = ConfigType.BOOLEAN, - tags = {DATANODE}, - description = "The configuration to enable or disable RocksDb disk IO checks." - ) - private boolean isRocksDbDiskCheckEnabled = true; - @Config(key = "hdds.datanode.disk.check.io.failures.tolerated", defaultValue = "1", type = ConfigType.INT, @@ -1021,10 +1013,6 @@ public boolean isDiskCheckEnabled() { return isDiskCheckEnabled; } - public boolean isRocksDbDiskCheckEnabled() { - return isRocksDbDiskCheckEnabled; - } - public Duration getDiskCheckSlidingWindowTimeout() { return diskCheckSlidingWindowTimeout; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 438e032a330f..67c6098d8bdd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -306,7 +306,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) @VisibleForTesting public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException { - if (!(getDiskCheckEnabled() && getDatanodeConfig().isRocksDbDiskCheckEnabled())) { + if (!getDiskCheckEnabled()) { return VolumeCheckResult.HEALTHY; }