From fa2d9839584ea9a673a1c6ecbdab7f47efb9514b Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Sun, 30 Nov 2025 22:41:56 +0530 Subject: [PATCH 1/7] HDDS-14039. Create Grafana dashboard for Ozone SCM safemode rules and exit --- .../scm/safemode/DataNodeSafeModeRule.java | 8 +- .../hdds/scm/safemode/SCMSafeModeManager.java | 1 + .../hdds/scm/safemode/SafeModeMetrics.java | 21 + .../safemode/TestDataNodeSafeModeRule.java | 2 + .../dashboards/Ozone - SCM Safemode.json | 636 ++++++++++++++++++ 5 files changed, 667 insertions(+), 1 deletion(-) create mode 100644 hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java index 63be485e0289..0cd763413e89 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java @@ -51,6 +51,7 @@ public DataNodeSafeModeRule(EventQueue eventQueue, requiredDns = conf.getInt( HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT); + getSafeModeMetrics().setNumRequiredDatanodesThreshold(requiredDns); registeredDnSet = new HashSet<>(requiredDns * 2); this.nodeManager = nodeManager; } @@ -71,9 +72,14 @@ protected boolean validate() { @Override protected void process(NodeRegistrationContainerReport reportsProto) { - registeredDnSet.add(reportsProto.getDatanodeDetails().getID()); + DatanodeID dnId = reportsProto.getDatanodeDetails().getID(); + boolean added = registeredDnSet.add(dnId); registeredDns = registeredDnSet.size(); + if (added) { + getSafeModeMetrics().incCurrentRegisteredDatanodesCount(); + } + if (scmInSafeMode()) { SCMSafeModeManager.getLogger().info( "SCM in safe mode. {} DataNodes registered, {} required.", diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java index bc2a26fbf91c..67d47d101df6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java @@ -119,6 +119,7 @@ public SafeModeMetrics getSafeModeMetrics() { private void emitSafeModeStatus() { final SafeModeStatus safeModeStatus = status.get(); + safeModeMetrics.setScmInSafeMode(safeModeStatus.isInSafeMode()); scmContext.updateSafeModeStatus(safeModeStatus); // notify SCMServiceManager diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java index f5f4ce129923..bf77eaee8bbb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java @@ -52,6 +52,11 @@ public class SafeModeMetrics { private @Metric MutableCounterLong currentPipelinesWithAtleastOneReplicaReportedCount; + @Metric private MutableGaugeLong scmInSafeMode; + + @Metric private MutableGaugeLong numRequiredDatanodesThreshold; + @Metric private MutableCounterLong currentRegisteredDatanodesCount; + public static SafeModeMetrics create() { final MetricsSystem ms = DefaultMetricsSystem.instance(); return ms.register(SOURCE_NAME, "SCM Safemode Metrics", new SafeModeMetrics()); @@ -86,6 +91,14 @@ public void setNumContainerReportedThreshold(HddsProtos.ReplicationType type, lo } } + public void setScmInSafeMode(boolean inSafeMode) { + this.scmInSafeMode.set(inSafeMode ? 1 : 0); + } + + public void setNumRequiredDatanodesThreshold(long val) { + this.numRequiredDatanodesThreshold.set(val); + } + public void incCurrentContainersWithOneReplicaReportedCount() { this.currentContainersWithOneReplicaReportedCount.incr(); } @@ -94,6 +107,10 @@ public void incCurrentContainersWithECDataReplicaReportedCount() { this.currentContainersWithECDataReplicaReportedCount.incr(); } + public void incCurrentRegisteredDatanodesCount() { + this.currentRegisteredDatanodesCount.incr(); + } + MutableGaugeLong getNumHealthyPipelinesThreshold() { return numHealthyPipelinesThreshold; } @@ -122,6 +139,10 @@ MutableGaugeLong getNumContainerWithECDataReplicaReportedThreshold() { MutableCounterLong getCurrentContainersWithOneReplicaReportedCount() { return currentContainersWithOneReplicaReportedCount; } + + MutableCounterLong getCurrentRegisteredDatanodesCount() { + return currentRegisteredDatanodesCount; + } public void unRegister() { MetricsSystem ms = DefaultMetricsSystem.instance(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java index c62293e76489..bfb804672744 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java @@ -65,6 +65,8 @@ private void setup(int requiredDns) throws Exception { eventQueue = new EventQueue(); mockSafeModeManager = mock(SCMSafeModeManager.class); + SafeModeMetrics metrics = mock(SafeModeMetrics.class); + when(mockSafeModeManager.getSafeModeMetrics()).thenReturn(metrics); rule = new DataNodeSafeModeRule(eventQueue, ozoneConfiguration, nodeManager, mockSafeModeManager); assertNotNull(rule); diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json new file mode 100644 index 000000000000..18822b3b9d1c --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json @@ -0,0 +1,636 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "prometheus" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "panels": [ + { + "collapsed": true, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [ + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Binary", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 0, "y": 1 }, + "id": 101, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_scm_in_safe_mode", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SCM In Safemode (1=yes, 0=no)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Containers", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 9, "x": 6, "y": 1 }, + "id": 102, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_num_container_with_one_replica_reported_threshold", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} target", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_containers_with_one_replica_reported_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Ratis Containers: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Containers", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 9, "x": 15, "y": 1 }, + "id": 103, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_num_container_with_e_c_data_replica_reported_threshold", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} target", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_containers_with_e_c_data_replica_reported_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "EC Containers: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Pipelines", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 8 }, + "id": 104, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_num_healthy_pipelines_threshold", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} target", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_healthy_pipelines_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Healthy Pipelines: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Pipelines", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 8 }, + "id": 105, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_num_pipelines_with_atleast_one_replica_reported_threshold", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} target", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_pipelines_with_atleast_one_replica_reported_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "One-Replica Pipelines: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "DataNodes", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 8 }, + "id": 106, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_num_required_datanodes_threshold", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} target", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_registered_datanodes_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Registered DataNodes: Target vs Actual", + "type": "timeseries" + } + ], + "title": "SCM Safemode: Summary", + "type": "row" + } + ], + "preload": false, + "refresh": "45s", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Ozone - SCM Safemode", + "weekStart": "" +} From f15eb49d95fcf388ff3ad9a8b4dccc079858f32a Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Mon, 1 Dec 2025 10:20:27 +0530 Subject: [PATCH 2/7] Fixed metric name --- .../common/grafana/dashboards/Ozone - SCM Safemode.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json index 18822b3b9d1c..5dbd3abe4ce9 100644 --- a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json @@ -291,7 +291,7 @@ { "disableTextWrap": false, "editorMode": "builder", - "expr": "safe_mode_metrics_num_container_with_e_c_data_replica_reported_threshold", + "expr": "safe_mode_metrics_num_container_with_ec_data_replica_reported_threshold", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "{{hostname}} target", @@ -302,7 +302,7 @@ { "disableTextWrap": false, "editorMode": "builder", - "expr": "safe_mode_metrics_current_containers_with_e_c_data_replica_reported_count", + "expr": "safe_mode_metrics_current_containers_with_ec_data_replica_reported_count", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "{{hostname}} actual", From 1f1fd6faed11c605b757a19d57857f801676a300 Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Fri, 5 Dec 2025 23:48:53 +0530 Subject: [PATCH 3/7] Updated grafana dashboard --- .../grafana/dashboards/Ozone - SCM Safemode.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json index 5dbd3abe4ce9..02837542417d 100644 --- a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json @@ -31,6 +31,9 @@ }, "fieldConfig": { "defaults": { + "min": 0, + "max": 1, + "decimals": 0, "color": { "mode": "palette-classic" }, @@ -120,6 +123,8 @@ }, "fieldConfig": { "defaults": { + "min": 0, + "decimals": 0, "color": { "mode": "palette-classic" }, @@ -220,6 +225,8 @@ }, "fieldConfig": { "defaults": { + "min": 0, + "decimals": 0, "color": { "mode": "palette-classic" }, @@ -320,6 +327,8 @@ }, "fieldConfig": { "defaults": { + "min": 0, + "decimals": 0, "color": { "mode": "palette-classic" }, @@ -420,6 +429,8 @@ }, "fieldConfig": { "defaults": { + "min": 0, + "decimals": 0, "color": { "mode": "palette-classic" }, @@ -520,6 +531,8 @@ }, "fieldConfig": { "defaults": { + "min": 0, + "decimals": 0, "color": { "mode": "palette-classic" }, From 04a7b864fd644aea434e0466471d63d9d563d8ab Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Fri, 12 Dec 2025 13:19:19 +0530 Subject: [PATCH 4/7] Added test coverage for metrics and minor update to dashboard --- .../hdds/scm/safemode/SafeModeMetrics.java | 8 ++++- .../safemode/TestDataNodeSafeModeRule.java | 8 +++-- .../scm/safemode/TestSCMSafeModeManager.java | 30 +++++++++++++++---- .../dashboards/Ozone - SCM Safemode.json | 2 +- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java index bf77eaee8bbb..ae65eafcb910 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java @@ -22,6 +22,7 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; /** @@ -52,7 +53,8 @@ public class SafeModeMetrics { private @Metric MutableCounterLong currentPipelinesWithAtleastOneReplicaReportedCount; - @Metric private MutableGaugeLong scmInSafeMode; + @Metric("Metric will be set to 1 if SCM is in SafeMode, otherwise 0") + private MutableGaugeInt scmInSafeMode; @Metric private MutableGaugeLong numRequiredDatanodesThreshold; @Metric private MutableCounterLong currentRegisteredDatanodesCount; @@ -144,6 +146,10 @@ MutableCounterLong getCurrentRegisteredDatanodesCount() { return currentRegisteredDatanodesCount; } + MutableGaugeInt getScmInSafeMode() { + return scmInSafeMode; + } + public void unRegister() { MetricsSystem ms = DefaultMetricsSystem.instance(); ms.unregisterSource(SOURCE_NAME); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java index bfb804672744..011e97aac99f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hdds.scm.safemode; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -53,6 +54,7 @@ public class TestDataNodeSafeModeRule { private EventQueue eventQueue; private NodeManager nodeManager; private SCMSafeModeManager mockSafeModeManager; + private SafeModeMetrics metrics; private void setup(int requiredDns) throws Exception { OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); @@ -65,7 +67,7 @@ private void setup(int requiredDns) throws Exception { eventQueue = new EventQueue(); mockSafeModeManager = mock(SCMSafeModeManager.class); - SafeModeMetrics metrics = mock(SafeModeMetrics.class); + metrics = SafeModeMetrics.create(); when(mockSafeModeManager.getSafeModeMetrics()).thenReturn(metrics); rule = new DataNodeSafeModeRule(eventQueue, ozoneConfiguration, nodeManager, mockSafeModeManager); @@ -96,6 +98,7 @@ public void testDataNodeSafeModeRuleWithNoNodes() throws Exception { "SCM in safe mode. 1 DataNodes registered, 1 required."), 1000, 5000); assertTrue(rule.validate()); + assertEquals(1, metrics.getCurrentRegisteredDatanodesCount().value()); } @Test @@ -122,7 +125,7 @@ public void testDataNodeSafeModeRuleWithMultipleNodes() throws Exception { "SCM in safe mode. 2 DataNodes registered, 3 required."), 1000, 5000); assertFalse(rule.validate()); - + assertEquals(2, metrics.getCurrentRegisteredDatanodesCount().value()); DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails(); NodeRegistrationContainerReport nodeReg = new NodeRegistrationContainerReport(dd, null); @@ -133,6 +136,7 @@ public void testDataNodeSafeModeRuleWithMultipleNodes() throws Exception { "SCM in safe mode. 3 DataNodes registered, 3 required."), 1000, 5000); assertTrue(rule.validate()); + assertEquals(3, metrics.getCurrentRegisteredDatanodesCount().value()); } @Test diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java index 1cbd6bc3725b..0714f38cfdaf 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java @@ -109,6 +109,9 @@ public void setUp() throws IOException { @AfterEach public void destroyDbStore() throws Exception { + if (scmSafeModeManager != null) { + scmSafeModeManager.getSafeModeMetrics().unRegister(); + } if (scmMetadataStore.getStore() != null) { scmMetadataStore.getStore().close(); } @@ -136,6 +139,7 @@ private void testSafeMode(int numContainers) throws Exception { scmSafeModeManager.start(); assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); validateRuleStatus("DatanodeSafeModeRule", "registered datanodes 0"); SCMDatanodeProtocolServer.NodeRegistrationContainerReport nodeRegistrationContainerReport = HddsTestUtils.createNodeRegistrationContainerReport(containers); @@ -149,7 +153,8 @@ private void testSafeMode(int numContainers) throws Exception { assertEquals(cutOff, scmSafeModeManager.getSafeModeMetrics() .getNumContainerWithOneReplicaReportedThreshold().value()); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, 100, 1000 * 5); assertEquals(cutOff, scmSafeModeManager.getSafeModeMetrics() @@ -182,6 +187,7 @@ public void testSafeModeExitRule() throws Exception { .getNumContainerWithOneReplicaReportedThreshold().value()); assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); validateRuleStatus("ContainerSafeModeRule", "0.00% of [Ratis] Containers(0 / 100) with at least one reported"); testContainerThreshold(containers.subList(0, 25), 0.25); @@ -200,7 +206,8 @@ public void testSafeModeExitRule() throws Exception { assertEquals(100, scmSafeModeManager.getSafeModeMetrics() .getCurrentContainersWithOneReplicaReportedCount().value()); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, 100, 1000 * 5); } @@ -306,6 +313,7 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( scmSafeModeManager.start(); assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); if (healthyPipelinePercent > 0) { validateRuleStatus("HealthyPipelineSafeModeRule", "healthy Ratis/THREE pipelines"); @@ -365,7 +373,8 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( .getCurrentPipelinesWithAtleastOneReplicaCount().value()); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, 100, 1000 * 5); } @@ -477,8 +486,10 @@ public void testContainerSafeModeRule() throws Exception { scmSafeModeManager = new SCMSafeModeManager(config, null, null, containerManager, serviceManager, queue, scmContext); + scmSafeModeManager.start(); assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); // When 10 CLOSED containers are reported by DNs, the computed container // threshold should be 10/20 as there are only 20 CLOSED NON-EMPTY @@ -492,7 +503,8 @@ public void testContainerSafeModeRule() throws Exception { // the container threshold should be (10+10)/20. testContainerThreshold(containers.subList(10, 25), 1.0); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, 100, 1000 * 5); } @@ -584,6 +596,7 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception { // Assert SCM is in Safe mode. assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); // Register all DataNodes except last one and assert SCM is in safe mode. for (int i = 0; i < numOfDns - 1; i++) { @@ -604,7 +617,8 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception { // Register last DataNode and check that SCM is out of Safe mode. queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, HddsTestUtils.createNodeRegistrationContainerReport(containers)); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, 10, 1000 * 10); } @@ -700,10 +714,12 @@ public void testSafeModePipelineExitRule() throws Exception { assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); firePipelineEvent(pipelineManager, pipeline); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, 100, 1000 * 10); pipelineManager.close(); } @@ -744,6 +760,7 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() throws Exception { // Assert SCM is in Safe mode. assertTrue(scmSafeModeManager.getInSafeMode()); + assertEquals(1, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); // stop background pipeline creator as we manually create // pipeline below @@ -781,5 +798,6 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() throws Exception { queue.processAll(5000); assertTrue(scmSafeModeManager.getPreCheckComplete()); assertFalse(scmSafeModeManager.getInSafeMode()); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); } } diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json index 02837542417d..27442a3d8466 100644 --- a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json @@ -21,7 +21,7 @@ "links": [], "panels": [ { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 100, "panels": [ From 3f3c876318fd77cc736fb5b89b8b363dddbfec1e Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Thu, 18 Dec 2025 11:36:21 +0530 Subject: [PATCH 5/7] Updated test --- .../scm/safemode/TestSCMSafeModeManager.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java index 0714f38cfdaf..f74a0e4bdfd9 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java @@ -153,9 +153,9 @@ private void testSafeMode(int numContainers) throws Exception { assertEquals(cutOff, scmSafeModeManager.getSafeModeMetrics() .getNumContainerWithOneReplicaReportedThreshold().value()); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && - scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); assertEquals(cutOff, scmSafeModeManager.getSafeModeMetrics() .getCurrentContainersWithOneReplicaReportedCount().value()); @@ -206,9 +206,9 @@ public void testSafeModeExitRule() throws Exception { assertEquals(100, scmSafeModeManager.getSafeModeMetrics() .getCurrentContainersWithOneReplicaReportedCount().value()); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && - scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); } private OzoneConfiguration createConf(double healthyPercent, @@ -373,9 +373,9 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( .getCurrentPipelinesWithAtleastOneReplicaCount().value()); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && - scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); } /** @@ -503,9 +503,9 @@ public void testContainerSafeModeRule() throws Exception { // the container threshold should be (10+10)/20. testContainerThreshold(containers.subList(10, 25), 1.0); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && - scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); } // We simulate common EC types: EC-2-2-1024K, EC-3-2-1024K, EC-6-3-1024K. @@ -617,9 +617,10 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception { // Register last DataNode and check that SCM is out of Safe mode. queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, HddsTestUtils.createNodeRegistrationContainerReport(containers)); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && - scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 10, 1000 * 10); + queue.processAll(5000); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); } private void testContainerThreshold(List dnContainers, @@ -718,9 +719,9 @@ public void testSafeModePipelineExitRule() throws Exception { firePipelineEvent(pipelineManager, pipeline); - GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode() && - scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 10); + assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); pipelineManager.close(); } From 476656d30198de0afa4f9a42c10d2e2b97c203df Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Tue, 30 Dec 2025 10:17:27 +0530 Subject: [PATCH 6/7] Updated grafana dashboard --- .../dashboards/Ozone - SCM Safemode.json | 1231 +++++++++-------- 1 file changed, 674 insertions(+), 557 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json index 27442a3d8466..ac0c291b83a6 100644 --- a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - SCM Safemode.json @@ -20,616 +20,733 @@ "id": 1, "links": [], "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [], + "title": "SCM Safemode: Summary", + "type": "row" + }, { - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 100, - "panels": [ - { - "datasource": { - "type": "prometheus" + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "fieldConfig": { - "defaults": { - "min": 0, - "max": 1, - "decimals": 0, - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Binary", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "unit": "short", + "decimals": 0, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "text": "Exited safemode" }, - "thresholdsStyle": { - "mode": "off" + "1": { + "color": "red", + "text": "In Safemode" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { "h": 7, "w": 6, "x": 0, "y": 1 }, - "id": 101, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_scm_in_safe_mode", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}}", - "range": true, - "refId": "A", - "useBackend": false + "type": "value" } ], - "title": "SCM In Safemode (1=yes, 0=no)", - "type": "timeseries" + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + } }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 101, + "options": { + "alignValue": "center", + "legend": { + "displayMode": "hidden", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": true, + "rowHeight": 0.9, + "showValue": "always", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { - "datasource": { - "type": "prometheus" + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_scm_in_safe_mode", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SCM Safemode Status", + "type": "state-timeline" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "decimals": 0, + "color": { + "mode": "palette-classic" }, - "fieldConfig": { - "defaults": { - "min": 0, - "decimals": 0, - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Containers", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Containers", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { "h": 7, "w": 9, "x": 6, "y": 1 }, - "id": 102, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_num_container_with_one_replica_reported_threshold", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} target", - "range": true, - "refId": "A", - "useBackend": false + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_current_containers_with_one_replica_reported_count", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} actual", - "range": true, - "refId": "B", - "useBackend": false + "thresholdsStyle": { + "mode": "off" } - ], - "title": "Ratis Containers: Target vs Actual", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus" }, - "fieldConfig": { - "defaults": { - "min": 0, - "decimals": 0, - "color": { - "mode": "palette-classic" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Containers", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Target Threshold" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [10, 10], + "fill": "dash" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "id": "custom.lineWidth", + "value": 3 + }, + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } } - }, - "overrides": [] + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 102, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "max(safe_mode_metrics_num_container_with_one_replica_reported_threshold)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "Target Threshold", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_containers_with_one_replica_reported_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Ratis Containers: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "decimals": 0, + "color": { + "mode": "palette-classic" }, - "gridPos": { "h": 7, "w": 9, "x": 15, "y": 1 }, - "id": 103, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Containers", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_num_container_with_ec_data_replica_reported_threshold", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} target", - "range": true, - "refId": "A", - "useBackend": false + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_current_containers_with_ec_data_replica_reported_count", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} actual", - "range": true, - "refId": "B", - "useBackend": false + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ], - "title": "EC Containers: Target vs Actual", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus" }, - "fieldConfig": { - "defaults": { - "min": 0, - "decimals": 0, - "color": { - "mode": "palette-classic" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Pipelines", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Target Threshold" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [10, 10], + "fill": "dash" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "id": "custom.lineWidth", + "value": 3 + }, + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } } - }, - "overrides": [] + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, + "id": 103, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "max(safe_mode_metrics_num_container_with_ec_data_replica_reported_threshold)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "Target Threshold", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_containers_with_ec_data_replica_reported_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "EC Containers: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "decimals": 0, + "color": { + "mode": "palette-classic" }, - "gridPos": { "h": 7, "w": 8, "x": 0, "y": 8 }, - "id": 104, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Pipelines", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_num_healthy_pipelines_threshold", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} target", - "range": true, - "refId": "A", - "useBackend": false + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_current_healthy_pipelines_count", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} actual", - "range": true, - "refId": "B", - "useBackend": false + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ], - "title": "Healthy Pipelines: Target vs Actual", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus" }, - "fieldConfig": { - "defaults": { - "min": 0, - "decimals": 0, - "color": { - "mode": "palette-classic" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Pipelines", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Target Threshold" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [10, 10], + "fill": "dash" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "id": "custom.lineWidth", + "value": 3 + }, + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } } - }, - "overrides": [] + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, + "id": 104, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "max(safe_mode_metrics_num_healthy_pipelines_threshold)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "Target Threshold", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_healthy_pipelines_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Healthy Pipelines: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "decimals": 0, + "color": { + "mode": "palette-classic" }, - "gridPos": { "h": 7, "w": 8, "x": 8, "y": 8 }, - "id": 105, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Pipelines", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_num_pipelines_with_atleast_one_replica_reported_threshold", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} target", - "range": true, - "refId": "A", - "useBackend": false + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_current_pipelines_with_atleast_one_replica_reported_count", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} actual", - "range": true, - "refId": "B", - "useBackend": false + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ], - "title": "One-Replica Pipelines: Target vs Actual", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus" }, - "fieldConfig": { - "defaults": { - "min": 0, - "decimals": 0, - "color": { - "mode": "palette-classic" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "DataNodes", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Target Threshold" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [10, 10], + "fill": "dash" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "id": "custom.lineWidth", + "value": 3 + }, + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } } - }, - "overrides": [] + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 17 }, + "id": 105, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "max(safe_mode_metrics_num_pipelines_with_atleast_one_replica_reported_threshold)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "Target Threshold", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_pipelines_with_atleast_one_replica_reported_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "One-Replica Pipelines: Target vs Actual", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "decimals": 0, + "color": { + "mode": "palette-classic" }, - "gridPos": { "h": 7, "w": 8, "x": 16, "y": 8 }, - "id": 106, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "DataNodes", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "tooltip": { - "mode": "single", - "sort": "none" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_num_required_datanodes_threshold", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} target", - "range": true, - "refId": "A", - "useBackend": false + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Target Threshold" }, - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "safe_mode_metrics_current_registered_datanodes_count", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "{{hostname}} actual", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Registered DataNodes: Target vs Actual", - "type": "timeseries" + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [10, 10], + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 3 + }, + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 17 }, + "id": 106, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "max(safe_mode_metrics_num_required_datanodes_threshold)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "Target Threshold", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "safe_mode_metrics_current_registered_datanodes_count", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{hostname}} actual", + "range": true, + "refId": "B", + "useBackend": false } ], - "title": "SCM Safemode: Summary", - "type": "row" + "title": "Registered DataNodes: Target vs Actual", + "type": "timeseries" } ], "preload": false, From 03d0292d9660afacc89370e04e897e83c30a44f9 Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati Date: Mon, 5 Jan 2026 09:54:39 +0530 Subject: [PATCH 7/7] Fixed test issues --- .../scm/safemode/TestSCMSafeModeManager.java | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java index f74a0e4bdfd9..1ef531f8bf85 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java @@ -155,7 +155,9 @@ private void testSafeMode(int numContainers) throws Exception { GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); assertEquals(cutOff, scmSafeModeManager.getSafeModeMetrics() .getCurrentContainersWithOneReplicaReportedCount().value()); @@ -208,7 +210,9 @@ public void testSafeModeExitRule() throws Exception { GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); } private OzoneConfiguration createConf(double healthyPercent, @@ -375,7 +379,9 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); } /** @@ -505,7 +511,9 @@ public void testContainerSafeModeRule() throws Exception { GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 5); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); } // We simulate common EC types: EC-2-2-1024K, EC-3-2-1024K, EC-6-3-1024K. @@ -619,8 +627,9 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception { HddsTestUtils.createNodeRegistrationContainerReport(containers)); GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 10, 1000 * 10); - queue.processAll(5000); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); } private void testContainerThreshold(List dnContainers, @@ -721,7 +730,9 @@ public void testSafeModePipelineExitRule() throws Exception { GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 100, 1000 * 10); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); pipelineManager.close(); } @@ -799,6 +810,8 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() throws Exception { queue.processAll(5000); assertTrue(scmSafeModeManager.getPreCheckComplete()); assertFalse(scmSafeModeManager.getInSafeMode()); - assertEquals(0, scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value()); + GenericTestUtils.waitFor(() -> + scmSafeModeManager.getSafeModeMetrics().getScmInSafeMode().value() == 0, + 100, 1000 * 5); } }