From 6cd41bbfd75dcbbdd083308860b0ffa8ad1bb939 Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Fri, 24 Apr 2026 00:59:56 +0530 Subject: [PATCH 1/4] HDDS-15004. Stabilize TestReconContainerEndpoint#testContainerEndpointForOBSBucket --- .../recon/TestReconContainerEndpoint.java | 46 ++++++++++++++++--- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java index a8863046f6ee..92fe2498e014 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java @@ -36,11 +36,14 @@ import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.recon.api.ContainerEndpoint; import org.apache.hadoop.ozone.recon.api.types.KeyMetadata; import org.apache.hadoop.ozone.recon.api.types.KeysResponse; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; +import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperHelper; import org.apache.hadoop.ozone.recon.tasks.ReconTaskControllerImpl; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; @@ -60,6 +63,9 @@ public class TestReconContainerEndpoint { @BeforeEach public void init() throws Exception { + // ContainerKeyMapper tasks share static maps/flags across the JVM; reset so a + // prior test method cannot break mapper state for this cluster instance. + ContainerKeyMapperHelper.clearSharedContainerCountMap(); OzoneConfiguration conf = new OzoneConfiguration(); conf.set(OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT, OMConfigKeys.OZONE_BUCKET_LAYOUT_FILE_SYSTEM_OPTIMIZED); @@ -77,11 +83,15 @@ public void init() throws Exception { @AfterEach public void shutdown() throws IOException { - if (client != null) { - client.close(); - } - if (cluster != null) { - cluster.shutdown(); + try { + if (client != null) { + client.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + } finally { + ContainerKeyMapperHelper.clearSharedContainerCountMap(); } } @@ -117,6 +127,9 @@ public void testContainerEndpointForFSOLayout() throws Exception { CompletableFuture completableFuture = omMetaManagerUtils.waitForEventBufferEmpty(reconTaskController.getEventBuffer()); GenericTestUtils.waitFor(completableFuture::isDone, 100, 30000); + completableFuture.join(); + // The buffer can be empty while tasks still finish processing a dequeued batch. + Thread.sleep(2000); //Search for the bucket from the bucket table and verify its FSO OmBucketInfo bucketInfo = cluster.getOzoneManager().getBucketInfo(volName, bucketName); @@ -186,14 +199,17 @@ public void testContainerEndpointForOBSBucket() throws Exception { CompletableFuture completableFuture = omMetaManagerUtils.waitForEventBufferEmpty(reconTaskController.getEventBuffer()); GenericTestUtils.waitFor(completableFuture::isDone, 100, 30000); + completableFuture.join(); + Thread.sleep(2000); // Search for the bucket from the bucket table and verify its OBS OmBucketInfo bucketInfo = cluster.getOzoneManager().getBucketInfo(volumeName, obsBucketName); assertNotNull(bucketInfo); assertEquals(BucketLayout.OBJECT_STORE, bucketInfo.getBucketLayout()); - // Initialize the ContainerEndpoint - long containerId = 1L; + long containerId = getContainerIdForKey(volumeName, obsBucketName, + obsSingleFileKey); + Response response = getContainerEndpointResponse(containerId); assertNotNull(response, "Response should not be null."); @@ -226,6 +242,22 @@ private Response getContainerEndpointResponse(long containerId) { return containerEndpoint.getKeysForContainer(containerId, 10, ""); } + private long getContainerIdForKey(String volumeName, String bucketName, + String keyName) throws IOException { + OmKeyArgs keyArgs = new OmKeyArgs.Builder() + .setVolumeName(volumeName) + .setBucketName(bucketName) + .setKeyName(keyName) + .build(); + OmKeyLocationInfo location = cluster.getOzoneManager() + .lookupKey(keyArgs) + .getKeyLocationVersions() + .get(0) + .getBlocksLatestVersionOnly() + .get(0); + return location.getContainerID(); + } + private void writeTestData(String volumeName, String bucketName, String keyPath, String data) throws Exception { try (OzoneOutputStream out = client.getObjectStore().getVolume(volumeName) From 5424549992cc8a35d297da7d056c99613a499805 Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Wed, 29 Apr 2026 23:05:56 +0530 Subject: [PATCH 2/4] HDDS-15004. Addressed review comments --- .../recon/TestReconContainerEndpoint.java | 37 ++++++++++++++----- .../recon/TestReconOmMetaManagerUtils.java | 28 ++++++++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java index 92fe2498e014..8c4f62041d86 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java @@ -23,9 +23,12 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.CompletableFuture; import javax.ws.rs.core.Response; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.client.BucketArgs; @@ -42,6 +45,7 @@ import org.apache.hadoop.ozone.recon.api.types.KeyMetadata; import org.apache.hadoop.ozone.recon.api.types.KeysResponse; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; import org.apache.hadoop.ozone.recon.tasks.ContainerKeyMapperHelper; import org.apache.hadoop.ozone.recon.tasks.ReconTaskControllerImpl; @@ -84,9 +88,7 @@ public void init() throws Exception { @AfterEach public void shutdown() throws IOException { try { - if (client != null) { - client.close(); - } + IOUtils.closeQuietly(client); if (cluster != null) { cluster.shutdown(); } @@ -128,8 +130,8 @@ public void testContainerEndpointForFSOLayout() throws Exception { omMetaManagerUtils.waitForEventBufferEmpty(reconTaskController.getEventBuffer()); GenericTestUtils.waitFor(completableFuture::isDone, 100, 30000); completableFuture.join(); - // The buffer can be empty while tasks still finish processing a dequeued batch. - Thread.sleep(2000); + waitUntilReconIndexesKeysForPaths(volName, bucketName, + nestedDirKey, singleFileKey); //Search for the bucket from the bucket table and verify its FSO OmBucketInfo bucketInfo = cluster.getOzoneManager().getBucketInfo(volName, bucketName); @@ -137,8 +139,7 @@ public void testContainerEndpointForFSOLayout() throws Exception { assertEquals(BucketLayout.FILE_SYSTEM_OPTIMIZED, bucketInfo.getBucketLayout()); - // Assuming a known container ID that these keys have been written into - long testContainerID = 1L; + long testContainerID = getContainerIdForKey(volName, bucketName, nestedDirKey); // Query the ContainerEndpoint for the keys in the specified container Response response = getContainerEndpointResponse(testContainerID); @@ -158,7 +159,7 @@ public void testContainerEndpointForFSOLayout() throws Exception { assertEquals("file1", keyMetadata.getKey()); assertEquals("testvol/fsobucket/dir1/dir2/dir3/file1", keyMetadata.getCompletePath()); - testContainerID = 2L; + testContainerID = getContainerIdForKey(volName, bucketName, singleFileKey); response = getContainerEndpointResponse(testContainerID); data = (KeysResponse) response.getEntity(); keyMetadataList = data.getKeys(); @@ -200,7 +201,7 @@ public void testContainerEndpointForOBSBucket() throws Exception { omMetaManagerUtils.waitForEventBufferEmpty(reconTaskController.getEventBuffer()); GenericTestUtils.waitFor(completableFuture::isDone, 100, 30000); completableFuture.join(); - Thread.sleep(2000); + waitUntilReconIndexesKeysForPaths(volumeName, obsBucketName, obsSingleFileKey); // Search for the bucket from the bucket table and verify its OBS OmBucketInfo bucketInfo = cluster.getOzoneManager().getBucketInfo(volumeName, obsBucketName); @@ -242,6 +243,24 @@ private Response getContainerEndpointResponse(long containerId) { return containerEndpoint.getKeysForContainer(containerId, 10, ""); } + /** + * Wait until Recon's container-key index reflects all written keys (by container id). + * The OM event queue can be empty while a batch is still being processed. + */ + private void waitUntilReconIndexesKeysForPaths(String volumeName, + String bucketName, String... keyPaths) + throws Exception { + Map requiredCountByContainer = new HashMap<>(); + for (String keyPath : keyPaths) { + long containerId = + getContainerIdForKey(volumeName, bucketName, keyPath); + requiredCountByContainer.merge(containerId, 1, Integer::sum); + } + ReconContainerMetadataManager mgr = + recon.getReconServer().getReconContainerMetadataManager(); + TestReconOmMetaManagerUtils.waitUntilReconKeyCounts(mgr, requiredCountByContainer); + } + private long getContainerIdForKey(String volumeName, String bucketName, String keyName) throws IOException { OmKeyArgs keyArgs = new OmKeyArgs.Builder() diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java index 4ef84f2e6d9b..a2f69f624c5e 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java @@ -17,7 +17,10 @@ package org.apache.hadoop.ozone.recon; +import java.io.IOException; +import java.util.Map; import java.util.concurrent.CompletableFuture; +import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager; import org.apache.hadoop.ozone.recon.tasks.OMUpdateEventBuffer; import org.apache.ozone.test.GenericTestUtils; @@ -43,4 +46,29 @@ public CompletableFuture waitForEventBufferEmpty(OMUpdateEventBuffer event } }); } + + /** + * Waits until Recon's container-key index reports at least the given number of keys + * per container id. Use after OM sync when the event buffer can be empty while a + * dequeued batch is still being processed. + * + * @param mgr Recon container metadata manager + * @param minimumCountPerContainer map of container ID to minimum inclusive key count + * @throws Exception if the condition is not met within the timeout or on interrupt + */ + public static void waitUntilReconKeyCounts(ReconContainerMetadataManager mgr, + Map minimumCountPerContainer) throws Exception { + GenericTestUtils.waitFor(() -> { + try { + for (Map.Entry e : minimumCountPerContainer.entrySet()) { + if (mgr.getKeyCountForContainer(e.getKey()) < e.getValue()) { + return false; + } + } + return true; + } catch (IOException ex) { + throw new RuntimeException(ex); + } + }, 500, 45000); + } } From 67cea403a91df31a1e69ca29e4ccb84e71f02eef Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Thu, 30 Apr 2026 00:14:12 +0530 Subject: [PATCH 3/4] HDDS-15004. Addressed review comments --- .../hadoop/ozone/recon/TestReconOmMetaManagerUtils.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java index a2f69f624c5e..8aa32ac40baa 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconOmMetaManagerUtils.java @@ -51,6 +51,10 @@ public CompletableFuture waitForEventBufferEmpty(OMUpdateEventBuffer event * Waits until Recon's container-key index reports at least the given number of keys * per container id. Use after OM sync when the event buffer can be empty while a * dequeued batch is still being processed. + *

+ * IO failures from {@code mgr} reads (including temporary {@code RocksDatabaseException} + * while Recon applies updates) are treated as "not ready yet"; the wait repeats until the + * timeout if counts never converge. * * @param mgr Recon container metadata manager * @param minimumCountPerContainer map of container ID to minimum inclusive key count @@ -67,8 +71,9 @@ public static void waitUntilReconKeyCounts(ReconContainerMetadataManager mgr, } return true; } catch (IOException ex) { - throw new RuntimeException(ex); + // Retry: concurrent Recon indexing can transiently expose a closed Rocks handle. + return false; } - }, 500, 45000); + }, 1000, 90000); } } From 44488711791e325392a75cadf2ac979d39629da2 Mon Sep 17 00:00:00 2001 From: Arun Sarin Date: Wed, 6 May 2026 22:29:21 +0530 Subject: [PATCH 4/4] HDDS-15004. Addressed review comments --- .../ozone/recon/TestReconContainerEndpoint.java | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java index 8c4f62041d86..3ff0a636d814 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java @@ -28,8 +28,8 @@ import java.util.concurrent.CompletableFuture; import javax.ws.rs.core.Response; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; +import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.ObjectStore; @@ -86,15 +86,9 @@ public void init() throws Exception { } @AfterEach - public void shutdown() throws IOException { - try { - IOUtils.closeQuietly(client); - if (cluster != null) { - cluster.shutdown(); - } - } finally { - ContainerKeyMapperHelper.clearSharedContainerCountMap(); - } + public void shutdown() { + IOUtils.closeQuietly(client, cluster); + ContainerKeyMapperHelper.clearSharedContainerCountMap(); } @Test