From 1d79acd983e02118bf5f3e52c2c60b76f0a87073 Mon Sep 17 00:00:00 2001 From: Ivan Andika Date: Mon, 23 Feb 2026 15:48:47 +0800 Subject: [PATCH 1/6] HDDS-14425. Implement Ratis follower read exception handling --- .../ozone/om/exceptions/OMReadException.java | 48 +++++++++++++++++ .../om/exceptions/OMReadIndexException.java | 48 +++++++++++++++++ ...pcOMFollowerReadFailoverProxyProvider.java | 16 ++++++ .../om/ha/OMFailoverProxyProviderBase.java | 40 ++++++++++++++ ...pcOMFollowerReadFailoverProxyProvider.java | 52 ++++++++++++++++--- .../om/TestOzoneManagerHAFollowerRead.java | 25 +++++++++ ...ManagerHAFollowerReadWithStoppedNodes.java | 4 ++ .../om/ratis/OzoneManagerRatisServer.java | 18 +++++++ 8 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java create mode 100644 hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java new file mode 100644 index 00000000000..581f83a1a21 --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.exceptions; + +import java.io.IOException; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.ReadException; + +/** + * Exceptions thrown by + * {@link org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB} when + * Ratis read fails due reasons such as timeout. + */ +public class OMReadException extends IOException { + + public OMReadException(RaftPeerId currentPeerId, String cause) { + super("OM: " + currentPeerId + " read failed. " + + (cause != null ? "Cause: " + cause : "")); + } + + /** + * Convert {@link ReadException} to {@link OMReadException}. + * @param readException Ratis Read exception. + * @param currentPeer Current peer. + * @return OMReadException. + */ + public static OMReadException convertToOMReadException( + ReadException readException, RaftPeerId currentPeer) { + Throwable cause = readException.getCause(); + return new OMReadException(currentPeer, + cause != null ? cause.getMessage() : null); + } +} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java new file mode 100644 index 00000000000..c14d18e0e2a --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.exceptions; + +import java.io.IOException; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.ReadIndexException; + +/** + * Exceptions thrown by + * {@link org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB} when + * ReadIndex call fails to complete. + */ +public class OMReadIndexException extends IOException { + + public OMReadIndexException(RaftPeerId currentPeerId, String cause) { + super("OM: " + currentPeerId + " read index failed. " + + (cause != null ? "Cause: " + cause : "")); + } + + /** + * Convert {@link ReadIndexException} to {@link OMReadIndexException}. + * @param readIndexException Ratis ReadIndex exception. + * @param currentPeer Current peer. + * @return OMReadIndexException + */ + public static OMReadIndexException convertToOMReadIndexException( + ReadIndexException readIndexException, RaftPeerId currentPeer) { + Throwable cause = readIndexException.getCause(); + return new OMReadIndexException(currentPeer, + cause != null ? cause.getMessage() : null); + } +} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java index bc2f9a246d8..1568d1f0a03 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java @@ -19,6 +19,8 @@ import static org.apache.hadoop.ozone.om.ha.OMFailoverProxyProviderBase.getLeaderNotReadyException; import static org.apache.hadoop.ozone.om.ha.OMFailoverProxyProviderBase.getNotLeaderException; +import static org.apache.hadoop.ozone.om.ha.OMFailoverProxyProviderBase.getReadException; +import static org.apache.hadoop.ozone.om.ha.OMFailoverProxyProviderBase.getReadIndexException; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.RpcController; @@ -38,6 +40,8 @@ import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; +import org.apache.hadoop.ozone.om.exceptions.OMReadException; +import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.slf4j.Logger; @@ -263,6 +267,18 @@ public Object invoke(Object proxy, final Method method, final Object[] args) // If we break here instead, we will retry the same leader again without waiting throw e; } + + OMReadIndexException readIndexException = getReadIndexException(e); + if (readIndexException != null) { + // This should trigger failover in the following shouldFailover + LOG.debug("Encountered OMReadIndexException from {}. ", current.proxyInfo); + } + + OMReadException readException = getReadException(e); + if (readException != null) { + // This should trigger failover in the following shouldFailover + LOG.debug("Encountered OMReadException from {}. ", current.proxyInfo); + } } if (!failoverProxy.shouldFailover(e)) { diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java index 57d6caf823e..90a27e8cd23 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java @@ -42,6 +42,8 @@ import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; +import org.apache.hadoop.ozone.om.exceptions.OMReadException; +import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager; @@ -434,6 +436,44 @@ public static OMNotLeaderException getNotLeaderException( return null; } + /** + * Unwrap the exception and return the wrapped ReadIndexException if any. + * + * @param exception exception to unwrap. + * @return the unwrapped OMReadIndexException or null if the wrapped + * exception is not OMReadIndexException. + */ + public static OMReadIndexException getReadIndexException(Exception exception) { + Throwable cause = exception.getCause(); + if (cause instanceof RemoteException) { + IOException ioException = + ((RemoteException) cause).unwrapRemoteException(); + if (ioException instanceof OMReadIndexException) { + return (OMReadIndexException) ioException; + } + } + return null; + } + + /** + * Unwrap the exception and return the wrapped ReadException if any. + * + * @param exception exception to unwrap. + * @return the unwrapped OMReadException or null if the wrapped + * exception is not OMReadException. + */ + public static OMReadException getReadException(Exception exception) { + Throwable cause = exception.getCause(); + if (cause instanceof RemoteException) { + IOException ioException = + ((RemoteException) cause).unwrapRemoteException(); + if (ioException instanceof OMReadException) { + return (OMReadException) ioException; + } + } + return null; + } + protected ConfigurationSource getConf() { return conf; } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java index 3587ca5dacb..6e43d515d43 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java @@ -58,6 +58,8 @@ import org.apache.hadoop.ozone.ha.ConfUtils; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; +import org.apache.hadoop.ozone.om.exceptions.OMReadException; +import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CreateKeyRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.GetKeyInfoRequest; @@ -337,6 +339,22 @@ void testNullRequest() throws Exception { assertInstanceOf(RpcNoSuchProtocolException.class, exception.getCause()); } + @Test + void testReadIndexException() throws Exception { + setupProxyProvider(3); + omNodeAnswers[0].isThrowReadIndexException = true; + doRead(); + assertHandledBy(1); + } + + @Test + void testReadException() throws Exception { + setupProxyProvider(3); + omNodeAnswers[0].isThrowReadException = true; + doRead(); + assertHandledBy(1); + } + private void setupProxyProvider(int omNodeCount) throws Exception { setupProxyProvider(omNodeCount, new OzoneConfiguration()); } @@ -489,6 +507,8 @@ private static class OMAnswer { private volatile boolean isLeader = false; private volatile boolean isLeaderReady = true; private volatile boolean isFollowerReadSupported = true; + private volatile boolean isThrowReadIndexException = false; + private volatile boolean isThrowReadException = false; private OMProtocolAnswer clientAnswer = new OMProtocolAnswer(); @@ -524,13 +544,31 @@ public OMResponse answer(InvocationOnMock invocationOnMock) throws Throwable { } break; case GetKeyInfo: - if (!isLeader && !isFollowerReadSupported) { - throw new ServiceException( - new RemoteException( - OMNotLeaderException.class.getCanonicalName(), - "OM follower read is not supported" - ) - ); + if (!isLeader) { + if (!isFollowerReadSupported) { + throw new ServiceException( + new RemoteException( + OMNotLeaderException.class.getCanonicalName(), + "OM follower read is not supported" + ) + ); + } + if (isThrowReadIndexException) { + throw new ServiceException( + new RemoteException( + OMReadIndexException.class.getCanonicalName(), + "ReadIndex exception" + ) + ); + } + if (isThrowReadException) { + throw new ServiceException( + new RemoteException( + OMReadException.class.getCanonicalName(), + "ReadException" + ) + ); + } } if (isLeader && !isLeaderReady) { throw new ServiceException( diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java index ddafde7103e..969e266ccdb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java @@ -428,6 +428,31 @@ protected void createKeyTest(boolean checkSuccess) throws Exception { } } + protected void listVolumes(boolean checkSuccess) + throws Exception { + try { + getObjectStore().getClientProxy().listVolumes(null, null, 100); + } catch (IOException e) { + if (!checkSuccess) { + // If the last OM to be tried by the RetryProxy is down, we would get + // ConnectException. Otherwise, we would get a RemoteException from the + // last running OM as it would fail to get a quorum. + if (e instanceof RemoteException) { + // Linearizable read will fail with OMReadIndexException if the follower does not recognize any leader + // or leader is uncontactable. It will throw OMReadException if the read submitted to Ratis encounters + // timeout. + assertThat(e).hasMessageFindingMatch("OMRead(Index)?Exception"); + } else if (e instanceof ConnectException) { + assertThat(e).hasMessageContaining("Connection refused"); + } else { + assertThat(e).hasMessageContaining("Could not determine or connect to OM Leader"); + } + } else { + throw e; + } + } + } + protected void waitForLeaderToBeReady() throws InterruptedException, TimeoutException { // Wait for Leader Election timeout diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java index 7361b800a3a..47c7230e43b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java @@ -111,8 +111,12 @@ void twoOMDown() throws Exception { getCluster().stopOzoneManager(2); Thread.sleep(NODE_FAILURE_TIMEOUT * 4); + // Write requests will fail with OMNotLeaderException createVolumeTest(false); createKeyTest(false); + + // Read requests will fail with either OMReadIndexException or OMReadException + listVolumes(false); } @Test diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index bccf9e0900a..185729df6aa 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -62,6 +62,8 @@ import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; +import org.apache.hadoop.ozone.om.exceptions.OMReadException; +import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.ozone.om.helpers.OMNodeDetails; import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; @@ -88,6 +90,8 @@ import org.apache.ratis.protocol.exceptions.LeaderNotReadyException; import org.apache.ratis.protocol.exceptions.LeaderSteppingDownException; import org.apache.ratis.protocol.exceptions.NotLeaderException; +import org.apache.ratis.protocol.exceptions.ReadException; +import org.apache.ratis.protocol.exceptions.ReadIndexException; import org.apache.ratis.protocol.exceptions.StateMachineException; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; @@ -588,6 +592,20 @@ private OMResponse createOmResponseImpl(OMRequest omRequest, throw new ServiceException(new OMNotLeaderException(leaderSteppingDownException.getMessage())); } + ReadIndexException readIndexException = reply.getReadIndexException(); + if (readIndexException != null) { + throw new ServiceException( + OMReadIndexException.convertToOMReadIndexException( + readIndexException, getRaftPeerId())); + } + + ReadException readException = reply.getReadException(); + if (readException != null) { + throw new ServiceException( + OMReadException.convertToOMReadException( + readException, getRaftPeerId())); + } + StateMachineException stateMachineException = reply.getStateMachineException(); if (stateMachineException != null) { From a021a4c32876d7e692bf1147eaa77f540441e93f Mon Sep 17 00:00:00 2001 From: Ivan Andika Date: Sun, 1 Mar 2026 20:49:07 +0800 Subject: [PATCH 2/6] Use the original exception --- .../ozone/om/exceptions/OMReadException.java | 48 ------------------- .../om/exceptions/OMReadIndexException.java | 48 ------------------- ...pcOMFollowerReadFailoverProxyProvider.java | 12 ++--- .../om/ha/OMFailoverProxyProviderBase.java | 24 +++++----- ...pcOMFollowerReadFailoverProxyProvider.java | 6 +-- .../om/TestOzoneManagerHAFollowerRead.java | 4 +- ...ManagerHAFollowerReadWithStoppedNodes.java | 2 +- .../om/ratis/OzoneManagerRatisServer.java | 10 +--- 8 files changed, 25 insertions(+), 129 deletions(-) delete mode 100644 hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java delete mode 100644 hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java deleted file mode 100644 index 581f83a1a21..00000000000 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadException.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.om.exceptions; - -import java.io.IOException; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.protocol.exceptions.ReadException; - -/** - * Exceptions thrown by - * {@link org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB} when - * Ratis read fails due reasons such as timeout. - */ -public class OMReadException extends IOException { - - public OMReadException(RaftPeerId currentPeerId, String cause) { - super("OM: " + currentPeerId + " read failed. " + - (cause != null ? "Cause: " + cause : "")); - } - - /** - * Convert {@link ReadException} to {@link OMReadException}. - * @param readException Ratis Read exception. - * @param currentPeer Current peer. - * @return OMReadException. - */ - public static OMReadException convertToOMReadException( - ReadException readException, RaftPeerId currentPeer) { - Throwable cause = readException.getCause(); - return new OMReadException(currentPeer, - cause != null ? cause.getMessage() : null); - } -} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java deleted file mode 100644 index c14d18e0e2a..00000000000 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMReadIndexException.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.om.exceptions; - -import java.io.IOException; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.protocol.exceptions.ReadIndexException; - -/** - * Exceptions thrown by - * {@link org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB} when - * ReadIndex call fails to complete. - */ -public class OMReadIndexException extends IOException { - - public OMReadIndexException(RaftPeerId currentPeerId, String cause) { - super("OM: " + currentPeerId + " read index failed. " + - (cause != null ? "Cause: " + cause : "")); - } - - /** - * Convert {@link ReadIndexException} to {@link OMReadIndexException}. - * @param readIndexException Ratis ReadIndex exception. - * @param currentPeer Current peer. - * @return OMReadIndexException - */ - public static OMReadIndexException convertToOMReadIndexException( - ReadIndexException readIndexException, RaftPeerId currentPeer) { - Throwable cause = readIndexException.getCause(); - return new OMReadIndexException(currentPeer, - cause != null ? cause.getMessage() : null); - } -} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java index 777dde77a52..305f5c51769 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/HadoopRpcOMFollowerReadFailoverProxyProvider.java @@ -40,12 +40,12 @@ import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; -import org.apache.hadoop.ozone.om.exceptions.OMReadException; -import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.ozone.om.helpers.ReadConsistency; import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ReadConsistencyHint; +import org.apache.ratis.protocol.exceptions.ReadException; +import org.apache.ratis.protocol.exceptions.ReadIndexException; import org.apache.ratis.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -307,16 +307,16 @@ public Object invoke(Object proxy, final Method method, final Object[] args) throw e; } - OMReadIndexException readIndexException = getReadIndexException(e); + ReadIndexException readIndexException = getReadIndexException(e); if (readIndexException != null) { // This should trigger failover in the following shouldFailover - LOG.debug("Encountered OMReadIndexException from {}. ", current.proxyInfo); + LOG.debug("Encountered ReadIndexException from {}. ", current.proxyInfo); } - OMReadException readException = getReadException(e); + ReadException readException = getReadException(e); if (readException != null) { // This should trigger failover in the following shouldFailover - LOG.debug("Encountered OMReadException from {}. ", current.proxyInfo); + LOG.debug("Encountered ReadException from {}. ", current.proxyInfo); } } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java index 90a27e8cd23..3b07921d379 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java @@ -42,11 +42,11 @@ import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; -import org.apache.hadoop.ozone.om.exceptions.OMReadException; -import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager; +import org.apache.ratis.protocol.exceptions.ReadException; +import org.apache.ratis.protocol.exceptions.ReadIndexException; import org.apache.ratis.protocol.exceptions.StateMachineException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -440,16 +440,16 @@ public static OMNotLeaderException getNotLeaderException( * Unwrap the exception and return the wrapped ReadIndexException if any. * * @param exception exception to unwrap. - * @return the unwrapped OMReadIndexException or null if the wrapped - * exception is not OMReadIndexException. + * @return the unwrapped ReadIndexException or null if the wrapped + * exception is not ReadIndexException. */ - public static OMReadIndexException getReadIndexException(Exception exception) { + public static ReadIndexException getReadIndexException(Exception exception) { Throwable cause = exception.getCause(); if (cause instanceof RemoteException) { IOException ioException = ((RemoteException) cause).unwrapRemoteException(); - if (ioException instanceof OMReadIndexException) { - return (OMReadIndexException) ioException; + if (ioException instanceof ReadIndexException) { + return (ReadIndexException) ioException; } } return null; @@ -459,16 +459,16 @@ public static OMReadIndexException getReadIndexException(Exception exception) { * Unwrap the exception and return the wrapped ReadException if any. * * @param exception exception to unwrap. - * @return the unwrapped OMReadException or null if the wrapped - * exception is not OMReadException. + * @return the unwrapped ReadException or null if the wrapped + * exception is not ReadException. */ - public static OMReadException getReadException(Exception exception) { + public static ReadException getReadException(Exception exception) { Throwable cause = exception.getCause(); if (cause instanceof RemoteException) { IOException ioException = ((RemoteException) cause).unwrapRemoteException(); - if (ioException instanceof OMReadException) { - return (OMReadException) ioException; + if (ioException instanceof ReadException) { + return (ReadException) ioException; } } return null; diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java index 6e43d515d43..24df84cb0bb 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java @@ -58,8 +58,6 @@ import org.apache.hadoop.ozone.ha.ConfUtils; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; -import org.apache.hadoop.ozone.om.exceptions.OMReadException; -import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CreateKeyRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.GetKeyInfoRequest; @@ -556,7 +554,7 @@ public OMResponse answer(InvocationOnMock invocationOnMock) throws Throwable { if (isThrowReadIndexException) { throw new ServiceException( new RemoteException( - OMReadIndexException.class.getCanonicalName(), + ReadIndexException.class.getCanonicalName(), "ReadIndex exception" ) ); @@ -564,7 +562,7 @@ public OMResponse answer(InvocationOnMock invocationOnMock) throws Throwable { if (isThrowReadException) { throw new ServiceException( new RemoteException( - OMReadException.class.getCanonicalName(), + ReadException.class.getCanonicalName(), "ReadException" ) ); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java index bfe36990cd0..8fb1d4cb85d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java @@ -444,8 +444,8 @@ protected void listVolumes(boolean checkSuccess) // ConnectException. Otherwise, we would get a RemoteException from the // last running OM as it would fail to get a quorum. if (e instanceof RemoteException) { - // Linearizable read will fail with OMReadIndexException if the follower does not recognize any leader - // or leader is uncontactable. It will throw OMReadException if the read submitted to Ratis encounters + // Linearizable read will fail with ReadIndexException if the follower does not recognize any leader + // or leader is uncontactable. It will throw ReadException if the read submitted to Ratis encounters // timeout. assertThat(e).hasMessageFindingMatch("OMRead(Index)?Exception"); } else if (e instanceof ConnectException) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java index 47c7230e43b..878bfad603b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerReadWithStoppedNodes.java @@ -115,7 +115,7 @@ void twoOMDown() throws Exception { createVolumeTest(false); createKeyTest(false); - // Read requests will fail with either OMReadIndexException or OMReadException + // Read requests will fail with either ReadIndexException or ReadException listVolumes(false); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 2ed00a34f1e..dab93e75900 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -63,8 +63,6 @@ import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; -import org.apache.hadoop.ozone.om.exceptions.OMReadException; -import org.apache.hadoop.ozone.om.exceptions.OMReadIndexException; import org.apache.hadoop.ozone.om.helpers.OMNodeDetails; import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.ReadConsistency; @@ -614,16 +612,12 @@ private OMResponse createOmResponseImpl(OMRequest omRequest, ReadIndexException readIndexException = reply.getReadIndexException(); if (readIndexException != null) { - throw new ServiceException( - OMReadIndexException.convertToOMReadIndexException( - readIndexException, getRaftPeerId())); + throw new ServiceException(readIndexException); } ReadException readException = reply.getReadException(); if (readException != null) { - throw new ServiceException( - OMReadException.convertToOMReadException( - readException, getRaftPeerId())); + throw new ServiceException(readException); } StateMachineException stateMachineException = From b18bbd39e909b30c50751a3972347ca2a1fa0fb8 Mon Sep 17 00:00:00 2001 From: Ivan Andika Date: Sun, 1 Mar 2026 20:49:52 +0800 Subject: [PATCH 3/6] Update test --- .../apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java index 8fb1d4cb85d..c2314a717c1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java @@ -447,7 +447,7 @@ protected void listVolumes(boolean checkSuccess) // Linearizable read will fail with ReadIndexException if the follower does not recognize any leader // or leader is uncontactable. It will throw ReadException if the read submitted to Ratis encounters // timeout. - assertThat(e).hasMessageFindingMatch("OMRead(Index)?Exception"); + assertThat(e).hasMessageFindingMatch("Read(Index)?Exception"); } else if (e instanceof ConnectException) { assertThat(e).hasMessageContaining("Connection refused"); } else { From 08a8fb3edfbc6ab45abc30b07bffb90d5df8586b Mon Sep 17 00:00:00 2001 From: Ivan Andika Date: Sun, 1 Mar 2026 22:40:40 +0800 Subject: [PATCH 4/6] Fix build --- .../om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java index 24df84cb0bb..f77c5b561d4 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/ha/TestHadoopRpcOMFollowerReadFailoverProxyProvider.java @@ -68,6 +68,8 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.exceptions.ReadException; +import org.apache.ratis.protocol.exceptions.ReadIndexException; import org.junit.jupiter.api.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; From fa31dc6bad5338df2852ce71282824cc0a6c71f5 Mon Sep 17 00:00:00 2001 From: Ivan Andika Date: Mon, 2 Mar 2026 10:17:53 +0800 Subject: [PATCH 5/6] Fix test --- .../apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java index c2314a717c1..f64128abb93 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAFollowerRead.java @@ -65,6 +65,7 @@ import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServerConfig; import org.apache.hadoop.ozone.security.acl.OzoneObj; +import org.apache.ratis.protocol.exceptions.RaftException; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -447,7 +448,7 @@ protected void listVolumes(boolean checkSuccess) // Linearizable read will fail with ReadIndexException if the follower does not recognize any leader // or leader is uncontactable. It will throw ReadException if the read submitted to Ratis encounters // timeout. - assertThat(e).hasMessageFindingMatch("Read(Index)?Exception"); + assertThat(((RemoteException) e).unwrapRemoteException()).isInstanceOf(RaftException.class); } else if (e instanceof ConnectException) { assertThat(e).hasMessageContaining("Connection refused"); } else { From 997981df1d818908687f71258ce76db5d2ab31b6 Mon Sep 17 00:00:00 2001 From: Ivan Andika Date: Mon, 2 Mar 2026 11:16:13 +0800 Subject: [PATCH 6/6] Try to fix flaky testAllowLeaderSkipLinearizableRead --- .../TestOzoneShellHAWithFollowerRead.java | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java index e7bcf8672e1..605ed82b89c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java @@ -26,7 +26,6 @@ import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServerConfig; import org.apache.ratis.server.RaftServerConfigKeys; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -58,21 +57,27 @@ public void init() throws Exception { @Test public void testAllowLeaderSkipLinearizableRead() throws Exception { - super.testListAllKeysInternal("skipvol1"); - long lastMetrics = getCluster().getOMLeader().getMetrics().getNumLeaderSkipLinearizableRead(); - Assertions.assertTrue(lastMetrics > 0); - OzoneConfiguration oldConf = getCluster().getConf(); - OzoneConfiguration newConf = new OzoneConfiguration(oldConf); - newConf.setBoolean("ozone.om.allow.leader.skip.linearizable.read", false); - getCluster().getOMLeader().setConfiguration(newConf); - - super.testListAllKeysInternal("skipvol2"); - - long curMetrics = getCluster().getOMLeader().getMetrics().getNumLeaderSkipLinearizableRead(); - assertEquals(lastMetrics, curMetrics); - - getCluster().getOMLeader().setConfiguration(oldConf); + try { + String[] args = new String[]{"volume", "list"}; + OzoneShell ozoneShell = new OzoneShell(); + ozoneShell.getOzoneConf().setBoolean("ozone.client.follower.read.enabled", true); + for (int i = 0; i < 100; i++) { + execute(ozoneShell, args); + } + long lastMetrics = getCluster().getOMLeader().getMetrics().getNumLeaderSkipLinearizableRead(); + assertThat(lastMetrics).isGreaterThan(0); + OzoneConfiguration newConf = new OzoneConfiguration(oldConf); + newConf.setBoolean("ozone.om.allow.leader.skip.linearizable.read", false); + getCluster().getOMLeader().setConfiguration(newConf); + for (int i = 0; i < 100; i++) { + execute(ozoneShell, args); + } + long curMetrics = getCluster().getOMLeader().getMetrics().getNumLeaderSkipLinearizableRead(); + assertEquals(lastMetrics, curMetrics); + } finally { + getCluster().getOMLeader().setConfiguration(oldConf); + } } @Test