diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/http/Http2ParentChannelExceptionHandlerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/http/Http2ParentChannelExceptionHandlerTest.java new file mode 100644 index 000000000000..3738764a4063 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/http/Http2ParentChannelExceptionHandlerTest.java @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.http; + +import io.netty.channel.embedded.EmbeddedChannel; +import org.testng.annotations.Test; + +import java.io.IOException; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Verifies that {@link Http2ParentChannelExceptionHandler} consumes exceptions + * on the HTTP/2 parent channel, preventing the Netty WARN log + * "An exceptionCaught() event was fired, and it reached at the tail of the pipeline". + * + * Uses Netty's {@link EmbeddedChannel} which records unhandled exceptions internally. + * {@code checkException()} re-throws any exception that reached the pipeline tail. + */ +public class Http2ParentChannelExceptionHandlerTest { + + /** + * BEFORE fix — without the handler, exceptions reach the pipeline tail. + * EmbeddedChannel's checkException() re-throws the unhandled exception, + * proving it reached Netty's TailContext (which in production logs as WARN). 
+ */ + @Test(groups = "unit") + public void withoutHandler_exceptionReachesTail() { + EmbeddedChannel channel = new EmbeddedChannel(); + + channel.pipeline().fireExceptionCaught( + new IOException("Connection reset by peer")); + + assertThatThrownBy(channel::checkException) + .isInstanceOf(IOException.class) + .hasMessageContaining("Connection reset by peer"); + + channel.finishAndReleaseAll(); + } + + /** + * AFTER fix — with the handler installed, exceptions are consumed at DEBUG + * level. checkException() does NOT throw, proving the exception never reached + * the pipeline tail. + */ + @Test(groups = "unit") + public void withHandler_exceptionConsumedAndChannelStaysOpen() { + EmbeddedChannel channel = new EmbeddedChannel( + new Http2ParentChannelExceptionHandler()); + + channel.pipeline().fireExceptionCaught( + new IOException("Connection reset by peer")); + + // Exception consumed — does NOT reach tail + channel.checkException(); + + // Channel is NOT closed — handler does not alter lifecycle + assertThat(channel.isOpen()).isTrue(); + + channel.finishAndReleaseAll(); + } + + /** + * NativeIoException is the actual exception type seen in production. + * This test fires a plain RuntimeException to verify unchecked exceptions are consumed too. + * NOTE(review): Netty's NativeIoException appears to extend IOException, not RuntimeException — confirm. + */ + @Test(groups = "unit") + public void withHandler_runtimeExceptionAlsoConsumed() { + EmbeddedChannel channel = new EmbeddedChannel( + new Http2ParentChannelExceptionHandler()); + + channel.pipeline().fireExceptionCaught( + new RuntimeException("recvAddress(..) 
 failed with error(-104): Connection reset by peer")); + + // Exception consumed + channel.checkException(); + + // Channel stays open + assertThat(channel.isOpen()).isTrue(); + + channel.finishAndReleaseAll(); + } +} diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 818e49226a40..0c1b73410d27 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -7,7 +7,8 @@ #### Breaking Changes #### Bugs Fixed -Fixing an NPE caused due to boxed Boolean conversion. - See [PR 48656](https://github.com/Azure/azure-sdk-for-java/pull/48656/) +* Fixing an NPE caused due to boxed Boolean conversion. - See [PR 48656](https://github.com/Azure/azure-sdk-for-java/pull/48656/) +* Fixed Netty WARN log "An exceptionCaught() event was fired, and it reached at the tail of the pipeline" appearing on HTTP/2 connections when the server resets idle TCP connections. Added an exception handler on the HTTP/2 parent channel to consume connection-level exceptions at DEBUG level, matching HTTP/1.1 behavior. #### Other Changes diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/Http2ParentChannelExceptionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/Http2ParentChannelExceptionHandler.java new file mode 100644 index 000000000000..b818893c5366 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/Http2ParentChannelExceptionHandler.java @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.http; + +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInboundHandlerAdapter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Exception handler for the HTTP/2 parent (TCP) channel pipeline. + *

+ * In HTTP/2, reactor-netty multiplexes streams on a shared parent TCP connection. + * Child stream channels have {@code ChannelOperationsHandler} which catches exceptions + * and fails the active subscriber (matching HTTP/1.1 behavior). However, the parent + * channel has no such handler — exceptions propagate to Netty's {@code TailContext} + * which logs them as WARN ("An exceptionCaught() event was fired, and it reached at + * the tail of the pipeline"). + *

+ * This handler is installed on the parent channel to consume those exceptions at + * DEBUG level, aligning the logging behavior with HTTP/1.1. It does NOT close the + * channel or alter connection lifecycle — reactor-netty and the connection pool's + * eviction predicate ({@code !channel.isActive()}) handle that independently. + *

+ *

+ * Typical exceptions that reach this handler (after being processed by + * {@code Http2FrameCodec} and {@code Http2MultiplexHandler} internally): + * + * @see ReactorNettyClient#configureChannelPipelineHandlers() + */ +final class Http2ParentChannelExceptionHandler extends ChannelInboundHandlerAdapter { + + static final String HANDLER_NAME = "cosmosH2ParentExceptionHandler"; + + private static final Logger logger = LoggerFactory.getLogger(Http2ParentChannelExceptionHandler.class); + + @Override + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { + if (logger.isDebugEnabled()) { + logger.debug("Exception on HTTP/2 parent connection [id:{}]: {}", + ctx.channel().id().asShortText(), cause.toString(), cause); + } + // Do NOT close the channel or alter connection lifecycle. + // Reactor-netty and the pool eviction predicate handle that independently. + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/ReactorNettyClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/ReactorNettyClient.java index 04ee87d22594..5f6a1b3d0995 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/ReactorNettyClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/ReactorNettyClient.java @@ -164,6 +164,25 @@ private void configureChannelPipelineHandlers() { "customHeaderCleaner", new Http2ResponseHeaderCleanerHandler()); } + + // Install exception handler on the HTTP/2 parent (TCP) channel. + // In H2, doOnConnected fires for stream (child) channels — channel.parent() + // is the TCP connection. The parent pipeline has no ChannelOperationsHandler + // (unlike H1.1), so TCP-level exceptions (RST, broken pipe) propagate to + // Netty's TailContext and get logged as WARN. This handler matches H1.1 + // behavior by consuming exceptions at DEBUG level. 
+ Channel parent = connection.channel().parent(); + if (parent != null + && parent.pipeline().get(Http2ParentChannelExceptionHandler.HANDLER_NAME) == null) { + + try { + parent.pipeline().addLast( + Http2ParentChannelExceptionHandler.HANDLER_NAME, + new Http2ParentChannelExceptionHandler()); + } catch (IllegalArgumentException ignored) { + // Duplicate handler — already installed by a concurrent stream + } + } })); } }