Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.implementation.http;

import io.netty.channel.embedded.EmbeddedChannel;
import org.testng.annotations.Test;

import java.io.IOException;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

/**
 * Verifies that {@link Http2ParentChannelExceptionHandler} consumes exceptions
 * on the HTTP/2 parent channel, preventing the Netty WARN log
 * "An exceptionCaught() event was fired, and it reached at the tail of the pipeline".
 * <p>
 * Uses Netty's {@link EmbeddedChannel}, which records unhandled exceptions internally.
 * {@code checkException()} re-throws any exception that reached the pipeline tail.
 * <p>
 * Every test releases its channel in a {@code finally} block so that a failing
 * assertion does not leave the embedded channel (and any buffered messages) unreleased.
 */
public class Http2ParentChannelExceptionHandlerTest {

    /**
     * Baseline (behavior before the fix): without the handler, exceptions reach the
     * pipeline tail. {@code checkException()} re-throws the unhandled exception,
     * proving it reached Netty's tail context (which in production logs at WARN).
     */
    @Test(groups = "unit")
    public void withoutHandler_exceptionReachesTail() {
        EmbeddedChannel channel = new EmbeddedChannel();
        try {
            channel.pipeline().fireExceptionCaught(
                new IOException("Connection reset by peer"));

            assertThatThrownBy(channel::checkException)
                .isInstanceOf(IOException.class)
                .hasMessageContaining("Connection reset by peer");
        } finally {
            channel.finishAndReleaseAll();
        }
    }

    /**
     * Behavior after the fix: with the handler installed, the exception is consumed
     * (logged at DEBUG). {@code checkException()} does NOT throw, proving the
     * exception never reached the pipeline tail, and the channel remains open.
     */
    @Test(groups = "unit")
    public void withHandler_exceptionConsumedAndChannelStaysOpen() {
        EmbeddedChannel channel = new EmbeddedChannel(
            new Http2ParentChannelExceptionHandler());
        try {
            channel.pipeline().fireExceptionCaught(
                new IOException("Connection reset by peer"));

            // Exception consumed: nothing was recorded at the tail, so this must not throw.
            channel.checkException();

            // Channel is NOT closed: the handler does not alter the connection lifecycle.
            assertThat(channel.isOpen()).isTrue();
        } finally {
            channel.finishAndReleaseAll();
        }
    }

    /**
     * Verifies that unchecked exceptions are consumed as well, i.e. the handler does
     * not filter by exception type.
     * <p>
     * Note: the exception seen in production, Netty's {@code Errors.NativeIoException},
     * is an {@link IOException} subclass and is therefore already represented by
     * {@link #withHandler_exceptionConsumedAndChannelStaysOpen()}. This test reuses its
     * message text with a {@link RuntimeException} to cover the unchecked case.
     */
    @Test(groups = "unit")
    public void withHandler_runtimeExceptionAlsoConsumed() {
        EmbeddedChannel channel = new EmbeddedChannel(
            new Http2ParentChannelExceptionHandler());
        try {
            channel.pipeline().fireExceptionCaught(
                new RuntimeException("recvAddress(..) failed with error(-104): Connection reset by peer"));

            // Exception consumed: must not be re-thrown from the tail.
            channel.checkException();

            // Channel stays open.
            assertThat(channel.isOpen()).isTrue();
        } finally {
            channel.finishAndReleaseAll();
        }
    }
}
3 changes: 2 additions & 1 deletion sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
#### Breaking Changes

#### Bugs Fixed
Fixing an NPE caused due to boxed Boolean conversion. - See [PR 48656](https://github.com/Azure/azure-sdk-for-java/pull/48656/)
* Fixed an NPE caused by boxed Boolean conversion. - See [PR 48656](https://github.com/Azure/azure-sdk-for-java/pull/48656/)
* Fixed Netty WARN log "An exceptionCaught() event was fired, and it reached at the tail of the pipeline" appearing on HTTP/2 connections when the server resets idle TCP connections. Added an exception handler on the HTTP/2 parent channel to consume connection-level exceptions at DEBUG level, matching HTTP/1.1 behavior.

#### Other Changes

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.implementation.http;

import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Terminal exception handler for the pipeline of the HTTP/2 parent (TCP) channel.
 * <p>
 * With HTTP/2, reactor-netty multiplexes streams over one shared parent TCP connection.
 * Each child stream channel carries a {@code ChannelOperationsHandler} that catches
 * exceptions and fails the active subscriber (the same as HTTP/1.1). The parent channel
 * has no equivalent, so its exceptions travel to Netty's {@code TailContext}, which logs
 * them at WARN ("An exceptionCaught() event was fired, and it reached at the tail of
 * the pipeline").
 * <p>
 * Installing this handler on the parent channel consumes those exceptions and logs them
 * at DEBUG instead, aligning the logging behavior with HTTP/1.1. The handler does NOT
 * close the channel or otherwise change the connection lifecycle; reactor-netty and the
 * connection pool's eviction predicate ({@code !channel.isActive()}) handle that
 * independently.
 * <p>
 * Typical exceptions that reach this handler (after being processed internally by
 * {@code Http2FrameCodec} and {@code Http2MultiplexHandler}):
 * <ul>
 *   <li>{@code NativeIoException: recvAddress(..) failed with error(-104)} — TCP RST
 *       from server-side idle timeout, load balancer recycling, or network interruption</li>
 *   <li>{@code IOException} (broken pipe, connection reset) — connection already dead</li>
 *   <li>{@code SSLException} — post-handshake TLS error on an established connection</li>
 *   <li>{@code Http2Exception} — protocol errors already handled internally by
 *       {@code Http2FrameCodec} and {@code Http2MultiplexHandler} before reaching here</li>
 * </ul>
 *
 * @see ReactorNettyClient#configureChannelPipelineHandlers()
 */
final class Http2ParentChannelExceptionHandler extends ChannelInboundHandlerAdapter {

    /** Name under which this handler is registered in the parent channel's pipeline. */
    static final String HANDLER_NAME = "cosmosH2ParentExceptionHandler";

    private static final Logger logger = LoggerFactory.getLogger(Http2ParentChannelExceptionHandler.class);

    /**
     * Consumes the exception: logs it at DEBUG (when enabled) and does not forward it
     * further down the pipeline.
     *
     * @param ctx   context of this handler; used only to read the channel id for the log line
     * @param cause the exception raised on the parent channel
     */
    @Override
    public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
        // Deliberately neither propagates the event nor closes the channel:
        // reactor-netty and the pool eviction predicate own the connection lifecycle.
        if (!logger.isDebugEnabled()) {
            return;
        }

        final String channelId = ctx.channel().id().asShortText();
        final String summary = cause.toString();
        // 'cause' is passed as the trailing argument so the stack trace is logged too.
        logger.debug("Exception on HTTP/2 parent connection [id:{}]: {}", channelId, summary, cause);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,25 @@ private void configureChannelPipelineHandlers() {
"customHeaderCleaner",
new Http2ResponseHeaderCleanerHandler());
}

// Install exception handler on the HTTP/2 parent (TCP) channel.
// In H2, doOnConnected fires for stream (child) channels — channel.parent()
// is the TCP connection. The parent pipeline has no ChannelOperationsHandler
// (unlike H1.1), so TCP-level exceptions (RST, broken pipe) propagate to
// Netty's TailContext and get logged as WARN. This handler matches H1.1
// behavior by consuming exceptions at DEBUG level.
Channel parent = connection.channel().parent();
if (parent != null
&& parent.pipeline().get(Http2ParentChannelExceptionHandler.HANDLER_NAME) == null) {

try {
parent.pipeline().addLast(
Http2ParentChannelExceptionHandler.HANDLER_NAME,
new Http2ParentChannelExceptionHandler());
} catch (IllegalArgumentException ignored) {
// Duplicate handler — already installed by a concurrent stream
}
}
Comment on lines +168 to +185
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change introduces new behavior (consuming parent-channel exceptions and closing the parent connection) without accompanying test coverage. There are existing Netty/transport tests in azure-cosmos-tests (e.g., ones that use EmbeddedChannel); please add a unit/integration test that asserts the handler is installed on the H2 parent pipeline and that an exception on the parent is consumed (no TailContext WARN) and results in the parent channel closing.

Copilot generated this review using guidance from repository custom instructions.
}));
}
}
Expand Down
Loading