Skip to content

Commit e5f9537

Browse files
jeet1995Copilot
andcommitted
Add exception handler on HTTP/2 parent channel to suppress WARN logs
In HTTP/2, reactor-netty multiplexes streams on a shared parent TCP connection. The parent channel pipeline has no ChannelOperationsHandler (unlike HTTP/1.1), so TCP-level exceptions like Connection reset by peer (ECONNRESET) propagate to Netty's TailContext, which logs them as WARN. This adds Http2ParentChannelExceptionHandler to the parent channel via doOnConnected (accessing channel.parent()). The handler consumes exceptions at DEBUG level WITHOUT closing the channel or altering connection lifecycle, matching HTTP/1.1 logging behavior. Changes: - Handler logs cause.toString() (not getMessage()) for null-safe diagnostics - Defensive try-catch for duplicate handler name on concurrent stream creation - Before/after verified with EmbeddedChannel unit tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 9336a4b commit e5f9537

File tree

4 files changed

+161
-1
lines changed

4 files changed

+161
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.implementation.http;
5+
6+
import io.netty.channel.embedded.EmbeddedChannel;
7+
import org.testng.annotations.Test;
8+
9+
import java.io.IOException;
10+
11+
import static org.assertj.core.api.Assertions.assertThat;
12+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
13+
14+
/**
15+
* Verifies that {@link Http2ParentChannelExceptionHandler} consumes exceptions
16+
* on the HTTP/2 parent channel, preventing the Netty WARN log
17+
* "An exceptionCaught() event was fired, and it reached at the tail of the pipeline".
18+
*
19+
* Uses Netty's {@link EmbeddedChannel} which records unhandled exceptions internally.
20+
* {@code checkException()} re-throws any exception that reached the pipeline tail.
21+
*/
22+
public class Http2ParentChannelExceptionHandlerTest {
23+
24+
/**
25+
* BEFORE fix — without the handler, exceptions reach the pipeline tail.
26+
* EmbeddedChannel's checkException() re-throws the unhandled exception,
27+
* proving it reached Netty's TailContext (which in production logs as WARN).
28+
*/
29+
@Test(groups = "unit")
30+
public void withoutHandler_exceptionReachesTail() {
31+
EmbeddedChannel channel = new EmbeddedChannel();
32+
33+
channel.pipeline().fireExceptionCaught(
34+
new IOException("Connection reset by peer"));
35+
36+
assertThatThrownBy(channel::checkException)
37+
.isInstanceOf(IOException.class)
38+
.hasMessageContaining("Connection reset by peer");
39+
40+
channel.finishAndReleaseAll();
41+
}
42+
43+
/**
44+
* AFTER fix — with the handler installed, exceptions are consumed at DEBUG
45+
* level. checkException() does NOT throw, proving the exception never reached
46+
* the pipeline tail.
47+
*/
48+
@Test(groups = "unit")
49+
public void withHandler_exceptionConsumedAndChannelStaysOpen() {
50+
EmbeddedChannel channel = new EmbeddedChannel(
51+
new Http2ParentChannelExceptionHandler());
52+
53+
channel.pipeline().fireExceptionCaught(
54+
new IOException("Connection reset by peer"));
55+
56+
// Exception consumed — does NOT reach tail
57+
channel.checkException();
58+
59+
// Channel is NOT closed — handler does not alter lifecycle
60+
assertThat(channel.isOpen()).isTrue();
61+
62+
channel.finishAndReleaseAll();
63+
}
64+
65+
/**
66+
* NativeIoException is the actual exception type seen in production.
67+
* Verify RuntimeException subclasses are also consumed (NativeIoException
68+
* extends RuntimeException, not IOException).
69+
*/
70+
@Test(groups = "unit")
71+
public void withHandler_runtimeExceptionAlsoConsumed() {
72+
EmbeddedChannel channel = new EmbeddedChannel(
73+
new Http2ParentChannelExceptionHandler());
74+
75+
channel.pipeline().fireExceptionCaught(
76+
new RuntimeException("recvAddress(..) failed with error(-104): Connection reset by peer"));
77+
78+
// Exception consumed
79+
channel.checkException();
80+
81+
// Channel stays open
82+
assertThat(channel.isOpen()).isTrue();
83+
84+
channel.finishAndReleaseAll();
85+
}
86+
}

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
#### Breaking Changes
88

99
#### Bugs Fixed
10-
Fixing an NPE caused due to boxed Boolean conversion. - See [PR 48656](https://github.com/Azure/azure-sdk-for-java/pull/48656/)
10+
* Fixed an NPE caused by boxed Boolean conversion. - See [PR 48656](https://github.com/Azure/azure-sdk-for-java/pull/48656/)
11+
* Fixed Netty WARN log "An exceptionCaught() event was fired, and it reached at the tail of the pipeline" appearing on HTTP/2 connections when the server resets idle TCP connections. Added an exception handler on the HTTP/2 parent channel to consume connection-level exceptions at DEBUG level, matching HTTP/1.1 behavior.
1112

1213
#### Other Changes
1314

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.implementation.http;
5+
6+
import io.netty.channel.ChannelHandlerContext;
7+
import io.netty.channel.ChannelInboundHandlerAdapter;
8+
import org.slf4j.Logger;
9+
import org.slf4j.LoggerFactory;
10+
11+
/**
12+
* Exception handler for the HTTP/2 parent (TCP) channel pipeline.
13+
* <p>
14+
* In HTTP/2, reactor-netty multiplexes streams on a shared parent TCP connection.
15+
* Child stream channels have {@code ChannelOperationsHandler} which catches exceptions
16+
* and fails the active subscriber (matching HTTP/1.1 behavior). However, the parent
17+
* channel has no such handler — exceptions propagate to Netty's {@code TailContext}
18+
* which logs them as WARN ("An exceptionCaught() event was fired, and it reached at
19+
* the tail of the pipeline").
20+
* <p>
21+
* This handler is installed on the parent channel to consume those exceptions at
22+
* DEBUG level, aligning the logging behavior with HTTP/1.1. It does NOT close the
23+
* channel or alter connection lifecycle — reactor-netty and the connection pool's
24+
* eviction predicate ({@code !channel.isActive()}) handle that independently.
25+
* <ul>
26+
* <li>{@code NativeIoException: recvAddress(..) failed with error(-104)} — TCP RST
27+
* from server-side idle timeout, load balancer recycling, or network interruption</li>
28+
* <li>{@code IOException} (broken pipe, connection reset) — connection already dead</li>
29+
* <li>{@code SSLException} — post-handshake TLS error on established connection</li>
30+
* <li>{@code Http2Exception} — protocol errors already handled internally by
31+
* {@code Http2FrameCodec} and {@code Http2MultiplexHandler} before reaching here</li>
32+
* </ul>
33+
* <p>
34+
* Typical exceptions that reach this handler (after being processed by
35+
* {@code Http2FrameCodec} and {@code Http2MultiplexHandler} internally):
36+
*
37+
* @see ReactorNettyClient#configureChannelPipelineHandlers()
38+
*/
39+
final class Http2ParentChannelExceptionHandler extends ChannelInboundHandlerAdapter {
40+
41+
static final String HANDLER_NAME = "cosmosH2ParentExceptionHandler";
42+
43+
private static final Logger logger = LoggerFactory.getLogger(Http2ParentChannelExceptionHandler.class);
44+
45+
@Override
46+
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
47+
if (logger.isDebugEnabled()) {
48+
logger.debug("Exception on HTTP/2 parent connection [id:{}]: {}",
49+
ctx.channel().id().asShortText(), cause.toString(), cause);
50+
}
51+
// Do NOT close the channel or alter connection lifecycle.
52+
// Reactor-netty and the pool eviction predicate handle that independently.
53+
}
54+
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/ReactorNettyClient.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,25 @@ private void configureChannelPipelineHandlers() {
164164
"customHeaderCleaner",
165165
new Http2ResponseHeaderCleanerHandler());
166166
}
167+
168+
// Install exception handler on the HTTP/2 parent (TCP) channel.
169+
// In H2, doOnConnected fires for stream (child) channels — channel.parent()
170+
// is the TCP connection. The parent pipeline has no ChannelOperationsHandler
171+
// (unlike H1.1), so TCP-level exceptions (RST, broken pipe) propagate to
172+
// Netty's TailContext and get logged as WARN. This handler matches H1.1
173+
// behavior by consuming exceptions at DEBUG level.
174+
Channel parent = connection.channel().parent();
175+
if (parent != null
176+
&& parent.pipeline().get(Http2ParentChannelExceptionHandler.HANDLER_NAME) == null) {
177+
178+
try {
179+
parent.pipeline().addLast(
180+
Http2ParentChannelExceptionHandler.HANDLER_NAME,
181+
new Http2ParentChannelExceptionHandler());
182+
} catch (IllegalArgumentException ignored) {
183+
// Duplicate handler — already installed by a concurrent stream
184+
}
185+
}
167186
}));
168187
}
169188
}

0 commit comments

Comments
 (0)