Skip to content

Commit 3e2b4f4

Browse files
jeet1995Copilot
andcommitted
Add exception handler on HTTP/2 parent channel to suppress WARN logs
In HTTP/2, reactor-netty multiplexes streams on a shared parent TCP connection. The parent channel pipeline has no ChannelOperationsHandler (unlike HTTP/1.1), so TCP-level exceptions like Connection reset by peer (ECONNRESET) propagate to Netty's TailContext, which logs them as WARN. This adds Http2ParentChannelExceptionHandler to the parent channel via doOnConnected (accessing channel.parent()). The handler consumes exceptions at DEBUG level WITHOUT closing the channel or altering connection lifecycle, matching HTTP/1.1 logging behavior. Changes: - Handler logs cause.toString() (not getMessage()) for null-safe diagnostics - Defensive try-catch for duplicate handler name on concurrent stream creation - Before/after verified with EmbeddedChannel unit tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 016fd3c commit 3e2b4f4

5 files changed

Lines changed: 255 additions & 1 deletion

File tree

sdk/cosmos/azure-cosmos-tests/pom.xml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ Licensed under the MIT License.
252252
<value>com.azure.cosmos.CosmosNettyLeakDetectorFactory</value>
253253
</property>
254254
</properties>
255-
<skipTests>true</skipTests>
255+
<skipTests>false</skipTests>
256256
</configuration>
257257
</plugin>
258258

@@ -934,3 +934,9 @@ Licensed under the MIT License.
934934
</profile>
935935
</profiles>
936936
</project>
937+
938+
939+
940+
941+
942+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.implementation.http;
5+
6+
import io.netty.channel.ChannelInboundHandlerAdapter;
7+
import io.netty.channel.embedded.EmbeddedChannel;
8+
import io.netty.handler.codec.http2.Http2FrameCodec;
9+
import io.netty.handler.codec.http2.Http2FrameCodecBuilder;
10+
import io.netty.handler.codec.http2.Http2MultiplexHandler;
11+
import org.testng.annotations.Test;
12+
13+
import java.io.IOException;
14+
15+
import static org.assertj.core.api.Assertions.assertThat;
16+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
17+
18+
/**
19+
* Verifies that {@link Http2ParentChannelExceptionHandler} uses connection
20+
* state — active stream count and channel activity — to determine whether
21+
* exceptions are logged at DEBUG (suppressed) or WARN (preserved).
22+
* Exception type is NOT a filtering dimension.
23+
*
24+
* The EmbeddedChannel is configured to mirror the production HTTP/2 parent
25+
* channel pipeline:
26+
* <pre>
27+
* Http2FrameCodec → Http2MultiplexHandler → Http2ParentChannelExceptionHandler → TailContext
28+
* </pre>
29+
* (SslHandler is omitted because it requires an SSLContext and is not relevant
30+
* to exception propagation behavior.)
31+
*
32+
* {@code checkException()} re-throws any exception that reached the pipeline tail.
33+
*/
34+
public class Http2ParentChannelExceptionHandlerTest {
35+
36+
/**
37+
* Creates an EmbeddedChannel matching the production HTTP/2 parent channel
38+
* pipeline (minus SslHandler): Http2FrameCodec → Http2MultiplexHandler →
39+
* Http2ParentChannelExceptionHandler.
40+
*/
41+
private static EmbeddedChannel createH2ParentChannel(boolean withExceptionHandler) {
42+
Http2FrameCodec codec = Http2FrameCodecBuilder.forClient()
43+
.autoAckSettingsFrame(true)
44+
.build();
45+
46+
Http2MultiplexHandler multiplexHandler = new Http2MultiplexHandler(
47+
new ChannelInboundHandlerAdapter());
48+
49+
if (withExceptionHandler) {
50+
return new EmbeddedChannel(codec, multiplexHandler,
51+
new Http2ParentChannelExceptionHandler());
52+
} else {
53+
return new EmbeddedChannel(codec, multiplexHandler);
54+
}
55+
}
56+
57+
/**
58+
* BEFORE fix — without the handler, exceptions reach the pipeline tail.
59+
* EmbeddedChannel's checkException() re-throws the unhandled exception,
60+
* proving it reached Netty's TailContext (which in production logs as WARN).
61+
*/
62+
@Test(groups = "unit")
63+
public void withoutHandler_exceptionReachesTail() {
64+
EmbeddedChannel channel = createH2ParentChannel(false);
65+
66+
channel.pipeline().fireExceptionCaught(
67+
new IOException("Connection reset by peer"));
68+
69+
assertThatThrownBy(channel::checkException)
70+
.isInstanceOf(IOException.class)
71+
.hasMessageContaining("Connection reset by peer");
72+
73+
channel.finishAndReleaseAll();
74+
}
75+
76+
/**
77+
* With handler — exception on idle connection (0 active streams) is
78+
* consumed at DEBUG. The suppression is based on connection state
79+
* (no active streams), not exception type.
80+
*
81+
* In production, channel.isActive() transitions to false during the
82+
* RST handling cycle, satisfying the OR condition. In EmbeddedChannel
83+
* we can only verify the activeStreams == 0 branch.
84+
*/
85+
@Test(groups = "unit")
86+
public void withHandler_zeroActiveStreams_consumedAtDebug() {
87+
EmbeddedChannel channel = createH2ParentChannel(true);
88+
89+
Http2FrameCodec codec = channel.pipeline().get(Http2FrameCodec.class);
90+
assertThat(codec).isNotNull();
91+
assertThat(codec.connection().numActiveStreams()).isEqualTo(0);
92+
93+
channel.pipeline().fireExceptionCaught(
94+
new IOException("recvAddress(..) failed with error(-104): Connection reset by peer"));
95+
96+
// Exception consumed — does NOT reach tail
97+
channel.checkException();
98+
99+
channel.finishAndReleaseAll();
100+
}
101+
102+
/**
103+
* Handler does not close the channel — connection lifecycle is managed
104+
* by reactor-netty's pool eviction, not by this handler.
105+
*/
106+
@Test(groups = "unit")
107+
public void withHandler_exceptionDoesNotCloseChannel() {
108+
EmbeddedChannel channel = createH2ParentChannel(true);
109+
110+
assertThat(channel.isActive()).isTrue();
111+
112+
channel.pipeline().fireExceptionCaught(
113+
new IOException("Connection reset by peer"));
114+
115+
channel.checkException();
116+
assertThat(channel.isOpen()).isTrue();
117+
118+
channel.finishAndReleaseAll();
119+
}
120+
121+
/**
122+
* RuntimeException on idle connection is also consumed — suppression
123+
* is based on connection state, not exception type.
124+
*/
125+
@Test(groups = "unit")
126+
public void withHandler_runtimeException_zeroActiveStreams_consumed() {
127+
EmbeddedChannel channel = createH2ParentChannel(true);
128+
129+
channel.pipeline().fireExceptionCaught(
130+
new RuntimeException("Unexpected state error"));
131+
132+
channel.checkException();
133+
134+
channel.finishAndReleaseAll();
135+
}
136+
137+
/**
138+
* NullPointerException on idle connection is also consumed — same
139+
* connection-state-based suppression regardless of exception type.
140+
*/
141+
@Test(groups = "unit")
142+
public void withHandler_npe_zeroActiveStreams_consumed() {
143+
EmbeddedChannel channel = createH2ParentChannel(true);
144+
145+
channel.pipeline().fireExceptionCaught(
146+
new NullPointerException("handler bug"));
147+
148+
channel.checkException();
149+
150+
channel.finishAndReleaseAll();
151+
}
152+
}

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* Fixed JVM `<clinit>` deadlock when multiple threads concurrently trigger Cosmos SDK class loading for the first time. - See [PR 48689](https://github.com/Azure/azure-sdk-for-java/pull/48689)
1313
* Fixed an issue where `CustomItemSerializer` was incorrectly applied to internal SDK query pipeline structures (e.g., `OrderByRowResult`, `Document`), causing deserialization failures in ORDER BY, GROUP BY, aggregate, DISTINCT, and hybrid search queries. - See [PR 48811](https://github.com/Azure/azure-sdk-for-java/pull/48811)
1414
* Fixed an issue where `SqlParameter` ignored the configured `CustomItemSerializer`, always using the internal default serializer instead. - See [PR 48811](https://github.com/Azure/azure-sdk-for-java/pull/48811)
15+
* Fixed Netty WARN log "An exceptionCaught() event was fired, and it reached at the tail of the pipeline" appearing on HTTP/2 connections when the server resets idle TCP connections. Added an exception handler on the HTTP/2 parent channel to consume connection-level exceptions at DEBUG level, matching HTTP/1.1 behavior. - See [PR 48687](https://github.com/Azure/azure-sdk-for-java/pull/48687)
1516

1617
#### Other Changes
1718

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.implementation.http;
5+
6+
import io.netty.channel.ChannelHandlerContext;
7+
import io.netty.channel.ChannelInboundHandlerAdapter;
8+
import io.netty.handler.codec.http2.Http2FrameCodec;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
12+
/**
13+
* Exception handler for the HTTP/2 parent (TCP) channel pipeline.
14+
* <p>
15+
* In HTTP/2, reactor-netty multiplexes streams on a shared parent TCP connection.
16+
* Child stream channels have {@code ChannelOperationsHandler} which catches exceptions
17+
* and fails the active subscriber (matching HTTP/1.1 behavior). However, the parent
18+
* channel has no such handler — exceptions propagate to Netty's {@code TailContext}
19+
* which logs them as WARN ("An exceptionCaught() event was fired, and it reached at
20+
* the tail of the pipeline").
21+
* <p>
22+
* This handler consumes all exceptions on the parent channel and uses connection
23+
* state to decide the log level:
24+
* <ul>
25+
* <li><b>DEBUG</b> — when {@code activeStreams == 0} OR {@code !channelActive}.
26+
* No in-flight requests are affected (e.g., TCP RST from LB idle timeout,
27+
* post-close cleanup).</li>
28+
* <li><b>WARN</b> — when active streams exist on a live channel. The exception
29+
* may affect in-flight requests and is worth alerting on.</li>
30+
* </ul>
31+
* <p>
32+
* The handler does NOT close the channel or alter connection lifecycle — reactor-netty
33+
* and the connection pool's eviction predicate ({@code !channel.isActive()}) handle that
34+
* independently.
35+
*
36+
* @see ReactorNettyClient#configureChannelPipelineHandlers()
37+
*/
38+
final class Http2ParentChannelExceptionHandler extends ChannelInboundHandlerAdapter {
39+
40+
static final String HANDLER_NAME = "cosmosH2ParentExceptionHandler";
41+
42+
private static final Logger logger = LoggerFactory.getLogger(Http2ParentChannelExceptionHandler.class);
43+
44+
@Override
45+
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
46+
int activeStreams = getActiveStreamCount(ctx);
47+
boolean channelActive = ctx.channel().isActive();
48+
49+
if (activeStreams == 0 || !channelActive) {
50+
// No active streams OR channel already inactive — exception is noise
51+
// (e.g., TCP RST from LB idle timeout, post-close cleanup).
52+
if (logger.isDebugEnabled()) {
53+
logger.debug(
54+
"Exception on HTTP/2 parent connection [id:{}, activeStreams={}, channelActive={}]: {}",
55+
ctx.channel().id().asShortText(), activeStreams, channelActive, cause.toString(), cause);
56+
}
57+
} else {
58+
// Active streams on a live channel — exception may affect in-flight requests.
59+
logger.warn(
60+
"Exception on HTTP/2 parent connection [id:{}, activeStreams={}, channelActive={}]: {}",
61+
ctx.channel().id().asShortText(), activeStreams, channelActive, cause.toString(), cause);
62+
}
63+
}
64+
65+
private static int getActiveStreamCount(ChannelHandlerContext ctx) {
66+
try {
67+
Http2FrameCodec codec = ctx.pipeline().get(Http2FrameCodec.class);
68+
if (codec != null) {
69+
return codec.connection().numActiveStreams();
70+
}
71+
} catch (Exception ignored) {
72+
// Codec not available or connection already torn down
73+
}
74+
return -1;
75+
}
76+
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/http/ReactorNettyClient.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,25 @@ private void configureChannelPipelineHandlers() {
166166
"customHeaderCleaner",
167167
new Http2ResponseHeaderCleanerHandler());
168168
}
169+
170+
// Install exception handler on the HTTP/2 parent (TCP) channel.
171+
// In H2, doOnConnected fires for stream (child) channels — channel.parent()
172+
// is the TCP connection. The parent pipeline has no ChannelOperationsHandler
173+
// (unlike H1.1), so TCP-level exceptions (RST, broken pipe) propagate to
174+
// Netty's TailContext and get logged as WARN. This handler matches H1.1
175+
// behavior by consuming exceptions at DEBUG level.
176+
Channel parent = connection.channel().parent();
177+
if (parent != null
178+
&& parent.pipeline().get(Http2ParentChannelExceptionHandler.HANDLER_NAME) == null) {
179+
180+
try {
181+
parent.pipeline().addLast(
182+
Http2ParentChannelExceptionHandler.HANDLER_NAME,
183+
new Http2ParentChannelExceptionHandler());
184+
} catch (IllegalArgumentException ignored) {
185+
// Duplicate handler — already installed by a concurrent stream
186+
}
187+
}
169188
}));
170189
}
171190
}

0 commit comments

Comments
 (0)