Skip to content

Commit b2cf8a9

Browse files
committed
[grid] Skip the TCP tunnel read-idle close when reads are paused
The read-idle close that TcpUpgradeTunnelHandler installs after the tunnel is established (introduced by db9b07a, 2026-03-11, "[grid] Router WebSocket handle dropped close frames, idle disconnects, high-latency proxying", #17197) tears down both tunnel channels when no bytes have been received for 120 seconds. That is the right behaviour when an intermediate load balancer has silently dropped the TCP connection, but it interacts badly with the backpressure mirroring added in 5cf2e2a (2026-05-26, "[grid] Apply TCP backpressure across the WebSocket tunnel handler", #17543): when the peer's outbound buffer crosses its high-water mark, TcpTunnelHandler sets autoRead=false on this side, no channelRead events fire, and the read-idle timer reaches its threshold even though the stall is by design. A sustained slow consumer for more than two minutes will therefore have the tunnel torn down underneath it. Gate the close in IdleCloseHandler.userEventTriggered on the channel's autoRead flag: while reads are paused by backpressure, log at FINE and ignore the event. As soon as the peer drains and TcpTunnelHandler restores autoRead=true the read-idle clock starts again from a fresh read, so a legitimately dropped connection is still detected within the same window once traffic resumes. IdleCloseHandler is promoted from a private nested class to package-private so a focused EmbeddedChannel unit test can exercise both branches: the close-both-channels behaviour on a normal idle event, and the ignored-while-paused behaviour with autoRead=false.
1 parent 83b0c35 commit b2cf8a9

3 files changed

Lines changed: 71 additions & 1 deletion

File tree

java/src/org/openqa/selenium/netty/server/TcpUpgradeTunnelHandler.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
379379
* the TCP connection without sending a FIN or RST (common with AWS ALB, k8s ingress-nginx at
380380
* their default 60 s idle timeout).
381381
*/
382-
private static final class IdleCloseHandler extends ChannelInboundHandlerAdapter {
382+
static final class IdleCloseHandler extends ChannelInboundHandlerAdapter {
383383

384384
private final Channel peer;
385385

@@ -390,6 +390,18 @@ private static final class IdleCloseHandler extends ChannelInboundHandlerAdapter
390390
@Override
391391
public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception {
392392
if (evt instanceof IdleStateEvent) {
393+
// Read-idle while backpressure has paused this channel is expected, not a sign of a
394+
// dropped connection: the peer's outbound buffer crossed its high-water mark, the
395+
// TcpTunnelHandler set autoRead=false on this side, and bytes will not be read again
396+
// until the peer drains. Skip the close so a sustained slow consumer does not get the
397+
// tunnel torn down underneath it.
398+
if (!ctx.channel().config().isAutoRead()) {
399+
LOG.log(
400+
Level.FINE,
401+
"TCP tunnel read-idle on {0} ignored: reads paused by backpressure",
402+
ctx.channel());
403+
return;
404+
}
393405
LOG.log(
394406
Level.FINE,
395407
"TCP tunnel read-idle timeout on {0}, closing both channels",

java/test/org/openqa/selenium/netty/server/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ load("@rules_jvm_external//:defs.bzl", "artifact")
22
load("//java:defs.bzl", "JUNIT5_DEPS", "java_library", "java_test_suite")
33

44
SMALL_TEST_SRCS = [
5+
"IdleCloseHandlerTest.java",
56
"MessageInboundConverterTest.java",
67
"MessageOutboundConverterTest.java",
78
"RequestConverterTest.java",
@@ -20,6 +21,7 @@ java_test_suite(
2021
artifact("io.netty:netty-buffer"),
2122
artifact("io.netty:netty-codec-http"),
2223
artifact("io.netty:netty-common"),
24+
artifact("io.netty:netty-handler"),
2325
artifact("io.netty:netty-transport"),
2426
artifact("org.junit.jupiter:junit-jupiter-api"),
2527
artifact("org.assertj:assertj-core"),
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Licensed to the Software Freedom Conservancy (SFC) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The SFC licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.openqa.selenium.netty.server;
19+
20+
import static org.assertj.core.api.Assertions.assertThat;
21+
22+
import io.netty.channel.embedded.EmbeddedChannel;
23+
import io.netty.handler.timeout.IdleStateEvent;
24+
import org.junit.jupiter.api.Test;
25+
26+
class IdleCloseHandlerTest {
27+
28+
@Test
29+
void idleEventClosesBothChannels() {
30+
EmbeddedChannel peer = new EmbeddedChannel();
31+
EmbeddedChannel self = new EmbeddedChannel(new TcpUpgradeTunnelHandler.IdleCloseHandler(peer));
32+
33+
assertThat(self.config().isAutoRead()).isTrue();
34+
self.pipeline().fireUserEventTriggered(IdleStateEvent.READER_IDLE_STATE_EVENT);
35+
36+
assertThat(self.isOpen()).isFalse();
37+
assertThat(peer.isOpen()).isFalse();
38+
}
39+
40+
@Test
41+
void idleEventIsIgnoredWhileBackpressureHasPausedReads() {
42+
EmbeddedChannel peer = new EmbeddedChannel();
43+
EmbeddedChannel self = new EmbeddedChannel(new TcpUpgradeTunnelHandler.IdleCloseHandler(peer));
44+
45+
// Simulate backpressure: TcpTunnelHandler would have set autoRead=false on this channel
46+
// because the peer's outbound buffer was full.
47+
self.config().setAutoRead(false);
48+
49+
// The read-idle event must NOT tear down the tunnel — no bytes arriving is the expected
50+
// consequence of pausing reads, not a sign of a dropped connection.
51+
self.pipeline().fireUserEventTriggered(IdleStateEvent.READER_IDLE_STATE_EVENT);
52+
53+
assertThat(self.isOpen()).isTrue();
54+
assertThat(peer.isOpen()).isTrue();
55+
}
56+
}

0 commit comments

Comments
 (0)