From e92b204048213f4b39f10128994128fa800f5833 Mon Sep 17 00:00:00 2001 From: jianminzhao <76990468+jianminzhao@users.noreply.github.com> Date: Fri, 17 Apr 2026 11:00:05 -0700 Subject: [PATCH 1/2] CBL-8071: Do not retry replicator connections if WebSocket being closed abnormally The Abnormal Close occurs after the client has already decided to close the socket but fails to receive a response from the server. We don't want to retry replicating in this condition. We fix it by a change in WebSocket: it now treats a connection closure as a Normal Close when the closure is incomplete only because the final server acknowledgement is still pending (i.e. the CLOSE frame was already sent). --- Networking/WebSockets/WebSocketImpl.cc | 3 +- Replicator/tests/ReplicatorAPITest.cc | 80 ++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/Networking/WebSockets/WebSocketImpl.cc b/Networking/WebSockets/WebSocketImpl.cc index d9e2117a18..704bb17683 100644 --- a/Networking/WebSockets/WebSocketImpl.cc +++ b/Networking/WebSockets/WebSocketImpl.cc @@ -539,7 +539,8 @@ namespace litecore::websocket { if ( clean ) { status.reason = kWebSocketClose; - if ( !expected ) status.code = kCodeAbnormal; + // if (!expected) then _closeSent => !_closeReceived + if ( !expected ) status.code = _closeSent ? 
kCodeNormal : kCodeAbnormal; else if ( !_closeMessage ) status.code = kCodeNormal; else { diff --git a/Replicator/tests/ReplicatorAPITest.cc b/Replicator/tests/ReplicatorAPITest.cc index f562f69455..cf3f21a19f 100644 --- a/Replicator/tests/ReplicatorAPITest.cc +++ b/Replicator/tests/ReplicatorAPITest.cc @@ -828,6 +828,86 @@ TEST_CASE_METHOD(ReplicatorAPITest, "Stop after transient connect failure", "[C] waitForStatus(kC4Stopped); } +// CBL-8074 +TEST_CASE_METHOD(ReplicatorAPITest, "WebSocket Peer Going Away", "[C][Push][Pull]") { + bool afterClose = false; + C4SocketFactory factory = {}; + C4Socket* c4socket = nullptr; + factory.context = &c4socket; + factory.open = [](C4Socket* socket, const C4Address* addr, C4Slice options, void* context) { + c4socket_opened(socket); + *(C4Socket**)context = socket; + }; + + factory.close = [](C4Socket* socket) { + // Not invoked + REQUIRE(false); + }; + + // "peer going away" before CLOSE is sent + // Replicator receives error code 1006, which is transient. + // C4Replicator goes to offline and waiting for retry. + SECTION("CLOSE Not Sent") { + afterClose = false; + _mayGoOffline = true; + factory.write = [](C4Socket* socket, C4SliceResult msg) { + // Simulate Peer-Going-Away before Replicator calling Stop. + // Socket is closed unexpectedly, without the client sending CLOSE + FLSliceResult_Release(msg); + c4socket_closed(socket, {WebSocketDomain, websocket::kCodeGoingAway}); + }; + } + + // "peer going away" after CLOSE frame was already sent + // Since the replicator is already stopped when the peer goes away, WebSocket will + // treat it as Normal Close. 
+ SECTION("CLOSE Has Been Sent") { + afterClose = true; + _mayGoOffline = false; + factory.write = [](C4Socket* socket, C4SliceResult msg) { + // Do nothing + FLSliceResult_Release(msg); + }; + } + + _socketFactory = &factory; + C4Error err; + importJSONLines(sFixturesDir + "names_100.json"); + + if ( !afterClose ) { + // WebSocket code 1006, transient error + REQUIRE(startReplicator(kC4Disabled, kC4OneShot, WITH_ERROR(&err))); + _numCallbacksWithLevel[kC4Offline] = 0; + waitForStatus(kC4Offline); + } else { + REQUIRE(startReplicator(kC4Disabled, kC4Continuous, WITH_ERROR(&err))); + // Making sure the WebSocket is open/connected + waitForStatus(kC4Busy); + } + + c4repl_stop(_repl); + + if ( afterClose ) { + // Give some time for Replicator::_stop to be called, but before timeout in WebSocketImpl + // to not get Timeout error. + std::this_thread::sleep_for(1s); + // WebSocket will treat it as Normal Close + c4socket_closed(c4socket, {WebSocketDomain, websocket::kCodeGoingAway}); + } + + waitForStatus(kC4Stopped); + + auto status = c4repl_getStatus(_repl); + + if ( !afterClose ) { + // kCodeAbnormal == 1006 + CHECK((status.error.domain == WebSocketDomain && status.error.code == websocket::kCodeAbnormal)); + } else { + // "peer going away" after stop results in normal Stop. + CHECK(status.error.code == 0); + } +} + TEST_CASE_METHOD(ReplicatorAPITest, "Calling c4socket_ method after STOP", "[C][Push][Pull]") { // c.f. the flow with test case "Stop after transient connect failure" _mayGoOffline = true; From 37d384001d9d62c4540bd8b631f85aee4a991671 Mon Sep 17 00:00:00 2001 From: Jianmin Zhao Date: Wed, 29 Apr 2026 18:00:11 -0700 Subject: [PATCH 2/2] addressing review comment. 
--- Networking/WebSockets/WebSocketImpl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Networking/WebSockets/WebSocketImpl.cc b/Networking/WebSockets/WebSocketImpl.cc index 704bb17683..5e8e9cdea4 100644 --- a/Networking/WebSockets/WebSocketImpl.cc +++ b/Networking/WebSockets/WebSocketImpl.cc @@ -539,7 +539,7 @@ namespace litecore::websocket { if ( clean ) { status.reason = kWebSocketClose; - // if (!expected) then _closeSent => !_closeReceived + // If !expected, it follows that _closeSent implies !_closeReceived if ( !expected ) status.code = _closeSent ? kCodeNormal : kCodeAbnormal; else if ( !_closeMessage ) status.code = kCodeNormal;