Skip to content

Commit c8b2b18

Browse files
authored
CBL-8071: Do not retry replicator connections if WebSocket being closed abnormally (#2475)
* CBL-8071: Do not retry replicator connections if WebSocket being closed abnormally The Abnormal Close occurs after the client is already decided to close the socket but fails to receive response from the server. We don't want to retry replicating on this condition. We fix it by a change in WebSocket, that it now treats a connection closure that is not fully complete only due to a pending final server acknowledgement (i.e. CLOSE frame was already sent).
1 parent 8502761 commit c8b2b18

2 files changed

Lines changed: 82 additions & 1 deletion

File tree

Networking/WebSockets/WebSocketImpl.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,8 @@ namespace litecore::websocket {
539539

540540
if ( clean ) {
541541
status.reason = kWebSocketClose;
542-
if ( !expected ) status.code = kCodeAbnormal;
542+
// If !expected, it follows that _closeSent implies !_closeReceived
543+
if ( !expected ) status.code = _closeSent ? kCodeNormal : kCodeAbnormal;
543544
else if ( !_closeMessage )
544545
status.code = kCodeNormal;
545546
else {

Replicator/tests/ReplicatorAPITest.cc

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,86 @@ TEST_CASE_METHOD(ReplicatorAPITest, "Stop after transient connect failure", "[C]
828828
waitForStatus(kC4Stopped);
829829
}
830830

831+
// CBL-8074
832+
TEST_CASE_METHOD(ReplicatorAPITest, "WebSocket Peer Going Away", "[C][Push][Pull]") {
833+
bool afterClose = false;
834+
C4SocketFactory factory = {};
835+
C4Socket* c4socket = nullptr;
836+
factory.context = &c4socket;
837+
factory.open = [](C4Socket* socket, const C4Address* addr, C4Slice options, void* context) {
838+
c4socket_opened(socket);
839+
*(C4Socket**)context = socket;
840+
};
841+
842+
factory.close = [](C4Socket* socket) {
843+
// Not invoked
844+
REQUIRE(false);
845+
};
846+
847+
// "peer going away" before CLOSE is sent
848+
// Replicator receives error code 1006, which is transient.
849+
// C4Replicator goes to offline and waiting for retry.
850+
SECTION("CLOSE Not Sent") {
851+
afterClose = false;
852+
_mayGoOffline = true;
853+
factory.write = [](C4Socket* socket, C4SliceResult msg) {
854+
// Simulate Peer-Going-Away before Replicator calling Stop.
855+
// Socket is closed unexpectedly, without the client sending CLOSE
856+
FLSliceResult_Release(msg);
857+
c4socket_closed(socket, {WebSocketDomain, websocket::kCodeGoingAway});
858+
};
859+
}
860+
861+
// "peer going away" after CLOSE frame was already sent
862+
// Since the replicator is already stopped when the peer goes away, WebSocket will
863+
// treat it as Normal Close.
864+
SECTION("CLOSE Has Been Sent") {
865+
afterClose = true;
866+
_mayGoOffline = false;
867+
factory.write = [](C4Socket* socket, C4SliceResult msg) {
868+
// Do nothing
869+
FLSliceResult_Release(msg);
870+
};
871+
}
872+
873+
_socketFactory = &factory;
874+
C4Error err;
875+
importJSONLines(sFixturesDir + "names_100.json");
876+
877+
if ( !afterClose ) {
878+
// WebSocket code 1006, transient error
879+
REQUIRE(startReplicator(kC4Disabled, kC4OneShot, WITH_ERROR(&err)));
880+
_numCallbacksWithLevel[kC4Offline] = 0;
881+
waitForStatus(kC4Offline);
882+
} else {
883+
REQUIRE(startReplicator(kC4Disabled, kC4Continuous, WITH_ERROR(&err)));
884+
// Making sure the WebSocket is open/connected
885+
waitForStatus(kC4Busy);
886+
}
887+
888+
c4repl_stop(_repl);
889+
890+
if ( afterClose ) {
891+
// Give some time for Replicator::_stop to be called, but before timeout in WebSocketImpl
892+
// to not get Timeout error.
893+
std::this_thread::sleep_for(1s);
894+
// WebSocket will treat it as Normal Close
895+
c4socket_closed(c4socket, {WebSocketDomain, websocket::kCodeGoingAway});
896+
}
897+
898+
waitForStatus(kC4Stopped);
899+
900+
auto status = c4repl_getStatus(_repl);
901+
902+
if ( !afterClose ) {
903+
// kCodeAbnormal == 1006
904+
CHECK((status.error.domain == WebSocketDomain && status.error.code == websocket::kCodeAbnormal));
905+
} else {
906+
// "peer going away" after stop results in normal Stop.
907+
CHECK(status.error.code == 0);
908+
}
909+
}
910+
831911
TEST_CASE_METHOD(ReplicatorAPITest, "Calling c4socket_ method after STOP", "[C][Push][Pull]") {
832912
// c.f. the flow with test case "Stop after transient connect failure"
833913
_mayGoOffline = true;

0 commit comments

Comments
 (0)