Skip to content

Commit ef48769

Browse files
CDRIVER-6092 CDRIVER-6262 CDRIVER-6268 implement exponential backoff and jitter in retry loops (#2240)
* Implement backpressure prose tests 1-4
* Add backpressure and other related unified tests
* Add mongoc_retry_backoff_generator_t
* Refactor with_transaction to use mongoc_retry_backoff_generator_t
* Add mongoc_token_bucket_t
* Implement overload retry loops
* Add mongoc_retryable_cmd_t
* Refactor retry loops to use mongoc_retryable_cmd_t
* Fix unified test runner

---------

Co-authored-by: Kevin Albertson <kevin.albertson@mongodb.com>
1 parent 4302c68 commit ef48769

45 files changed

Lines changed: 11841 additions & 431 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

NEWS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
libmongoc 2.3.0 [UNRELEASED]
2+
============================
3+
4+
## New Features
5+
6+
- Support retry for server overload errors.
7+
- Supported on all commands.
8+
- Custom application retry logic may need to be adjusted to avoid retrying too long.
9+
- Upgrade is recommended to avoid impacts of server changes related to overload errors.
10+
- If not upgrading, custom application retry logic may need to be adjusted to handle higher rates of overload errors.
11+
- Set the URI option `adaptiveRetries=true` to limit retries with a token bucket system.
12+
113
libmongoc 2.2.3
214
===============
315

src/libmongoc/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,6 @@ set (MONGOC_SOURCES
567567
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-cursor.c
568568
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-cursor-cmd.c
569569
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-cursor-change-stream.c
570-
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-cursor-cmd-deprecated.c
571570
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-cursor-find.c
572571
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-cursor-array.c
573572
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-database.c
@@ -607,6 +606,8 @@ set (MONGOC_SOURCES
607606
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-read-concern.c
608607
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-read-prefs.c
609608
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-rpc.c
609+
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-retryable-cmd.c
610+
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-retry-backoff-generator.c
610611
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-server-api.c
611612
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-server-description.c
612613
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-server-stream.c
@@ -624,6 +625,7 @@ set (MONGOC_SOURCES
624625
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-stream-socket.c
625626
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-structured-log.c
626627
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-timeout.c
628+
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-token-bucket.c
627629
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-topology.c
628630
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-topology-background-monitoring.c
629631
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-topology-description.c
@@ -1091,7 +1093,6 @@ set (test-libmongoc-sources
10911093
${PROJECT_SOURCE_DIR}/tests/test-mongoc-hedged-reads.c
10921094
${PROJECT_SOURCE_DIR}/tests/test-mongoc-http.c
10931095
${PROJECT_SOURCE_DIR}/tests/test-mongoc-interrupt.c
1094-
${PROJECT_SOURCE_DIR}/tests/test-mongoc-jitter-source.c
10951096
${PROJECT_SOURCE_DIR}/tests/test-mongoc-linux-distro-scanner.c
10961097
${PROJECT_SOURCE_DIR}/tests/test-mongoc-list.c
10971098
${PROJECT_SOURCE_DIR}/tests/test-mongoc-loadbalanced.c
@@ -1108,6 +1109,7 @@ set (test-libmongoc-sources
11081109
${PROJECT_SOURCE_DIR}/tests/test-mongoc-read-concern.c
11091110
${PROJECT_SOURCE_DIR}/tests/test-mongoc-read-prefs.c
11101111
${PROJECT_SOURCE_DIR}/tests/test-mongoc-read-write-concern.c
1112+
${PROJECT_SOURCE_DIR}/tests/test-mongoc-retry-backoff-generator.c
11111113
${PROJECT_SOURCE_DIR}/tests/test-mongoc-retryability-helpers.c
11121114
${PROJECT_SOURCE_DIR}/tests/test-mongoc-retryable-reads.c
11131115
${PROJECT_SOURCE_DIR}/tests/test-mongoc-retryable-writes.c
@@ -1130,6 +1132,7 @@ set (test-libmongoc-sources
11301132
${PROJECT_SOURCE_DIR}/tests/test-mongoc-structured-log.c
11311133
${PROJECT_SOURCE_DIR}/tests/test-mongoc-thread.c
11321134
${PROJECT_SOURCE_DIR}/tests/test-mongoc-timeout.c
1135+
${PROJECT_SOURCE_DIR}/tests/test-mongoc-token-bucket.c
11331136
${PROJECT_SOURCE_DIR}/tests/test-mongoc-topology-description.c
11341137
${PROJECT_SOURCE_DIR}/tests/test-mongoc-topology-reconcile.c
11351138
${PROJECT_SOURCE_DIR}/tests/test-mongoc-topology-scanner.c

src/libmongoc/doc/mongoc_uri_t.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ Constant Key Def
9797
MONGOC_URI_RETRYREADS retryreads true If "true" and the server is a MongoDB 3.6+ standalone, replica set, or sharded cluster, the driver safely retries a read that failed due to a network error or replica set failover.
9898
MONGOC_URI_RETRYWRITES retrywrites true if driver built w/ TLS If "true" and the server is a MongoDB 3.6+ replica set or sharded cluster, the driver safely retries a write that failed due to a network error or replica set failover. Only inserts, updates of single documents, or deletes of single
9999
documents are retried.
100+
MONGOC_URI_ADAPTIVERETRIES adaptiveretries false If "true", the driver will limit retry attempts during periods of prolonged server overload.
100101
MONGOC_URI_APPNAME appname Empty (no appname) The client application name. This value is used by MongoDB when it logs connection information and profile information, such as slow queries.
101102
MONGOC_URI_TLS tls Empty (not set, same as false) {true|false}, indicating if TLS must be used. (See also :symbol:`mongoc_client_set_ssl_opts` and :symbol:`mongoc_client_pool_set_ssl_opts`.)
102103
MONGOC_URI_COMPRESSORS compressors Empty (no compressors) Comma separated list of compressors, if any, to use to compress the wire protocol messages. Snappy, zlib, and zstd are optional build time dependencies, and enable the "snappy", "zlib", and "zstd" values respectively.

src/libmongoc/src/mongoc/mongoc-bulkwrite.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1853,8 +1853,14 @@ mongoc_bulkwrite_execute(mongoc_bulkwrite_t *self, const mongoc_bulkwriteopts_t
18531853
// Send command.
18541854
{
18551855
mongoc_server_stream_t *new_ss = NULL;
1856-
bool ok = mongoc_cluster_run_retryable_write(
1857-
&self->client->cluster, &parts.assembled, parts.is_retryable_write, &new_ss, &cmd_reply, &error);
1856+
bool ok = mongoc_cluster_run_retryable_write(&self->client->cluster,
1857+
&parts.assembled,
1858+
parts.is_retryable_write,
1859+
self->client->jitter_source,
1860+
self->client->topology->token_bucket,
1861+
&new_ss,
1862+
&cmd_reply,
1863+
&error);
18581864
if (new_ss) {
18591865
// A retry occurred. Save the newly created stream to use for subsequent commands.
18601866
mongoc_server_stream_cleanup(ss);

src/libmongoc/src/mongoc/mongoc-client-private.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
#include <mongoc/mongoc-apm-private.h>
2727
#include <mongoc/mongoc-buffer-private.h>
2828
#include <mongoc/mongoc-cluster-private.h>
29+
#include <mongoc/mongoc-jitter-source-private.h>
2930
#include <mongoc/mongoc-rpc-private.h>
31+
#include <mongoc/mongoc-token-bucket-private.h>
3032

3133
#include <mongoc/mongoc-config.h>
3234
#include <mongoc/mongoc-host-list.h>
@@ -90,6 +92,8 @@ BSON_BEGIN_DECLS
9092
#define WIRE_VERSION_MIN WIRE_VERSION_4_2 /* a.k.a. minWireVersion */
9193
#define WIRE_VERSION_MAX WIRE_VERSION_8_0 /* a.k.a. maxWireVersion */
9294

95+
#define MONGOC_DEFAULT_RETRY_TOKEN_CAPACITY 1000.0
96+
9397
struct _mongoc_collection_t;
9498

9599
struct _mongoc_client_t {
@@ -121,6 +125,8 @@ struct _mongoc_client_t {
121125
unsigned int csid_rand_seed;
122126

123127
uint32_t generation;
128+
129+
mongoc_jitter_source_t *jitter_source;
124130
};
125131

126132
/* Defines whether _mongoc_client_command_with_opts() is acting as a read
@@ -233,6 +239,9 @@ _mongoc_client_set_stream_initiator_single_or_pooled(mongoc_client_t *client,
233239
mongoc_stream_initiator_t initiator,
234240
void *user_data);
235241

242+
void
243+
_mongoc_client_set_jitter_source(mongoc_client_t *client, mongoc_jitter_source_t *source);
244+
236245
BSON_END_DECLS
237246

238247
#endif /* MONGOC_CLIENT_PRIVATE_H */

src/libmongoc/src/mongoc/mongoc-client-session.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <mongoc/mongoc-rand-private.h>
2424
#include <mongoc/mongoc-read-concern-private.h>
2525
#include <mongoc/mongoc-read-prefs-private.h>
26+
#include <mongoc/mongoc-retry-backoff-generator-private.h>
2627
#include <mongoc/mongoc-trace-private.h>
2728
#include <mongoc/mongoc-util-private.h>
2829

@@ -891,6 +892,10 @@ _max_time_ms_failure(bson_t *reply)
891892
return false;
892893
}
893894

895+
#define MONGOC_WITH_TRANSACTION_RETRY_BACKOFF_GROWTH_FACTOR 1.5
896+
#define MONGOC_WITH_TRANSACTION_RETRY_BACKOFF_INITIAL mlib_duration(5, ms)
897+
#define MONGOC_WITH_TRANSACTION_RETRY_BACKOFF_MAX mlib_duration(500, ms)
898+
894899
bool
895900
mongoc_client_session_with_transaction(mongoc_client_session_t *session,
896901
mongoc_client_session_with_transaction_cb_t cb,
@@ -911,7 +916,16 @@ mongoc_client_session_with_transaction(mongoc_client_session_t *session,
911916

912917
const mlib_timer timer = mlib_expires_after(timeout, ms);
913918

914-
int transaction_attempt = 0;
919+
const mongoc_retry_backoff_params_t retry_backoff_params = {
920+
.growth_factor = MONGOC_WITH_TRANSACTION_RETRY_BACKOFF_GROWTH_FACTOR,
921+
.backoff_initial = MONGOC_WITH_TRANSACTION_RETRY_BACKOFF_INITIAL,
922+
.backoff_max = MONGOC_WITH_TRANSACTION_RETRY_BACKOFF_MAX,
923+
};
924+
925+
mongoc_retry_backoff_generator_t *const retry_backoff_generator =
926+
_mongoc_retry_backoff_generator_new(retry_backoff_params, session->jitter_source);
927+
928+
bool is_first_attempt = true;
915929

916930
/* Attempt to wrap a user callback in start- and end- transaction semantics.
917931
If this fails for transient reasons, restart, either from the very
@@ -921,10 +935,10 @@ mongoc_client_session_with_transaction(mongoc_client_session_t *session,
921935
At the top of this loop, active_reply should always be NULL, and
922936
local_reply should always be uninitialized. */
923937
while (true) {
924-
if (transaction_attempt > 0) {
925-
const double jitter = _mongoc_jitter_source_generate(session->jitter_source);
926-
927-
const mlib_duration backoff_duration = _mongoc_compute_backoff_duration(jitter, transaction_attempt);
938+
if (is_first_attempt) {
939+
is_first_attempt = false;
940+
} else {
941+
const mlib_duration backoff_duration = _mongoc_retry_backoff_generator_next(retry_backoff_generator);
928942

929943
const mlib_timer backoff_timer = mlib_expires_after(backoff_duration);
930944

@@ -944,8 +958,6 @@ mongoc_client_session_with_transaction(mongoc_client_session_t *session,
944958
GOTO(done);
945959
}
946960

947-
transaction_attempt = BSON_MIN(transaction_attempt + 1, MONGOC_BACKOFF_ATTEMPT_LIMIT);
948-
949961
res = cb(session, ctx, &active_reply, error);
950962
state = session->txn.state;
951963

@@ -1030,6 +1042,8 @@ mongoc_client_session_with_transaction(mongoc_client_session_t *session,
10301042
}
10311043

10321044
done:
1045+
_mongoc_retry_backoff_generator_destroy(retry_backoff_generator);
1046+
10331047
/* At this point, active_reply is either pointing to the user's reply
10341048
object, or our local one on the stack, or is NULL. */
10351049
if (reply && active_reply) {

0 commit comments

Comments
 (0)