Skip to content

Commit 84814b2

Browse files
authored
PYTHON-5731 - Server selection deprioritization only for overload errors on replica sets (#2710)
1 parent 908102d commit 84814b2

File tree

4 files changed

+166
-2
lines changed

4 files changed

+166
-2
lines changed

pymongo/asynchronous/mongo_client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2825,7 +2825,11 @@ async def run(self) -> T:
28252825
if self._last_error is None:
28262826
self._last_error = exc
28272827

2828-
if self._server is not None:
2828+
if (
2829+
self._server is not None
2830+
and self._client.topology_description.topology_type_name == "Sharded"
2831+
or exc.has_error_label("SystemOverloadedError")
2832+
):
28292833
self._deprioritized_servers.append(self._server)
28302834

28312835
def _is_not_eligible_for_retry(self) -> bool:

pymongo/synchronous/mongo_client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2815,7 +2815,11 @@ def run(self) -> T:
28152815
if self._last_error is None:
28162816
self._last_error = exc
28172817

2818-
if self._server is not None:
2818+
if (
2819+
self._server is not None
2820+
and self._client.topology_description.topology_type_name == "Sharded"
2821+
or exc.has_error_label("SystemOverloadedError")
2822+
):
28192823
self._deprioritized_servers.append(self._server)
28202824

28212825
def _is_not_eligible_for_retry(self) -> bool:

test/asynchronous/test_retryable_reads.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,84 @@ async def test_retryable_reads_are_retried_on_the_same_implicit_session(self):
261261
self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"])
262262
self.assertIsNot(command_docs[0], command_docs[1])
263263

264+
@async_client_context.require_replica_set
265+
@async_client_context.require_secondaries_count(1)
266+
@async_client_context.require_failCommand_fail_point
267+
@async_client_context.require_version_min(4, 4, 0)
268+
async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available(
269+
self
270+
):
271+
listener = OvertCommandListener()
272+
273+
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
274+
client = await self.async_rs_or_single_client(
275+
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
276+
)
277+
278+
# 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
279+
command_args = {
280+
"configureFailPoint": "failCommand",
281+
"mode": {"times": 1},
282+
"data": {
283+
"failCommands": ["find"],
284+
"errorLabels": ["RetryableError", "SystemOverloadedError"],
285+
"errorCode": 6,
286+
},
287+
}
288+
await async_set_fail_point(client, command_args)
289+
290+
# 3. Reset the command event monitor to clear the fail point command from its stored events.
291+
listener.reset()
292+
293+
# 4. Execute a `find` command with `client`.
294+
await client.t.t.find_one({})
295+
296+
# 5. Assert that one failed command event and one successful command event occurred.
297+
self.assertEqual(len(listener.failed_events), 1)
298+
self.assertEqual(len(listener.succeeded_events), 1)
299+
300+
# 6. Assert that both events occurred on different servers.
301+
assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id
302+
303+
@async_client_context.require_replica_set
304+
@async_client_context.require_secondaries_count(1)
305+
@async_client_context.require_failCommand_fail_point
306+
@async_client_context.require_version_min(4, 4, 0)
307+
async def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server(
308+
self
309+
):
310+
listener = OvertCommandListener()
311+
312+
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
313+
client = await self.async_rs_or_single_client(
314+
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
315+
)
316+
317+
# 2. Configure a fail point with the RetryableError error label.
318+
command_args = {
319+
"configureFailPoint": "failCommand",
320+
"mode": {"times": 1},
321+
"data": {
322+
"failCommands": ["find"],
323+
"errorLabels": ["RetryableError"],
324+
"errorCode": 6,
325+
},
326+
}
327+
await async_set_fail_point(client, command_args)
328+
329+
# 3. Reset the command event monitor to clear the fail point command from its stored events.
330+
listener.reset()
331+
332+
# 4. Execute a `find` command with `client`.
333+
await client.t.t.find_one({})
334+
335+
# 5. Assert that one failed command event and one successful command event occurred.
336+
self.assertEqual(len(listener.failed_events), 1)
337+
self.assertEqual(len(listener.succeeded_events), 1)
338+
339+
# 6. Assert that both events occurred the same server.
340+
assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id
341+
264342

265343
if __name__ == "__main__":
266344
unittest.main()

test/test_retryable_reads.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,84 @@ def test_retryable_reads_are_retried_on_the_same_implicit_session(self):
259259
self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"])
260260
self.assertIsNot(command_docs[0], command_docs[1])
261261

262+
@client_context.require_replica_set
263+
@client_context.require_secondaries_count(1)
264+
@client_context.require_failCommand_fail_point
265+
@client_context.require_version_min(4, 4, 0)
266+
def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available(
267+
self
268+
):
269+
listener = OvertCommandListener()
270+
271+
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
272+
client = self.rs_or_single_client(
273+
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
274+
)
275+
276+
# 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
277+
command_args = {
278+
"configureFailPoint": "failCommand",
279+
"mode": {"times": 1},
280+
"data": {
281+
"failCommands": ["find"],
282+
"errorLabels": ["RetryableError", "SystemOverloadedError"],
283+
"errorCode": 6,
284+
},
285+
}
286+
set_fail_point(client, command_args)
287+
288+
# 3. Reset the command event monitor to clear the fail point command from its stored events.
289+
listener.reset()
290+
291+
# 4. Execute a `find` command with `client`.
292+
client.t.t.find_one({})
293+
294+
# 5. Assert that one failed command event and one successful command event occurred.
295+
self.assertEqual(len(listener.failed_events), 1)
296+
self.assertEqual(len(listener.succeeded_events), 1)
297+
298+
# 6. Assert that both events occurred on different servers.
299+
assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id
300+
301+
@client_context.require_replica_set
302+
@client_context.require_secondaries_count(1)
303+
@client_context.require_failCommand_fail_point
304+
@client_context.require_version_min(4, 4, 0)
305+
def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server(
306+
self
307+
):
308+
listener = OvertCommandListener()
309+
310+
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
311+
client = self.rs_or_single_client(
312+
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
313+
)
314+
315+
# 2. Configure a fail point with the RetryableError error label.
316+
command_args = {
317+
"configureFailPoint": "failCommand",
318+
"mode": {"times": 1},
319+
"data": {
320+
"failCommands": ["find"],
321+
"errorLabels": ["RetryableError"],
322+
"errorCode": 6,
323+
},
324+
}
325+
set_fail_point(client, command_args)
326+
327+
# 3. Reset the command event monitor to clear the fail point command from its stored events.
328+
listener.reset()
329+
330+
# 4. Execute a `find` command with `client`.
331+
client.t.t.find_one({})
332+
333+
# 5. Assert that one failed command event and one successful command event occurred.
334+
self.assertEqual(len(listener.failed_events), 1)
335+
self.assertEqual(len(listener.succeeded_events), 1)
336+
337+
# 6. Assert that both events occurred the same server.
338+
assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id
339+
262340

263341
if __name__ == "__main__":
264342
unittest.main()

0 commit comments

Comments
 (0)