Skip to content

Commit 06d9906

Browse files
committed
fix(DISET): reject all connections during throttling to prevent queue growth
The previous throttle mechanism still accepted one connection each time the throttle window expired (every 0.25s), even while the service was overloaded. When threads were stuck (e.g. blocked on DB queries or deadlocked), each accepted connection was added to the already-full queue, making recovery impossible. Now every incoming connection is rejected while wantsThrottle is True, and the accept loop sleeps for THROTTLE_SERVICE_SLEEP_SECONDS after each rejection to avoid busy-spinning. This prevents the self-reinforcing stuck state in which the queue grows faster than it can drain.
1 parent c4ce62e commit 06d9906

File tree

1 file changed

+7
-11
lines changed

1 file changed

+7
-11
lines changed

src/DIRAC/Core/DISET/ServiceReactor.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,6 @@ def __acceptIncomingConnection(self, svcName=False):
200200
services at the same time
201201
"""
202202
sel = self.__getListeningSelector(svcName)
203-
throttleExpires = None
204203
while self.__alive:
205204
clientTransport = None
206205
try:
@@ -224,16 +223,13 @@ def __acceptIncomingConnection(self, svcName=False):
224223
gLogger.warn(f"Client connected from banned ip {clientIP}")
225224
clientTransport.close()
226225
continue
227-
# Handle throttling
228-
if self.__services[svcName].wantsThrottle and throttleExpires is None:
229-
throttleExpires = time.time() + THROTTLE_SERVICE_SLEEP_SECONDS
230-
if throttleExpires:
231-
if time.time() > throttleExpires:
232-
throttleExpires = None
233-
else:
234-
gLogger.warn("Rejecting client due to throttling", str(clientTransport.getRemoteAddress()))
235-
clientTransport.close()
236-
continue
226+
# Handle throttling: reject all connections while overloaded
227+
# to prevent queue growth when threads are stuck
228+
if self.__services[svcName].wantsThrottle:
229+
gLogger.warn("Rejecting client due to throttling", str(clientTransport.getRemoteAddress()))
230+
clientTransport.close()
231+
time.sleep(THROTTLE_SERVICE_SLEEP_SECONDS)
232+
continue
237233
# Handle connection
238234
self.__stats.connectionStablished()
239235
self.__services[svcName].handleConnection(clientTransport)

0 commit comments

Comments
 (0)