Skip to content

Commit 30cad35

Browse files
committed
update
1 parent a95ea17 commit 30cad35

1 file changed

Lines changed: 41 additions & 1 deletion

File tree

bin/mailmanctl

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,11 @@ LOCK_LIFETIME = mm_cfg.days(1) + mm_cfg.hours(6)
127127
SNOOZE = mm_cfg.days(1)
128128
MAX_RESTARTS = 10
129129

130+
# Global circuit breaker settings
131+
GLOBAL_RESTART_WINDOW = 30  # Length, in seconds, of the sliding window over which restarts are counted
132+
GLOBAL_MAX_RESTARTS = 100  # The breaker trips once the restart count within the window exceeds this value
133+
_global_restart_times = []  # time.time() values of recent restarts; pruned and appended to by check_global_circuit_breaker()
134+
130135
LogStdErr('error', 'mailmanctl', manual_reprime=0)
131136

132137

@@ -463,6 +468,29 @@ def check_status():
463468
return all_running
464469

465470

471+
def check_global_circuit_breaker():
    """Record one restart and report whether the global limit is exceeded.

    Each call counts as a restart: timestamps older than
    GLOBAL_RESTART_WINDOW seconds are dropped from the module-level
    _global_restart_times list, and the current time is then added to it.
    When the limit is exceeded an error is also written via syslog().

    Returns:
        bool: True if more than GLOBAL_MAX_RESTARTS restarts (including
        this one) fall inside the window, False otherwise.
    """
    global _global_restart_times
    current = time.time()

    # Keep only the restarts that are still inside the sliding window.
    recent = []
    for stamp in _global_restart_times:
        if current - stamp < GLOBAL_RESTART_WINDOW:
            recent.append(stamp)

    # This call itself counts as a restart.
    recent.append(current)
    _global_restart_times = recent

    tripped = len(recent) > GLOBAL_MAX_RESTARTS
    if tripped:
        syslog('error', 'Global circuit breaker triggered: %d restarts in %d seconds',
               len(recent), GLOBAL_RESTART_WINDOW)
    return tripped
492+
493+
466494
def main():
467495
try:
468496
args = parse_args()
@@ -661,7 +689,19 @@ Master qrunner detected abnormal subprocess exit
661689
pid, killsig, exitstatus, qrname,
662690
slice+1, count, restarting)
663691

664-
# See if we've reached the maximum number of allowable restarts
692+
# Check global circuit breaker before restarting
693+
if restarting and check_global_circuit_breaker():
694+
syslog('error', 'Global circuit breaker triggered - stopping all runners')
695+
# Stop all runners
696+
for pid in list(kids.keys()):
697+
try:
698+
os.kill(pid, signal.SIGTERM)
699+
except OSError as e:
700+
if e.errno != errno.ESRCH:
701+
raise
702+
# Exit the main loop
703+
break
704+
665705
if exitstatus != signal.SIGINT:
666706
restarts += 1
667707
if restarts > MAX_RESTARTS:

0 commit comments

Comments
 (0)