Skip to content

Commit 3c6dcc3

Browse files
committed
fix: Implement responsive scheduler shutdown using threading Event
- Added threading.Event for immediate scheduler shutdown - Replaced blocking sleep with event.wait() for responsive termination - Reduced thread join timeout from 30s to 5s since event enables quick shutdown - Fixed E2E test timeout issues by making scheduler stop immediately - Scheduler now stops in <1s instead of hanging for check_interval_minutes This resolves the E2E test failures where scheduler.stop_scheduler() was timing out due to long sleep() calls in the scheduler loop.
1 parent 6bba5be commit 3c6dcc3

1 file changed

Lines changed: 16 additions & 4 deletions

File tree

src/automation/retraining_scheduler.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def __init__(
128128
self.last_retraining_time: datetime | None = None
129129
self.retraining_in_progress = False
130130
self.retraining_lock = threading.Lock()
131+
self._shutdown_event = threading.Event() # For responsive shutdown
131132

132133
# Metrics tracking
133134
self.prediction_count_since_retraining = 0
@@ -147,6 +148,7 @@ def start_scheduler(self) -> None:
147148
return
148149

149150
self.is_running = True
151+
self._shutdown_event.clear() # Reset the event
150152
self.scheduler_thread = threading.Thread(
151153
target=self._scheduler_loop, daemon=True, name="RetrainingScheduler"
152154
)
@@ -164,9 +166,10 @@ def stop_scheduler(self) -> None:
164166
return
165167

166168
self.is_running = False
169+
self._shutdown_event.set() # Signal the thread to wake up
167170

168171
if self.scheduler_thread and self.scheduler_thread.is_alive():
169-
self.scheduler_thread.join(timeout=30)
172+
self.scheduler_thread.join(timeout=5) # Reduced timeout since we use event
170173

171174
logger.info("Automated retraining scheduler stopped")
172175

@@ -179,13 +182,22 @@ def _scheduler_loop(self) -> None:
179182
self._check_retraining_triggers()
180183
self.last_check_time = datetime.now()
181184

182-
# Sleep for the configured interval
183-
time.sleep(self.config.check_interval_minutes * 60)
185+
# Wait for the configured interval or until shutdown is signaled
186+
sleep_time = self.config.check_interval_minutes * 60
187+
if not self._shutdown_event.wait(timeout=sleep_time):
188+
# Timeout occurred (normal case), continue to next iteration
189+
continue
190+
else:
191+
# Event was set (shutdown requested), exit loop
192+
break
184193

185194
except Exception as e:
186195
logger.error(f"Error in scheduler loop: {str(e)}")
187196
# Continue running even if there's an error
188-
time.sleep(60) # Wait 1 minute before retrying
197+
if not self._shutdown_event.wait(timeout=60): # Wait 1 minute before retrying
198+
continue
199+
else:
200+
break
189201

190202
def _check_retraining_triggers(self) -> None:
191203
"""Check all retraining triggers and initiate retraining if needed."""

0 commit comments

Comments
 (0)