6 | 6 | import logging |
7 | 7 | from datetime import UTC, datetime, timedelta |
8 | 8 | from time import time |
9 | | -from typing import Any, Callable |
| 9 | +from typing import Any, Awaitable, Callable |
10 | 10 |
11 | 11 | import msgpack |
12 | 12 | from opentelemetry import metrics, trace |
47 | 47 | _LOCK_RETRY_DELAY_SECONDS = 5 |
48 | 48 |
49 | 49 |
| 50 | +async def _message_heartbeat( |
| 51 | +    renew: Callable[[], Awaitable[None]], |
| 52 | +    stop_event: asyncio.Event, |
| 53 | +    interval: float, |
| 54 | +) -> None: |
| 55 | +    """Periodically reset the PEL idle timer while a message is being processed. |
| 56 | +
| 57 | +    Calls ``renew()`` every ``interval`` seconds so the autoclaim loop never |
| 58 | +    considers the message stale as long as execution is in progress. |
| 59 | +    """ |
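| | +    # Wait in interval-sized slices: return as soon as stop_event is set, |
| | +    # otherwise fall through on the timeout and renew ownership. |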
| 60 | +    while not stop_event.is_set(): |
| 61 | +        try: |
| 62 | +            await asyncio.wait_for(stop_event.wait(), timeout=interval) |
| 63 | +            return |
| 64 | +        except asyncio.TimeoutError: |
| 65 | +            pass |
| 66 | +        try: |
| 67 | +            await renew() |
| 68 | +        except Exception: |
| 69 | +            logger.warning("Failed to renew message ownership", exc_info=True) |
| 70 | + |
| 71 | + |
50 | 72 | class Worker: |
51 | 73 |     """Execute tasks consumed from a broker. |
52 | 74 |
@@ -261,22 +283,42 @@ async def process_message(self, message: bytes | ReceivedMessage) -> None: |
261 | 283 |             "Executing task %s (ID: %s)", task_message.task_name, task_message.task_id |
262 | 284 |         ) |
263 | 285 |
264 | | -        result = await self.run_task(task_func, task_message) |
265 | | -
266 | | -        # Handle failure: retry or dead letter queue |
267 | | -        if result.is_err: |
268 | | -            await self._handle_failure(task_message, result) |
269 | | -        else: |
270 | | -            await self._handle_success(task_message, result) |
| 286 | +        # Start heartbeat to keep PEL ownership while the task runs. |
| 287 | +        # Renew at half the idle_timeout so there's always a safety margin. |
| 288 | +        heartbeat_stop = asyncio.Event() |
| 289 | +        heartbeat_interval = self.broker.idle_timeout / 1000 / 2 |
| 290 | +        heartbeat_task: asyncio.Task[None] | None = None |
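| | +        # Plain bytes payloads carry no renew() handle, so only ReceivedMessage gets a heartbeat. |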
| 291 | +        if isinstance(message, ReceivedMessage): |
| 292 | +            heartbeat_task = asyncio.create_task( |
| 293 | +                _message_heartbeat(message.renew, heartbeat_stop, heartbeat_interval) |
| 294 | +            ) |
271 | 295 |
272 | | -        # Always persist the result to the backend |
273 | 296 |         try: |
274 | | -            if self.broker.result_backend: |
275 | | -                await self.broker.result_backend.set_result( |
276 | | -                    task_message.task_id, result |
277 | | -                ) |
278 | | -        except Exception: |
279 | | -            logger.exception("Failed to save result") |
| 297 | +            result = await self.run_task(task_func, task_message) |
| 298 | +
| 299 | +            # Handle failure: retry or dead letter queue |
| 300 | +            if result.is_err: |
| 301 | +                await self._handle_failure(task_message, result) |
| 302 | +            else: |
| 303 | +                await self._handle_success(task_message, result) |
| 304 | +
| 305 | +            # Always persist the result to the backend |
| 306 | +            try: |
| 307 | +                if self.broker.result_backend: |
| 308 | +                    await self.broker.result_backend.set_result( |
| 309 | +                        task_message.task_id, result |
| 310 | +                    ) |
| 311 | +            except Exception: |
| 312 | +                logger.exception("Failed to save result") |
| 313 | +
| 314 | +        finally: |
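| | +            # Stop the heartbeat unconditionally: set the flag, then cancel to unblock a renew() in flight. |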
| 315 | +            if heartbeat_task is not None: |
| 316 | +                heartbeat_stop.set() |
| 317 | +                heartbeat_task.cancel() |
| 318 | +                try: |
| 319 | +                    await heartbeat_task |
| 320 | +                except asyncio.CancelledError: |
| 321 | +                    pass |
280 | 322 |
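| | +        # Ack only after cleanup; an unacked message stays in the PEL and can be reclaimed. |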
281 | 323 |         if isinstance(message, ReceivedMessage): |
282 | 324 |             await message.ack() |