Skip to content

Commit 5194a76

Browse files
Implement retry logic for persisting failed task state
1 parent e8751fb commit 5194a76

1 file changed

Lines changed: 24 additions & 1 deletion

File tree

web/server/codechecker_server/task_executors/abstract_task.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import os
1414
import pathlib
1515
import shutil
16+
import time
1617
from typing import Optional
1718

1819
from codechecker_common.logger import get_logger
@@ -199,7 +200,29 @@ def _log_exception_and_fail(db_task: DBTask):
199200
"SYSTEM[AbstractTask::execute()]")
200201
db_task.set_finished(successfully=False)
201202

202-
task_manager._mutate_task_record(self, _log_exception_and_fail)
203+
# If a database error occurs, we may not be able to set the
204+
# task state to 'FAILED' immediately since the database server
205+
# could be down. Therefore, we retry multiple times.
206+
# Before retry i (1..9), we sleep for 2^i seconds.
207+
retries: int = 0
208+
max_retries: int = 10
209+
while retries < max_retries:
210+
if retries > 0:
211+
time.sleep(2 ** retries)
212+
213+
try:
214+
task_manager._mutate_task_record(self,
215+
_log_exception_and_fail)
216+
break # Success
217+
except Exception:
218+
retries += 1
219+
LOG.error("Failed to set task '%s' state to 'FAILED'! "
220+
"(machine: '%s', executor: #%d, retry: %d/%d)",
221+
self.token, task_manager.machine_id, os.getpid(),
222+
retries, max_retries)
223+
import traceback
224+
traceback.print_exc()
225+
203226
finally:
204227
self.destroy_data()
205228
task_manager._send_done_message(self.token)

0 commit comments

Comments
 (0)