|
15 | 15 | use OCP\AppFramework\Db\QBMapper; |
16 | 16 | use OCP\AppFramework\Utility\ITimeFactory; |
17 | 17 | use OCP\DB\Exception; |
| 18 | +use OCP\DB\QueryBuilder\ConflictResolutionMode; |
18 | 19 | use OCP\DB\QueryBuilder\IQueryBuilder; |
19 | 20 | use OCP\IDBConnection; |
20 | 21 |
|
@@ -75,6 +76,160 @@ public function findOldestScheduledByType(array $taskTypes, array $taskIdsToIgno |
75 | 76 | return $this->findEntity($qb); |
76 | 77 | } |
77 | 78 |
|
/**
 * Claim the oldest scheduled task of the given task types and mark it RUNNING.
 *
 * Two strategies are available and the database provider decides which one is used:
 *
 * - {@see claimWithSkipLocked}: one transaction that runs
 *     SELECT ... WHERE status = SCHEDULED [AND type IN (...)]
 *     ORDER BY last_updated ASC LIMIT 1 FOR UPDATE SKIP LOCKED
 *   followed by a guarded UPDATE to RUNNING. Workers skip rows that are locked by
 *   another transaction instead of all competing for the same oldest row.
 *
 * - {@see claimWithBoundedRetry}: a bounded find-then-lock loop that does not need
 *   any locking clause in the SELECT.
 *
 * The fallback is used on:
 * - SQLite, where SKIP LOCKED is not supported (the query builder refuses to
 *   build the statement).
 * - Oracle, because the fast path combines a row limit with FOR UPDATE, which Oracle
 *   rejects (ORA-02014). NOTE(review): confirm against the Oracle versions that are
 *   officially supported; routing Oracle to the fallback is safe either way since the
 *   fallback only issues plain SELECT/UPDATE statements.
 *
 * Claiming only ever moves a task SCHEDULED -> RUNNING; it never marks a task FAILED.
 *
 * @param list<string> $taskTypes When non-empty, only tasks of these task type IDs are considered.
 * @return Task|null The claimed task (status RUNNING), or null if nothing could be claimed.
 * @throws Exception
 */
public function claimOldestScheduledTask(array $taskTypes): ?Task {
	// Providers on which "SELECT ... LIMIT 1 FOR UPDATE SKIP LOCKED" cannot be used.
	$needsFallback = in_array(
		$this->db->getDatabaseProvider(),
		[IDBConnection::PLATFORM_SQLITE, IDBConnection::PLATFORM_ORACLE],
		true,
	);

	if ($needsFallback) {
		return $this->claimWithBoundedRetry($taskTypes);
	}

	return $this->claimWithSkipLocked($taskTypes);
}
| 116 | + |
/**
 * Claim the oldest scheduled task using FOR UPDATE SKIP LOCKED inside one transaction.
 *
 * Steps, all within the same transaction:
 *  1. SELECT the oldest row with status SCHEDULED (optionally restricted to
 *     $taskTypes), limited to one row and locked with FOR UPDATE SKIP LOCKED.
 *  2. UPDATE that row to status RUNNING and set started_at, guarded by
 *     "status = SCHEDULED".
 *  3. COMMIT.
 *
 * The transaction is committed even when no row was found or the guarded UPDATE
 * affected nothing; in both cases null is returned and no task is modified.
 *
 * If anything fails before the commit completes, the transaction is rolled back on a
 * best-effort basis and the original error is re-thrown. A failing rollback never
 * replaces the original error.
 *
 * @param list<string> $taskTypes When non-empty, only tasks of these task type IDs are considered.
 * @return Task|null The claimed task with status RUNNING and started_at set, or null.
 * @throws Exception
 */
private function claimWithSkipLocked(array $taskTypes): ?Task {
	$task = null;
	$startedAt = null;
	$affected = 0;

	$this->db->beginTransaction();
	try {
		$select = $this->db->getQueryBuilder();
		$select->select(Task::COLUMNS)
			->from($this->tableName)
			->where($select->expr()->eq('status', $select->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)))
			->orderBy('last_updated', 'ASC')
			->setMaxResults(1)
			->forUpdate(ConflictResolutionMode::SkipLocked);

		if ($taskTypes !== []) {
			// Restrict to the requested types: (type = ? OR type = ? ...).
			$typeFilters = [];
			foreach ($taskTypes as $taskType) {
				$typeFilters[] = $select->expr()->eq('type', $select->createPositionalParameter($taskType));
			}
			$select->andWhere($select->expr()->orX(...$typeFilters));
		}

		$result = $select->executeQuery();
		$row = $result->fetch();
		$result->closeCursor();

		// $row is false when nothing is schedulable or every candidate is locked
		// by another worker.
		if ($row !== false) {
			/** @var Task $task */
			$task = $this->mapRowToEntity($row);

			// The start time is written here, together with the status change, so the
			// claimed row carries started_at as soon as it becomes RUNNING.
			$startedAt = $this->timeFactory->now()->getTimestamp();

			// Guarded transition SCHEDULED -> RUNNING. The row was selected FOR UPDATE
			// in this transaction; the status guard is an additional safety net.
			$update = $this->db->getQueryBuilder();
			$update->update($this->tableName)
				->set('status', $update->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_RUNNING, IQueryBuilder::PARAM_INT))
				->set('started_at', $update->createPositionalParameter($startedAt, IQueryBuilder::PARAM_INT))
				->where($update->expr()->eq('id', $update->createPositionalParameter($task->getId(), IQueryBuilder::PARAM_INT)))
				->andWhere($update->expr()->eq('status', $update->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)));
			$affected = $update->executeStatement();
		}

		$this->db->commit();
	} catch (\Throwable $e) {
		try {
			$this->db->rollBack();
		} catch (\Throwable) {
			// The rollback itself can fail (for example when the connection is gone or
			// the failed commit already ended the transaction). That must not hide the
			// error that caused the rollback, so it is deliberately ignored here.
		}
		throw $e;
	}

	// Only code that cannot affect the (already committed) transaction runs below.
	if ($task === null || $affected === 0) {
		// Nothing to claim, or the guarded UPDATE did not match; no task was changed.
		return null;
	}

	// Mirror the persisted state on the returned entity.
	$task->setStatus(\OCP\TaskProcessing\Task::STATUS_RUNNING);
	$task->setStartedAt($startedAt);
	return $task;
}
| 186 | + |
/**
 * Claim a scheduled task without relying on SKIP LOCKED.
 *
 * Up to ten times, the oldest scheduled task (excluding ids already lost in this
 * call) is looked up via {@see findOldestScheduledByType} and handed to
 * {@see lockTask}. A lockTask() result of 0 is treated as "another worker got it":
 * the id is remembered and the next candidate is tried. Any other result counts as a
 * successful claim, after which started_at is persisted with a separate UPDATE and
 * the entity is returned with status RUNNING.
 *
 * @param list<string> $taskTypes When non-empty, only tasks of these task type IDs are considered.
 * @return Task|null The claimed task, or null when no scheduled task exists or all
 *                   attempts were lost to other workers.
 * @throws Exception
 */
private function claimWithBoundedRetry(array $taskTypes): ?Task {
	// Upper bound on lookups so contention can never turn this into an endless loop.
	$attemptBudget = 10;
	$lostTaskIds = [];

	for ($remaining = $attemptBudget; $remaining > 0; $remaining--) {
		try {
			$candidate = $this->findOldestScheduledByType($taskTypes, $lostTaskIds);
		} catch (DoesNotExistException) {
			// No scheduled task left (outside the ones we already lost).
			return null;
		}

		if ($this->lockTask($candidate) === 0) {
			// Not ours: exclude this id from the next lookup and move on.
			$lostTaskIds[] = $candidate->getId();
			continue;
		}

		$candidate->setStatus(\OCP\TaskProcessing\Task::STATUS_RUNNING);

		// lockTask() is only used for the status flip here, so the start time is
		// written by a dedicated statement targeting just this row.
		$claimedAt = $this->timeFactory->now()->getTimestamp();
		$stamp = $this->db->getQueryBuilder();
		$stamp->update($this->tableName)
			->set('started_at', $stamp->createPositionalParameter($claimedAt, IQueryBuilder::PARAM_INT))
			->where($stamp->expr()->eq('id', $stamp->createPositionalParameter($candidate->getId(), IQueryBuilder::PARAM_INT)));
		$stamp->executeStatement();

		$candidate->setStartedAt($claimedAt);
		return $candidate;
	}

	// Attempt budget exhausted without winning a task.
	return null;
}
| 232 | + |
78 | 233 | /** |
79 | 234 | * @param int $id |
80 | 235 | * @param string|null $userId |
|
0 commit comments