Skip to content

Commit 66fbf98

Browse files
authored
Retry failed firestore transaction if re-run requested while check-run is not yet completed. (#5003)
Retry failed firestore transaction if re-run requested while check-run is not yet completed. Decreased `delayFactor` to default 200 milliseconds and increased `maxAttempts` from 8 by default to 10 to lover first retry delay and potentially decrease response while survive flood of change requested from scheduled to in progress. fix: flutter/flutter#184215
1 parent fa4cacc commit 66fbf98

4 files changed

Lines changed: 31 additions & 24 deletions

File tree

app_dart/lib/src/request_handlers/rerun_all_failed_jobs.dart

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:async';
66

77
import 'package:github/github.dart';
8+
import 'package:retry/retry.dart';
89

910
import '../../cocoon_service.dart';
1011
import '../model/ci_yaml/target.dart';
@@ -72,15 +73,20 @@ final class RerunAllFailedJobs extends ApiRequestHandler {
7273
);
7374
}
7475

75-
final failedChecks = await UnifiedCheckRun.reInitializeFailedJobs(
76-
firestoreService: _firestore,
77-
slug: slug,
78-
prNum: prNumber,
79-
guardCheckRunId: guard.checkRunId,
76+
// We're doing a transactional update, which could fail if multiple tasks
77+
// are running at the same time so retry a sane amount of times before
78+
// giving up.
79+
final failedChecks = await const RetryOptions().retry(
80+
() => UnifiedCheckRun.reInitializeFailedJobs(
81+
firestoreService: _firestore,
82+
slug: slug,
83+
prNum: prNumber,
84+
guardCheckRunId: guard.checkRunId,
85+
),
8086
);
8187

8288
if (failedChecks == null) {
83-
return Response.json({'message': 'No failed jobs found to re-run'});
89+
throw const BadRequestException('No failed jobs found to re-run');
8490
}
8591

8692
final (targets, artifacts) = await _scheduler.getAllTargetsForPullRequest(

app_dart/lib/src/request_handlers/rerun_failed_job.dart

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:async';
66

77
import 'package:github/github.dart';
8+
import 'package:retry/retry.dart';
89

910
import '../../cocoon_service.dart';
1011
import '../request_handling/api_request_handler.dart';
@@ -74,12 +75,17 @@ final class RerunFailedJob extends ApiRequestHandler {
7475
);
7576
}
7677

77-
final rerunInfo = await UnifiedCheckRun.reInitializeFailedJob(
78-
firestoreService: _firestore,
79-
slug: slug,
80-
prNum: prNumber,
81-
guardCheckRunId: guard.checkRunId,
82-
jobName: jobName,
78+
// We're doing a transactional update, which could fail if multiple tasks
79+
// are running at the same time so retry a sane amount of times before
80+
// giving up.
81+
final rerunInfo = await const RetryOptions().retry(
82+
() => UnifiedCheckRun.reInitializeFailedJob(
83+
firestoreService: _firestore,
84+
slug: slug,
85+
prNum: prNumber,
86+
guardCheckRunId: guard.checkRunId,
87+
jobName: jobName,
88+
),
8389
);
8490

8591
if (rerunInfo == null) {

app_dart/lib/src/service/scheduler.dart

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,10 +1469,7 @@ $stacktrace
14691469
// We're doing a transactional update, which could fail if multiple tasks
14701470
// are running at the same time so retry a sane amount of times before
14711471
// giving up.
1472-
const r = RetryOptions(
1473-
delayFactor: Duration(seconds: 2),
1474-
maxDelay: Duration(minutes: 2),
1475-
);
1472+
const r = RetryOptions(maxAttempts: 10, maxDelay: Duration(minutes: 2));
14761473

14771474
try {
14781475
return await r.retry(() {

app_dart/test/request_handlers/rerun_all_failed_jobs_test.dart

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
// Use of this source code is governed by a BSD-style license that can be
33
// found in the LICENSE file.
44

5-
import 'dart:convert';
6-
75
import 'package:cocoon_common/task_status.dart';
86
import 'package:cocoon_integration_test/testing.dart';
97
import 'package:cocoon_server_test/test_logging.dart';
108
import 'package:cocoon_service/cocoon_service.dart';
119
import 'package:cocoon_service/src/model/ci_yaml/target.dart';
1210
import 'package:cocoon_service/src/request_handlers/rerun_all_failed_jobs.dart';
11+
import 'package:cocoon_service/src/request_handling/exceptions.dart';
1312
import 'package:cocoon_service/src/service/luci_build_service/engine_artifacts.dart';
1413
import 'package:github/github.dart';
1514
import 'package:mockito/mockito.dart';
@@ -177,7 +176,7 @@ void main() {
177176
expect(response.statusCode, HttpStatus.ok);
178177
});
179178

180-
test('Re-run all failed jobs - no failures', () async {
179+
test('Re-run all failed jobs - bad request', () async {
181180
final checkRun = generateCheckRun(1, name: 'Guard');
182181
final guard = generatePresubmitGuard(
183182
checkRun: checkRun,
@@ -201,11 +200,10 @@ void main() {
201200
'pr': guard.prNum,
202201
};
203202

204-
final response = await tester.post(handler);
205-
final bodyString = await utf8.decoder.bind(response.body).join();
206-
final body = jsonDecode(bodyString) as Map<String, dynamic>;
207-
208-
expect(body['message'], contains('No failed jobs found'));
203+
await expectLater(
204+
tester.post(handler),
205+
throwsA(isA<BadRequestException>()),
206+
);
209207
});
210208
}
211209

0 commit comments

Comments
 (0)