From 39df8e1ebb9972bb3b393892a3ab839393482198 Mon Sep 17 00:00:00 2001 From: David Taylor Date: Mon, 13 Apr 2026 21:23:25 +0000 Subject: [PATCH] backup: fix flaky TestOnlineRestoreDistFlowSplitScatter The test cleanup cancels a paused download job, but the job may still be in the pause-requested state when CANCEL JOB is issued, in which case the CANCEL JOB statement is rejected. Wrap the cancel in SucceedsSoon to retry until the job reaches a cancelable state. Also add NewTestingKnobsWithShortIntervals to reduce the job adopt and cancel loop intervals from 30s/10s to 10ms, cutting the test from ~60s to ~2s. Fixes #167692 Release note: None --- pkg/backup/restore_online_distflow_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/backup/restore_online_distflow_test.go b/pkg/backup/restore_online_distflow_test.go index 6bdc9fe2996d..3f4b76cc0055 100644 --- a/pkg/backup/restore_online_distflow_test.go +++ b/pkg/backup/restore_online_distflow_test.go @@ -15,11 +15,13 @@ import ( "github.com/cockroachdb/cockroach/pkg/backup/backuppb" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/cloud/nodelocal" + "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/execinfra" + "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" @@ -113,6 +115,7 @@ func TestOnlineRestoreDistFlowSplitScatter(t *testing.T) { }, }, }, + JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), }, }) @@ -171,7 +174,10 @@ func TestOnlineRestoreDistFlowSplitScatter(t *testing.T) { var downloadJobID jobspb.JobID runner.QueryRow(t, latestDownloadJobIDQuery).Scan(&downloadJobID) 
runner.Exec(t, "SET CLUSTER SETTING jobs.debug.pausepoints = ''") - runner.Exec(t, fmt.Sprintf("CANCEL JOB %d", downloadJobID)) + testutils.SucceedsSoon(t, func() error { + _, err := sqlDB.Exec(fmt.Sprintf("CANCEL JOB %d", downloadJobID)) + return err + }) jobutils.WaitForJobToCancel(t, runner, downloadJobID) runner.Exec(t, "SET CLUSTER SETTING jobs.debug.pausepoints = 'restore.before_download'") }