@@ -25,23 +25,20 @@ import (
2525 "github.com/stretchr/testify/require"
2626)
2727
28- // TestTxnModePauseOnConflict verifies that a transactional LDR job pauses when
29- // a replicated transaction violates a unique constraint on the destination,
30- // and that the conflicting row is not applied.
31- func TestTxnModePauseOnConflict (t * testing.T ) {
32- defer leaktest .AfterTest (t )()
33- skip .UnderDeadlock (t )
34- defer log .Scope (t ).Close (t )
35-
36- ctx := context .Background ()
28+ // setupTxnModeTest starts a server with source and destination databases
29+ // configured for low-latency replication. It is the caller's responsibility
30+ // to stop the returned server.
31+ func setupTxnModeTest (
32+ t * testing.T ,
33+ ) (serverutils.TestServerInterface , * sqlutils.SQLRunner , * sqlutils.SQLRunner ) {
34+ t .Helper ()
3735
3836 srv , conn , _ := serverutils .StartServer (t , base.TestServerArgs {
3937 DefaultTestTenant : base .TestDoesNotWorkWithExternalProcessMode (134857 ),
4038 Knobs : base.TestingKnobs {
4139 JobsTestingKnobs : jobs .NewTestingKnobsWithShortIntervals (),
4240 },
4341 })
44- defer srv .Stopper ().Stop (ctx )
4542
4643 s := srv .ApplicationLayer ()
4744 runner := sqlutils .MakeSQLRunner (conn )
@@ -55,14 +52,30 @@ func TestTxnModePauseOnConflict(t *testing.T) {
5552 sourceDB := sqlutils .MakeSQLRunner (s .SQLConn (t , serverutils .DBName ("source_db" )))
5653 destDB := sqlutils .MakeSQLRunner (s .SQLConn (t , serverutils .DBName ("dest_db" )))
5754
55+ return srv , sourceDB , destDB
56+ }
57+
58+ // setupConflictingLDR creates a table with a unique index on both source and
59+ // destination, seeds a conflicting row on the destination, starts a
60+ // transactional LDR stream, and inserts a conflicting row on the source.
61+ // The caller is responsible for waiting on the job state.
62+ func setupConflictingLDR (
63+ t * testing.T ,
64+ srv serverutils.TestServerInterface ,
65+ sourceDB * sqlutils.SQLRunner ,
66+ destDB * sqlutils.SQLRunner ,
67+ ) jobspb.JobID {
68+ t .Helper ()
69+
5870 for _ , db := range []* sqlutils.SQLRunner {sourceDB , destDB } {
5971 db .Exec (t , "CREATE TABLE tab (pk INT PRIMARY KEY, val STRING NOT NULL)" )
6072 db .Exec (t , "CREATE UNIQUE INDEX ON tab(val)" )
6173 }
6274
6375 destDB .Exec (t , "INSERT INTO tab VALUES (100, 'collide')" )
6476
65- sourceURL := replicationtestutils .GetExternalConnectionURI (t , s , s , serverutils .DBName ("source_db" ))
77+ sourceURL := replicationtestutils .GetExternalConnectionURI (
78+ t , srv .ApplicationLayer (), srv .ApplicationLayer (), serverutils .DBName ("source_db" ))
6679
6780 var jobID jobspb.JobID
6881 destDB .QueryRow (t ,
@@ -71,7 +84,21 @@ func TestTxnModePauseOnConflict(t *testing.T) {
7184 ).Scan (& jobID )
7285
7386 sourceDB .Exec (t , "INSERT INTO tab VALUES (1, 'collide')" )
87+ return jobID
88+ }
89+
90+ // TestTxnModePauseOnConflict verifies that a transactional LDR job pauses when
91+ // a replicated transaction violates a unique constraint on the destination,
92+ // and that the conflicting row is not applied.
93+ func TestTxnModePauseOnConflict (t * testing.T ) {
94+ defer leaktest .AfterTest (t )()
95+ skip .UnderDeadlock (t )
96+ defer log .Scope (t ).Close (t )
97+ ctx := context .Background ()
7498
99+ srv , sourceDB , destDB := setupTxnModeTest (t )
100+ defer srv .Stopper ().Stop (ctx )
101+ jobID := setupConflictingLDR (t , srv , sourceDB , destDB )
75102 jobutils .WaitForJobToPause (t , destDB , jobID )
76103
77104 var runningStatus string
@@ -95,36 +122,74 @@ func TestTxnModePauseOnConflict(t *testing.T) {
95122 require .Equal (t , conflictMVCC .Prev (), replicatedTime )
96123}
97124
98- // TestTxnModePauseOnEarliestConflict verifies that when multiple replicated
99- // transactions conflict at different timestamps, the job converges on the
100- // first conflict (by timestamp) and drains every prior transaction before
101- // pausing.
102- func TestTxnModePauseOnEarliestConflict (t * testing.T ) {
125+ // TestTxnModeResumeAfterFixingConflict verifies that when a transactional LDR
126+ // job pauses on a unique constraint conflict, the user can remove the
127+ // conflicting row on the destination, resume the job, and the
128+ // previously failed transaction is retried and applied successfully.
129+ func TestTxnModeResumeAfterFixingConflict (t * testing.T ) {
103130 defer leaktest .AfterTest (t )()
104131 skip .UnderDeadlock (t )
105132 defer log .Scope (t ).Close (t )
133+ ctx := context .Background ()
134+
135+ srv , sourceDB , destDB := setupTxnModeTest (t )
136+ defer srv .Stopper ().Stop (ctx )
137+ jobID := setupConflictingLDR (t , srv , sourceDB , destDB )
138+ jobutils .WaitForJobToPause (t , destDB , jobID )
139+
140+ // Remove the conflicting row on the destination so the transaction
141+ // can be retried successfully.
142+ destDB .Exec (t , "DELETE FROM tab WHERE pk = 100" )
143+
144+ destDB .Exec (t , "RESUME JOB $1" , jobID )
145+ jobutils .WaitForJobToRun (t , destDB , jobID )
106146
147+ now := srv .Clock ().Now ()
148+ ldrtestutils .WaitUntilReplicatedTime (t , now , destDB , jobID )
149+
150+ destDB .CheckQueryResults (t ,
151+ "SELECT pk, val FROM tab ORDER BY pk" ,
152+ [][]string {{"1" , "collide" }},
153+ )
154+ }
155+
156+ // TestTxnModeResumePausesAgainOnUnresolvedConflict verifies that resuming a
157+ // paused transactional LDR job without fixing the conflict causes the job to
158+ // pause again at the same replicated time.
159+ func TestTxnModeResumePausesAgainOnUnresolvedConflict (t * testing.T ) {
160+ defer leaktest .AfterTest (t )()
161+ skip .UnderDeadlock (t )
162+ defer log .Scope (t ).Close (t )
107163 ctx := context .Background ()
108164
109- srv , conn , _ := serverutils .StartServer (t , base.TestServerArgs {
110- DefaultTestTenant : base .TestDoesNotWorkWithExternalProcessMode (134857 ),
111- Knobs : base.TestingKnobs {
112- JobsTestingKnobs : jobs .NewTestingKnobsWithShortIntervals (),
113- },
114- })
165+ srv , sourceDB , destDB := setupTxnModeTest (t )
115166 defer srv .Stopper ().Stop (ctx )
167+ jobID := setupConflictingLDR (t , srv , sourceDB , destDB )
168+ jobutils .WaitForJobToPause (t , destDB , jobID )
116169
117- s := srv . ApplicationLayer ( )
118- runner := sqlutils . MakeSQLRunner ( conn )
170+ progressFirst := jobutils . GetJobProgress ( t , destDB , jobID )
171+ replicatedFirst := progressFirst . Details .( * jobspb. Progress_LogicalReplication ). LogicalReplication . ReplicatedTime
119172
120- sysRunner := sqlutils . MakeSQLRunner ( srv . SystemLayer (). SQLConn ( t ) )
121- ldrtestutils . ApplyLowLatencyReplicationSettings (t , sysRunner , runner )
173+ destDB . Exec ( t , "RESUME JOB $1" , jobID )
174+ jobutils . WaitForJobToPause (t , destDB , jobID )
122175
123- runner .Exec (t , "CREATE DATABASE source_db" )
124- runner .Exec (t , "CREATE DATABASE dest_db" )
176+ progressSecond := jobutils .GetJobProgress (t , destDB , jobID )
177+ replicatedSecond := progressSecond .Details .(* jobspb.Progress_LogicalReplication ).LogicalReplication .ReplicatedTime
178+ require .Equal (t , replicatedFirst , replicatedSecond )
179+ }
125180
126- sourceDB := sqlutils .MakeSQLRunner (s .SQLConn (t , serverutils .DBName ("source_db" )))
127- destDB := sqlutils .MakeSQLRunner (s .SQLConn (t , serverutils .DBName ("dest_db" )))
181+ // TestTxnModePauseOnEarliestConflict verifies that when multiple replicated
182+ // transactions conflict at different timestamps, the job converges on the
183+ // first conflict (by timestamp) and drains every prior transaction before
184+ // pausing.
185+ func TestTxnModePauseOnEarliestConflict (t * testing.T ) {
186+ defer leaktest .AfterTest (t )()
187+ skip .UnderDeadlock (t )
188+ defer log .Scope (t ).Close (t )
189+ ctx := context .Background ()
190+
191+ srv , sourceDB , destDB := setupTxnModeTest (t )
192+ defer srv .Stopper ().Stop (ctx )
128193
129194 for _ , db := range []* sqlutils.SQLRunner {sourceDB , destDB } {
130195 db .Exec (t , "CREATE TABLE tab (pk INT PRIMARY KEY, val STRING NOT NULL, extra STRING NOT NULL)" )
@@ -134,6 +199,7 @@ func TestTxnModePauseOnEarliestConflict(t *testing.T) {
134199
135200 destDB .Exec (t , "INSERT INTO tab VALUES (100, 'first-collide', 'pre-1'), (101, 'pre-2', 'second-collide')" )
136201
202+ s := srv .ApplicationLayer ()
137203 sourceURL := replicationtestutils .GetExternalConnectionURI (t , s , s , serverutils .DBName ("source_db" ))
138204
139205 var jobID jobspb.JobID
0 commit comments