@@ -20,8 +20,11 @@ import (
2020 "context"
2121 "encoding/json"
2222 "fmt"
23+ "os"
24+ "path"
2325 "strconv"
2426 "strings"
27+ "sync"
2528 "testing"
2629 "time"
2730
@@ -104,7 +107,8 @@ func TestErrantGTIDOnPreviousPrimary(t *testing.T) {
104107 output , err := clusterInfo .ClusterInstance .VtctldClientProcess .ExecuteCommandWithOutput (
105108 "PlannedReparentShard" ,
106109 fmt .Sprintf ("%s/%s" , keyspace .Name , shard0 .Name ),
107- "--new-primary" , replica .Alias )
110+ "--new-primary" , replica .Alias ,
111+ )
108112 require .NoError (t , err , "error in PlannedReparentShard output - %s" , output )
109113
110114 // Stop replication on the previous primary to simulate it not reparenting properly.
@@ -319,7 +323,8 @@ func TestVTOrcRepairs(t *testing.T) {
319323 require .NoError (t , err )
320324
321325 // Wait for problems to be set.
322- utils .WaitForDetectedProblems (t , vtOrcProcess ,
326+ utils .WaitForDetectedProblems (
327+ t , vtOrcProcess ,
323328 string (inst .PrimaryIsReadOnly ),
324329 curPrimary .Alias ,
325330 keyspace .Name ,
@@ -333,7 +338,8 @@ func TestVTOrcRepairs(t *testing.T) {
333338 assert .Equal (t , 200 , status )
334339
335340 // wait for detected problem to be cleared.
336- utils .WaitForDetectedProblems (t , vtOrcProcess ,
341+ utils .WaitForDetectedProblems (
342+ t , vtOrcProcess ,
337343 string (inst .PrimaryIsReadOnly ),
338344 curPrimary .Alias ,
339345 keyspace .Name ,
@@ -608,7 +614,8 @@ func TestVTOrcWithPrs(t *testing.T) {
608614 "PlannedReparentShard" ,
609615 fmt .Sprintf ("%s/%s" , keyspace .Name , shard0 .Name ),
610616 "--wait-replicas-timeout" , "31s" ,
611- "--new-primary" , replica .Alias )
617+ "--new-primary" , replica .Alias ,
618+ )
612619 require .NoError (t , err , "error in PlannedReparentShard output - %s" , output )
613620
614621 // check that the replica gets promoted
@@ -669,14 +676,16 @@ func TestDrainedTablet(t *testing.T) {
669676 require .NotNil (t , replica , "could not find any replica tablet" )
670677
671678 output , err := clusterInfo .ClusterInstance .VtctldClientProcess .ExecuteCommandWithOutput (
672- "ChangeTabletType" , replica .Alias , "DRAINED" )
679+ "ChangeTabletType" , replica .Alias , "DRAINED" ,
680+ )
673681 require .NoError (t , err , "error in changing tablet type output - %s" , output )
674682
675683 // Make sure VTOrc sees the drained tablets and doesn't forget them.
676684 utils .WaitForDrainedTabletInVTOrc (t , vtOrcProcess , 1 )
677685
678686 output , err = clusterInfo .ClusterInstance .VtctldClientProcess .ExecuteCommandWithOutput (
679- "ChangeTabletType" , replica .Alias , "REPLICA" )
687+ "ChangeTabletType" , replica .Alias , "REPLICA" ,
688+ )
680689 require .NoError (t , err , "error in changing tablet type output - %s" , output )
681690
682691 // We have no drained tablets anymore. Wait for VTOrc to have processed that.
@@ -817,7 +826,8 @@ func TestSemiSyncRecoveryOrdering(t *testing.T) {
817826 // Change durability to semi_sync. VTOrc should detect that replicas and primary
818827 // need semi-sync enabled, and fix them in the correct order.
819828 out , err := clusterInfo .ClusterInstance .VtctldClientProcess .ExecuteCommandWithOutput (
820- "SetKeyspaceDurabilityPolicy" , keyspace .Name , "--durability-policy=" + policy .DurabilitySemiSync )
829+ "SetKeyspaceDurabilityPolicy" , keyspace .Name , "--durability-policy=" + policy .DurabilitySemiSync ,
830+ )
821831 require .NoError (t , err , out )
822832
823833 // Poll the database-state API to verify recovery ordering.
@@ -958,3 +968,158 @@ func TestReplicationStoppedWithSemiSyncBlocked(t *testing.T) {
958968 }, 30 * time .Second , time .Second )
959969 utils .CheckReplication (t , clusterInfo , primary , allNonPrimary , 30 * time .Second )
960970}
971+
972+ // TestRecoveryDeadlocks exercises the `BeforeAnalyses` suppression mechanism
973+ // added by #19925 and extended by #20015 end-to-end: when a tablet-level
974+ // problem coexists with a shard-wide reachable-but-unhealthy primary problem,
975+ // VTOrc must dispatch the tablet-level recovery first and must NOT dispatch
976+ // ERS for the shard-wide problem.
977+ //
978+ // Pre-#19925/#20015, the shard-wide problem caused `recheckPrimaryHealth`
979+ // to abort the tablet-level recovery mid-flight, so the
980+ // `SuccessfulRecoveries[FixPrimary|FixReplica]` counter never incremented.
981+ // The fixes route the tablet-level recovery first via `BeforeAnalyses`, so
982+ // the counter ticks even while the shard-wide problem still exists.
983+ //
984+ // Coverage:
985+ //
986+ // - `PrimaryIsReadOnly × PrimarySemiSyncBlocked` (covered here, this is
987+ // the customer-facing scenario from issue #20011).
988+ //
989+ // - `PrimaryIsReadOnly × PrimaryDiskStalled` and
990+ // `ReplicationStopped × PrimaryDiskStalled` (NOT covered here). Both
991+ // pairings need `PrimaryDiskStalled` to fire, which requires
992+ // `!LastCheckValid && IsDiskStalled` simultaneously. Synthetic fault
993+ // injection (e.g. `chmod 000` on a probe dir) flips `IsDiskStalled` but
994+ // not `LastCheckValid` — the matcher does not match. Anything that
995+ // flips `LastCheckValid` (pausing vttablet, killing mysqld) also breaks
996+ // `fixPrimary`'s ability to run, so the assertion can't succeed either.
997+ // The ordering logic is identical to pair 1 (same `BeforeAnalyses`
998+ // bypass code path); coverage for these two pairings comes from unit
999+ // tests in `analysis_dao_test.go` (`TestDeclaresBefore`,
1000+ // `TestDeclaresAfter`) and `topology_recovery_test.go`
1001+ // (`TestRecheckPrimaryHealth`).
1002+ //
1003+ // - `ReplicationStopped × PrimarySemiSyncBlocked` (#19925's pairing) is
1004+ // covered separately by `TestReplicationStoppedWithSemiSyncBlocked`.
1005+ //
1006+ // The narrow window in which both problems coexist is ~1–2 analysis cycles
1007+ // (long enough for VTOrc to detect both and dispatch recovery once); we do
1008+ // not require sustained co-occurrence.
1009+ func TestRecoveryDeadlocks (t * testing.T ) {
1010+ t .Run ("PrimaryIsReadOnly+PrimarySemiSyncBlocked" , func (t * testing.T ) {
1011+ defer utils .PrintVTOrcLogsOnFailure (t , clusterInfo .ClusterInstance )
1012+ disableSemiSyncOnAllTablets (t )
1013+ utils .SetupVttabletsAndVTOrcs (t , clusterInfo , 2 , 1 , nil , cluster.VTOrcConfiguration {
1014+ PreventCrossCellFailover : true ,
1015+ }, cluster .DefaultVtorcsByCell , policy .DurabilitySemiSync )
1016+ keyspace := & clusterInfo .ClusterInstance .Keyspaces [0 ]
1017+ shard0 := & keyspace .Shards [0 ]
1018+ primary , replica , _ := waitForPrimaryAndPick (t , keyspace , shard0 )
1019+ vtorc := clusterInfo .ClusterInstance .VTOrcProcesses [0 ]
1020+ utils .WaitForSuccessfulRecoveryCount (t , vtorc , logic .ElectNewPrimaryRecoveryName , keyspace .Name , shard0 .Name , 1 )
1021+
1022+ fixPrimaryBefore := utils .GetSuccessfulRecoveryCount (t , vtorc , logic .FixPrimaryRecoveryName , keyspace .Name , shard0 .Name )
1023+ ersBefore := utils .GetSuccessfulRecoveryCount (t , vtorc , logic .RecoverDeadPrimaryRecoveryName , keyspace .Name , shard0 .Name )
1024+
1025+ // Stop the acker's IO thread so semi-sync ACKs cannot flow.
1026+ _ , err := utils .RunSQL (t , "STOP REPLICA IO_THREAD" , replica , "" )
1027+ require .NoError (t , err )
1028+
1029+ // Issue a write that will block on the semi-sync wait. The connection
1030+ // will return only once fixReplica restarts the acker's IO thread,
1031+ // after which an ACK flows and the write commits.
1032+ //
1033+ // Note: we cannot reliably assert PrimarySemiSyncBlocked is detected
1034+ // in this test (same caveat as TestReplicationStoppedWithSemiSyncBlocked).
1035+ // SemiSyncBlocked only flips when a write is waiting for acks, and
1036+ // VTOrc fixes the replica faster than we can sustain the blocking
1037+ // condition. The deadlock scenario is covered by unit tests in
1038+ // analysis_dao_test.go (TestDeclaresBefore) and
1039+ // topology_recovery_test.go (TestRecheckPrimaryHealth).
1040+ var wg sync.WaitGroup
1041+ wg .Go (func () {
1042+ _ , _ = utils .RunSQL (t , "CREATE TABLE IF NOT EXISTS test_recovery_deadlocks (id INT PRIMARY KEY)" , primary , "vt_ks" )
1043+ })
1044+ t .Cleanup (func () {
1045+ // Defensively unblock the goroutine if the test fails before
1046+ // the cluster recovers naturally.
1047+ _ , _ = utils .RunSQL (t , "SET GLOBAL super_read_only = OFF" , primary , "" )
1048+ _ , _ = utils .RunSQL (t , "START REPLICA" , replica , "" )
1049+ wg .Wait ()
1050+ })
1051+
1052+ // Set the primary read-only while the write is hanging.
1053+ _ , err = utils .RunSQL (t , "SET GLOBAL super_read_only = ON" , primary , "" )
1054+ require .NoError (t , err )
1055+
1056+ // PrimaryIsReadOnly is detected within ~1 analysis cycle.
1057+ utils .WaitForDetectedProblems (t , vtorc , string (inst .PrimaryIsReadOnly ), primary .Alias , keyspace .Name , shard0 .Name , 1 )
1058+
1059+ // fixPrimary must complete despite the shard-wide problem also being
1060+ // present. Pre-fix this counter never increments because
1061+ // recheckPrimaryHealth aborts the recovery mid-flight.
1062+ utils .WaitForSuccessfulRecoveryCount (t , vtorc , logic .FixPrimaryRecoveryName , keyspace .Name , shard0 .Name , fixPrimaryBefore + 1 )
1063+
1064+ // ERS must not have been dispatched.
1065+ ersAfter := utils .GetSuccessfulRecoveryCount (t , vtorc , logic .RecoverDeadPrimaryRecoveryName , keyspace .Name , shard0 .Name )
1066+ assert .Equal (t , ersBefore , ersAfter , "ERS should not have been dispatched" )
1067+
1068+ // Primary should no longer be read-only.
1069+ assert .True (t , utils .WaitForReadOnlyValue (t , primary , 0 ))
1070+ })
1071+ }
1072+
1073+ // disableSemiSyncOnAllTablets clears `rpl_semi_sync_source_enabled` and
1074+ // `rpl_semi_sync_replica_enabled` on every tablet's mysqld in the shared
1075+ // cluster. Required before SetupVttabletsAndVTOrcs when the previous test
1076+ // left a primary with semi-sync source on: vttablet's TearDown does not
1077+ // disable semi-sync, and `cleanAndStartVttablet` issues a DROP DATABASE
1078+ // before restarting vttablet, which hangs forever in the semi-sync wait
1079+ // because no acker is connected.
1080+ //
1081+ // Uses mysql.Connect directly (not utils.RunSQL) so that tablets whose
1082+ // mysqld is not yet running (e.g., first subtest) are silently skipped
1083+ // rather than failing the test.
1084+ func disableSemiSyncOnAllTablets (t * testing.T ) {
1085+ t .Helper ()
1086+ for _ , cellInfo := range clusterInfo .CellInfos {
1087+ all := append ([]* cluster.Vttablet {}, cellInfo .ReplicaTablets ... )
1088+ all = append (all , cellInfo .RdonlyTablets ... )
1089+ for _ , tablet := range all {
1090+ ctx , cancel := context .WithTimeout (context .Background (), 2 * time .Second )
1091+ params := mysql.ConnParams {
1092+ Uname : "vt_dba" ,
1093+ UnixSocket : path .Join (os .Getenv ("VTDATAROOT" ), fmt .Sprintf ("/vt_%010d/mysql.sock" , tablet .TabletUID )),
1094+ }
1095+ conn , err := mysql .Connect (ctx , & params )
1096+ cancel ()
1097+ if err != nil {
1098+ continue
1099+ }
1100+ _ , _ = conn .ExecuteFetch ("SET GLOBAL rpl_semi_sync_source_enabled = 0, GLOBAL rpl_semi_sync_replica_enabled = 0" , 1 , false )
1101+ conn .Close ()
1102+ }
1103+ }
1104+ }
1105+
1106+ // waitForPrimaryAndPick blocks until VTOrc has elected a primary and returns
1107+ // it along with the surviving REPLICA (semi-sync acker) and RDONLY tablets.
1108+ func waitForPrimaryAndPick (t * testing.T , keyspace * cluster.Keyspace , shard * cluster.Shard ) (primary , replica , rdonly * cluster.Vttablet ) {
1109+ t .Helper ()
1110+ primary = utils .ShardPrimaryTablet (t , clusterInfo , keyspace , shard )
1111+ require .NotNil (t , primary , "should have elected a primary" )
1112+ for _ , tablet := range shard .Vttablets {
1113+ if tablet .Alias == primary .Alias {
1114+ continue
1115+ }
1116+ if tablet .Type == "rdonly" {
1117+ rdonly = tablet
1118+ } else {
1119+ replica = tablet
1120+ }
1121+ }
1122+ require .NotNil (t , replica , "should have a REPLICA tablet" )
1123+ require .NotNil (t , rdonly , "should have an RDONLY tablet" )
1124+ return primary , replica , rdonly
1125+ }
0 commit comments