diff --git a/pkg/cmd/roachtest/tests/perturbation/framework.go b/pkg/cmd/roachtest/tests/perturbation/framework.go index e6e7e0087583..845ffce63d3e 100644 --- a/pkg/cmd/roachtest/tests/perturbation/framework.go +++ b/pkg/cmd/roachtest/tests/perturbation/framework.go @@ -419,7 +419,6 @@ func register(r registry.Registry, p perturbation, skipReason string) { func RegisterTests(r registry.Registry) { const notSkipped = "" - const skippedByBankruptcy = "#149662" register(r, restart{}, notSkipped) addLong(r, restart{}) @@ -431,17 +430,13 @@ func RegisterTests(r registry.Registry) { for _, asleep := range []bool{true, false} { register(r, splits{asleep: asleep}, notSkipped) } - - // TODO(ssd): We skipped the majority of these tests so that we can focus on - // one at a time. These are vaguely ordered by their previous pass rate - // (highest first). - register(r, intents{}, skippedByBankruptcy) - register(r, decommission{}, skippedByBankruptcy) - register(r, elasticWorkload{}, skippedByBankruptcy) - register(r, partition{}, skippedByBankruptcy) + register(r, intents{}, notSkipped) + register(r, decommission{}, notSkipped) + register(r, elasticWorkload{}, notSkipped) + register(r, partition{}, notSkipped) register(r, backfill{}, notSkipped) - register(r, &slowDisk{}, skippedByBankruptcy) - register(r, addNode{}, skippedByBankruptcy) + register(r, &slowDisk{}, notSkipped) + register(r, addNode{}, notSkipped) } func (v variations) makeClusterSpec() spec.ClusterSpec { diff --git a/pkg/cmd/roachtest/tests/perturbation/network_partition.go b/pkg/cmd/roachtest/tests/perturbation/network_partition.go index 6d50abf8669d..1f7122311b8b 100644 --- a/pkg/cmd/roachtest/tests/perturbation/network_partition.go +++ b/pkg/cmd/roachtest/tests/perturbation/network_partition.go @@ -31,7 +31,13 @@ var _ perturbation = partition{} func (p partition) setup() variations { p.partitionSite = true - v := setup(p, defaultThresholds()) + // The partition test isolates an entire region (4 of 12 nodes), removing + // 1/3 of leaseholders. Foreground throughput naturally drops sharply + // while the partition is in effect, and the meaningful pass/fail signal + // is whether the cluster returns to baseline once the partition heals. + // Skip the perturbation-interval gate; keep the default recovery gate. + v := setup(p, noImpactThresholds()) + v.recoveryImpact = defaultThresholds() v.leaseType = registry.ExpirationLeases // TODO(baptist): Remove this setting once #120073 is fixed. v.clusterSettings["kv.lease.reject_on_leader_unknown.enabled"] = "true" diff --git a/pkg/cmd/roachtest/tests/perturbation/slow_disk.go b/pkg/cmd/roachtest/tests/perturbation/slow_disk.go index ac0cae8e0a52..2231959bbd6a 100644 --- a/pkg/cmd/roachtest/tests/perturbation/slow_disk.go +++ b/pkg/cmd/roachtest/tests/perturbation/slow_disk.go @@ -32,6 +32,15 @@ var _ perturbation = &slowDisk{} func (s *slowDisk) setup() variations { s.slowLiveness = true s.walFailover = true + // With walFailover=true and 2 disks per node (the default for the full + // variant), raft log writes fail over to the non-throttled store, so + // foreground throughput is expected to stay close to baseline even + // while the staller is active. Default thresholds apply to both + // intervals; we keep the 1.25x floor (rather than tightening) only to + // avoid flakes from the slowLiveness leg, which routes liveness + // heartbeats through the slow disk. The metamorphic variant exercises + // configurations where walFailover is off and throughput can drop + // substantially -- those should override impact independently. return setup(s, defaultThresholds()) }