|
| 1 | +package multiRegion |
| 2 | + |
| 3 | +import ( |
| 4 | + "fmt" |
| 5 | + "strings" |
| 6 | + "testing" |
| 7 | + |
| 8 | + "github.com/cockroachdb/helm-charts/tests/e2e/operator" |
| 9 | + "github.com/gruntwork-io/terratest/modules/k8s" |
| 10 | + "github.com/gruntwork-io/terratest/modules/random" |
| 11 | + "github.com/stretchr/testify/require" |
| 12 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 13 | +) |
| 14 | + |
| 15 | +// TestWALFailoverMultiRegion tests WAL failover with different paths in each region |
| 16 | +// Region 0: WAL failover enabled with custom path |
| 17 | +// Region 1: WAL failover disabled |
| 18 | +func (r *multiRegion) TestWALFailoverMultiRegion(t *testing.T) { |
| 19 | + // Setup namespaces and CA for each region |
| 20 | + cleanup := r.SetupMultiClusterWithCA(t) |
| 21 | + defer cleanup() |
| 22 | + |
| 23 | + // Region 0: Install with WAL failover enabled |
| 24 | + cluster0 := r.Clusters[0] |
| 25 | + walPath0 := "/cockroach/wal-region-0" |
| 26 | + |
| 27 | + t.Logf("Installing region 0 (%s) with WAL failover enabled at path %s", cluster0, walPath0) |
| 28 | + config0 := operator.AdvancedInstallConfig{ |
| 29 | + WALFailoverEnabled: true, |
| 30 | + WALFailoverSize: "5Gi", |
| 31 | + CustomValues: map[string]string{ |
| 32 | + "cockroachdb.crdbCluster.walFailoverSpec.path": walPath0, |
| 33 | + }, |
| 34 | + } |
| 35 | + r.InstallChartsWithAdvancedConfig(t, cluster0, 0, config0) |
| 36 | + |
| 37 | + // Region 1: Install without WAL failover |
| 38 | + cluster1 := r.Clusters[1] |
| 39 | + t.Logf("Installing region 1 (%s) without WAL failover", cluster1) |
| 40 | + config1 := operator.AdvancedInstallConfig{} |
| 41 | + r.InstallChartsWithAdvancedConfig(t, cluster1, 1, config1) |
| 42 | + |
| 43 | + // Validate CockroachDB cluster health in both regions |
| 44 | + for _, cluster := range r.Clusters { |
| 45 | + r.ValidateCRDB(t, cluster) |
| 46 | + } |
| 47 | + |
| 48 | + // Validate multi-region setup |
| 49 | + r.ValidateMultiRegionSetup(t) |
| 50 | + |
| 51 | + // Validate WAL failover in region 0 |
| 52 | + t.Log("Validating WAL failover in region 0") |
| 53 | + r.ValidateWALFailover(t, cluster0, &operator.AdvancedValidationConfig{ |
| 54 | + WALFailover: operator.WALFailoverValidation{ |
| 55 | + CustomPath: walPath0, |
| 56 | + }, |
| 57 | + }) |
| 58 | + |
| 59 | + // Validate NO WAL failover in region 1 |
| 60 | + t.Log("Validating NO WAL failover in region 1") |
| 61 | + kubeConfig, _ := r.GetCurrentContext(t) |
| 62 | + kubectlOptions1 := k8s.NewKubectlOptions(cluster1, kubeConfig, r.Namespace[cluster1]) |
| 63 | + |
| 64 | + pods := k8s.ListPods(t, kubectlOptions1, metav1.ListOptions{ |
| 65 | + LabelSelector: operator.LabelSelector, |
| 66 | + }) |
| 67 | + require.True(t, len(pods) > 0, "No CockroachDB pods found in region 1") |
| 68 | + |
| 69 | + podCommand, err := k8s.RunKubectlAndGetOutputE(t, kubectlOptions1, |
| 70 | + "get", "pod", pods[0].Name, "-o", "jsonpath={.spec.containers[?(@.name=='cockroachdb')].command}") |
| 71 | + require.NoError(t, err) |
| 72 | + require.NotContains(t, podCommand, "--wal-failover", "Region 1 should not have WAL failover enabled") |
| 73 | + t.Log("Confirmed region 1 does not have WAL failover") |
| 74 | + |
| 75 | + t.Logf("WAL failover multi-region test completed successfully") |
| 76 | +} |
| 77 | + |
| 78 | +// TestEncryptionAtRestMultiRegion tests encryption at rest with different secrets per region |
| 79 | +// Region 0: Encryption enabled with secret "cmek-key-secret-region-0" |
| 80 | +// Region 1: Encryption disabled (no encryption) |
| 81 | +func (r *multiRegion) TestEncryptionAtRestMultiRegion(t *testing.T) { |
| 82 | + // Setup namespaces and CA for each region |
| 83 | + cleanup := r.SetupMultiClusterWithCA(t) |
| 84 | + defer cleanup() |
| 85 | + |
| 86 | + // Generate encryption key for region 0 |
| 87 | + encryptionKeyB64 := r.GenerateEncryptionKey(t) |
| 88 | + t.Logf("Generated encryption key for region 0 (base64 length: %d)", len(encryptionKeyB64)) |
| 89 | + |
| 90 | + // Region 0: Install with encryption at rest enabled |
| 91 | + cluster0 := r.Clusters[0] |
| 92 | + secretName0 := "cmek-key-secret-region-0" |
| 93 | + |
| 94 | + encryptionRegions0 := []map[string]interface{}{ |
| 95 | + { |
| 96 | + "code": r.RegionCodes[0], |
| 97 | + "cloudProvider": r.Provider, |
| 98 | + "nodes": r.NodeCount, |
| 99 | + "namespace": r.Namespace[cluster0], |
| 100 | + "domain": operator.CustomDomains[0], |
| 101 | + "encryptionAtRest": map[string]interface{}{ |
| 102 | + "platform": "UNKNOWN_KEY_TYPE", |
| 103 | + "keySecretName": secretName0, |
| 104 | + }, |
| 105 | + }, |
| 106 | + } |
| 107 | + |
| 108 | + t.Logf("Installing region 0 (%s) with encryption at rest enabled", cluster0) |
| 109 | + config0 := operator.AdvancedInstallConfig{ |
| 110 | + EncryptionEnabled: true, |
| 111 | + EncryptionKeySecret: encryptionKeyB64, |
| 112 | + EncryptionKeySecretName: secretName0, |
| 113 | + CustomRegions: encryptionRegions0, |
| 114 | + } |
| 115 | + r.InstallChartsWithAdvancedConfig(t, cluster0, 0, config0) |
| 116 | + |
| 117 | + // Region 1: Install without encryption |
| 118 | + cluster1 := r.Clusters[1] |
| 119 | + t.Logf("Installing region 1 (%s) without encryption at rest", cluster1) |
| 120 | + config1 := operator.AdvancedInstallConfig{} |
| 121 | + r.InstallChartsWithAdvancedConfig(t, cluster1, 1, config1) |
| 122 | + |
| 123 | + // Validate CockroachDB cluster health in both regions |
| 124 | + for _, cluster := range r.Clusters { |
| 125 | + r.ValidateCRDB(t, cluster) |
| 126 | + } |
| 127 | + |
| 128 | + // Validate multi-region setup |
| 129 | + r.ValidateMultiRegionSetup(t) |
| 130 | + |
| 131 | + // Validate encryption in region 0 |
| 132 | + t.Log("Validating encryption at rest in region 0") |
| 133 | + r.ValidateEncryptionAtRest(t, cluster0, &operator.AdvancedValidationConfig{ |
| 134 | + EncryptionAtRest: operator.EncryptionAtRestValidation{ |
| 135 | + SecretName: secretName0, |
| 136 | + }, |
| 137 | + }) |
| 138 | + |
| 139 | + // Validate NO encryption in region 1 |
| 140 | + t.Log("Validating NO encryption at rest in region 1") |
| 141 | + kubeConfig, _ := r.GetCurrentContext(t) |
| 142 | + kubectlOptions1 := k8s.NewKubectlOptions(cluster1, kubeConfig, r.Namespace[cluster1]) |
| 143 | + |
| 144 | + pods := k8s.ListPods(t, kubectlOptions1, metav1.ListOptions{ |
| 145 | + LabelSelector: operator.LabelSelector, |
| 146 | + }) |
| 147 | + require.True(t, len(pods) > 0, "No CockroachDB pods found in region 1") |
| 148 | + |
| 149 | + podCommand, err := k8s.RunKubectlAndGetOutputE(t, kubectlOptions1, |
| 150 | + "get", "pod", pods[0].Name, "-o", "jsonpath={.spec.containers[?(@.name=='cockroachdb')].command}") |
| 151 | + require.NoError(t, err) |
| 152 | + require.NotContains(t, podCommand, "--enterprise-encryption", "Region 1 should not have encryption enabled") |
| 153 | + t.Log("Confirmed region 1 does not have encryption at rest") |
| 154 | + |
| 155 | + t.Logf("Encryption at rest multi-region test completed successfully") |
| 156 | +} |
| 157 | + |
| 158 | +// TestPCRMultiRegion tests Physical Cluster Replication with multi-region setup |
| 159 | +// Creates a multi-region primary cluster, then creates a standby cluster and tests failover/failback |
| 160 | +func (r *multiRegion) TestPCRMultiRegion(t *testing.T) { |
| 161 | + // Creating random namespace for primary multi-region cluster |
| 162 | + for _, cluster := range r.Clusters { |
| 163 | + r.Namespace[cluster] = fmt.Sprintf("%s-primary-%s", operator.Namespace, strings.ToLower(random.UniqueId())) |
| 164 | + } |
| 165 | + |
| 166 | + // Create CA certificate once for all clusters |
| 167 | + cleanupCA := r.RequireCACertificate(t) |
| 168 | + defer cleanupCA() |
| 169 | + |
| 170 | + var standbyNamespace string |
| 171 | + |
| 172 | + // Capture primary namespaces now. During standby installation r.Namespace[Clusters[0]] |
| 173 | + // is temporarily overwritten with the standby namespace. If the test fails before |
| 174 | + // restoring it, CleanupResources would clean up the wrong namespace and leak the |
| 175 | + // primary namespace. Restoring here ensures correct cleanup regardless of failure point. |
| 176 | + primaryNS := make(map[string]string) |
| 177 | + for _, cluster := range r.Clusters { |
| 178 | + primaryNS[cluster] = r.Namespace[cluster] |
| 179 | + } |
| 180 | + defer func() { |
| 181 | + for cluster, ns := range primaryNS { |
| 182 | + r.Namespace[cluster] = ns |
| 183 | + } |
| 184 | + r.CleanupResources(t) |
| 185 | + }() |
| 186 | + defer func() { |
| 187 | + if standbyNamespace != "" { |
| 188 | + kubeConfig, _ := r.GetCurrentContext(t) |
| 189 | + // Standby is always installed on Clusters[0]; use its context explicitly. |
| 190 | + kubectlOptions := k8s.NewKubectlOptions(r.Clusters[0], kubeConfig, standbyNamespace) |
| 191 | + if err := k8s.DeleteNamespaceE(t, kubectlOptions, standbyNamespace); err != nil { |
| 192 | + t.Logf("Warning: failed to delete standby namespace %s (cluster may be unreachable): %v", standbyNamespace, err) |
| 193 | + } |
| 194 | + } |
| 195 | + }() |
| 196 | + |
| 197 | + // Step 1: Install primary multi-region cluster |
| 198 | + t.Log("Installing primary multi-region cluster") |
| 199 | + for i, cluster := range r.Clusters { |
| 200 | + primaryConfig := operator.AdvancedInstallConfig{ |
| 201 | + VirtualClusterMode: "primary", |
| 202 | + } |
| 203 | + r.InstallChartsWithAdvancedConfig(t, cluster, i, primaryConfig) |
| 204 | + } |
| 205 | + |
| 206 | + // Validate primary cluster health in all regions |
| 207 | + for _, cluster := range r.Clusters { |
| 208 | + r.ValidateCRDB(t, cluster) |
| 209 | + } |
| 210 | + |
| 211 | + // Validate multi-region setup |
| 212 | + r.ValidateMultiRegionSetup(t) |
| 213 | + t.Log("Primary multi-region cluster is healthy") |
| 214 | + |
| 215 | + // Step 2: Install standby cluster (single region for simplicity) |
| 216 | + t.Log("Installing standby cluster") |
| 217 | + standbyCluster := r.Clusters[0] // Use first cluster for standby |
| 218 | + standbyNamespace = fmt.Sprintf("%s-standby-%s", operator.Namespace, strings.ToLower(random.UniqueId())) |
| 219 | + |
| 220 | + // Temporarily update namespace for standby installation |
| 221 | + originalNamespace := r.Namespace[standbyCluster] |
| 222 | + r.Namespace[standbyCluster] = standbyNamespace |
| 223 | + |
| 224 | + standbyConfig := operator.AdvancedInstallConfig{ |
| 225 | + VirtualClusterMode: "standby", |
| 226 | + SkipOperatorInstall: true, // Operator already installed |
| 227 | + } |
| 228 | + r.InstallChartsWithAdvancedConfig(t, standbyCluster, 0, standbyConfig) |
| 229 | + |
| 230 | + // Validate standby cluster |
| 231 | + r.VirtualClusterModeStandby = true |
| 232 | + r.ValidateCRDB(t, standbyCluster) |
| 233 | + r.VirtualClusterModeStandby = false |
| 234 | + t.Log("Standby cluster is healthy") |
| 235 | + |
| 236 | + // Step 3: Set up replication and test failover/failback |
| 237 | + t.Log("Testing PCR failover and failback") |
| 238 | + r.ValidatePCR(t, &operator.AdvancedValidationConfig{ |
| 239 | + PCR: operator.PCRValidation{ |
| 240 | + Cluster: standbyCluster, |
| 241 | + PrimaryNamespace: originalNamespace, |
| 242 | + StandbyNamespace: standbyNamespace, |
| 243 | + }, |
| 244 | + }) |
| 245 | + |
| 246 | + // Restore original namespace |
| 247 | + r.Namespace[standbyCluster] = originalNamespace |
| 248 | + |
| 249 | + t.Logf("PCR multi-region test completed successfully") |
| 250 | +} |
0 commit comments