diff --git a/README.md b/README.md index e8deb26..410ac65 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,13 @@ The export creates a **zip file** containing the following files: ### Configuration - **`zone_configurations.txt`** - All zone configuration SQL statements +- **`crdb_internal.cluster_settings.csv`** - All cluster settings and their current values +- **`system.settings.csv`** - Cluster settings that have been changed from defaults, including timestamps of when they changed +- ⚠️ Sensitive settings (credentials, keys, PEM data) automatically have their values replaced with a redaction placeholder in both files + +In virtualized clusters, settings are exported for each virtual cluster separately: +- **`crdb_internal.cluster_settings.csv`** / **`system.settings.csv`** — application virtual cluster +- **`crdb_internal.cluster_settings.system.csv`** / **`system.settings.system.csv`** — system virtual cluster ## Inspecting the Export @@ -241,6 +248,7 @@ ls export-contents/*.schema.txt ## Privacy and Security - **Passwords are redacted** - Connection string passwords are automatically removed from metadata +- **Sensitive settings are redacted** - Cluster settings containing credentials, keys, or PEM data (e.g. `enterprise.license`, `cluster.secret`, LDAP/OIDC/JWT config) are exported with their values replaced by a redaction placeholder - **No query parameters** - Statement statistics include query fingerprints, not actual parameter values - **Schema only** - Table schemas are exported, but **no actual table data** is included - **Read-only** - The tool only reads data and makes no modifications to your cluster diff --git a/pkg/export/exporter.go b/pkg/export/exporter.go index dd20155..2abc8f6 100644 --- a/pkg/export/exporter.go +++ b/pkg/export/exporter.go @@ -36,7 +36,10 @@ const ( // such as gossip_nodes. Auto-detection occurs on first failure. TenantScopeSystem TenantScope = "system" // TenantScopeBoth routes the query to both virtual clusters. - // Reserved for future use (e.g., cluster settings available in both tenants). 
+ // The main virtual cluster is always exported. In virtualized clusters, the system + // virtual cluster is also exported with a ".system" filename suffix (e.g., + // crdb_internal.cluster_settings.system.csv). The system export is best-effort: if the + // system connection cannot be established, it is skipped with a warning. TenantScopeBoth TenantScope = "both" ) @@ -103,6 +106,33 @@ type Table struct { // Scope indicates which virtual cluster connection to use for this table. // Defaults to TenantScopeMain when unset. Scope TenantScope + // RedactKeyColumn is the column used to identify rows whose sensitive column should be redacted. + // Set together with RedactColumn and RedactedKeys. + RedactKeyColumn string + // RedactColumn is the column whose value is replaced with "" for matching rows. + RedactColumn string + // RedactedKeys is the set of RedactKeyColumn values for which RedactColumn is redacted. + RedactedKeys []string +} + +// sensitiveClusterSettings is the list of cluster setting names whose values are +// redacted in the export to avoid leaking secrets or credentials. 
+var sensitiveClusterSettings = []string{ + // Sensitive settings (contain credentials, keys, or PEM data) + "server.host_based_authentication.configuration", + "server.identity_map.configuration", + "server.jwt_authentication.issuers.custom_ca", + "server.ldap_authentication.domain.custom_ca", + "server.ldap_authentication.client.tls_certificate", + "server.ldap_authentication.client.tls_key", + "server.oidc_authentication.client_id", + "server.oidc_authentication.client_secret", + "server.oidc_authentication.provider.custom_ca", + "sql.override.allow_unsafe_internals.enabled", + // Non-reportable settings (always redacted in telemetry) + "cluster.secret", + "cluster.label", + "enterprise.license", } var exportTables = []Table{ @@ -112,6 +142,24 @@ var exportTables = []Table{ {Database: "crdb_internal", Name: "gossip_nodes", TimeColumn: "", Optional: true, Scope: TenantScopeSystem}, {Database: "", Name: "crdb_internal.table_indexes", TimeColumn: "", Scope: TenantScopeMain}, // Use "" to query across all databases {Database: "system", Name: "table_statistics", TimeColumn: "", Scope: TenantScopeMain}, + { + Database: "crdb_internal", + Name: "cluster_settings", + TimeColumn: "", + Scope: TenantScopeBoth, + RedactKeyColumn: "variable", + RedactColumn: "value", + RedactedKeys: sensitiveClusterSettings, + }, + { + Database: "system", + Name: "settings", + TimeColumn: "", + Scope: TenantScopeBoth, + RedactKeyColumn: "name", + RedactColumn: "value", + RedactedKeys: sensitiveClusterSettings, + }, } // NewExporter creates a new Exporter instance with the given configuration. 
@@ -188,8 +236,11 @@ func (exporter *Exporter) Close() error { // - Database schemas (CREATE statements for all user databases) // - Zone configurations // - Statistics tables (statement_statistics, transaction_statistics, transaction_contention_events, gossip_nodes, table_indexes across all databases, system.table_statistics) +// - Cluster settings (crdb_internal.cluster_settings, system.settings) with sensitive values redacted // // The statistics tables are filtered by the TimeRange specified in Config. +// In virtualized clusters, tables with TenantScopeBoth are exported once per virtual cluster, +// with the system virtual cluster export using a ".system" filename suffix. // All exported data is written to the OutputFile specified in Config. // // Returns an error if any step of the export process fails. @@ -474,37 +525,55 @@ func (exporter *Exporter) userDatabases() ([]string, error) { // exportTable routes the table export to the appropriate virtual cluster connection // based on the table's Scope. For TenantScopeSystem tables, it first attempts the // export using the main connection; if CockroachDB returns a virtual cluster error, -// it establishes a system connection and retries automatically. +// it establishes a system connection and retries automatically. For TenantScopeBoth +// tables, the main virtual cluster is always exported, and the system virtual cluster +// is exported with a ".system" filename suffix when in virtualized cluster mode. func (exporter *Exporter) exportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration) error { scope := table.Scope if scope == "" { scope = TenantScopeMain } + if scope == TenantScopeBoth { + // Always export from the main virtual cluster. + if err := exporter.doExportTable(ctx, dir, table, aggregationInterval, exporter.Db, ""); err != nil { + return err + } + // Also export from the system virtual cluster (best-effort). 
+ systemConn, err := exporter.ensureSystemConn(ctx) + if err != nil { + logrus.WithError(err).Warnf("skipping system virtual cluster export for %s.%s (could not connect to system virtual cluster)", table.Database, table.Name) + return nil + } + return exporter.doExportTable(ctx, dir, table, aggregationInterval, systemConn, ".system") + } + conn := exporter.Db if scope == TenantScopeSystem && exporter.SystemDb != nil { conn = exporter.SystemDb } - err := exporter.doExportTable(ctx, dir, table, aggregationInterval, conn) + err := exporter.doExportTable(ctx, dir, table, aggregationInterval, conn, "") if err != nil && scope == TenantScopeSystem && isVirtualClusterError(err) { systemConn, connErr := exporter.ensureSystemConn(ctx) if connErr != nil { return fmt.Errorf("failed to connect to system virtual cluster: %w", connErr) } - return exporter.doExportTable(ctx, dir, table, aggregationInterval, systemConn) + return exporter.doExportTable(ctx, dir, table, aggregationInterval, systemConn, "") } return err } // doExportTable performs the actual table export using the provided connection. -func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration, conn *pgx.Conn) error { +// filenameSuffix is appended before the ".csv" extension (e.g. ".system" produces +// "crdb_internal.cluster_settings.system.csv"). Pass an empty string for no suffix. 
+func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration, conn *pgx.Conn, filenameSuffix string) error { // Create filename - if database is empty, just use table name var filename string if table.Database == "" { - filename = fmt.Sprintf("%s.csv", table.Name) + filename = fmt.Sprintf("%s%s.csv", table.Name, filenameSuffix) } else { - filename = fmt.Sprintf("%s.%s.csv", table.Database, table.Name) + filename = fmt.Sprintf("%s.%s%s.csv", table.Database, table.Name, filenameSuffix) } dataFile := filepath.Join(dir, filename) @@ -558,9 +627,13 @@ func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table T endTime(exporter.Config.TimeRange.End).Format("2006-01-02 15:04:05"), ) } + + // Build SELECT expression, applying column-level redaction when configured. + selectExpr := buildSelectExpr(headers, table) + copyQuery := fmt.Sprintf( - "COPY (SELECT * FROM %s %s) TO STDOUT WITH CSV", - tableRef, where) + "COPY (SELECT %s FROM %s %s) TO STDOUT WITH CSV", + selectExpr, tableRef, where) logrus.Info(copyQuery) _, err = conn.PgConn().CopyTo(ctx, file, copyQuery) if err != nil { @@ -570,6 +643,41 @@ func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table T return nil } +// buildSelectExpr constructs the SELECT expression for the COPY query. +// When the table has redaction configured, it returns an explicit column list +// with a CASE expression that replaces the sensitive column value with "" +// for rows whose key column matches any entry in RedactedKeys. +// When no redaction is configured, it returns "*". +func buildSelectExpr(columns []string, table Table) string { + if table.RedactColumn == "" || len(table.RedactedKeys) == 0 { + return "*" + } + + // Build the SQL IN list from the hard-coded redacted key names. 
+ quotedKeys := make([]string, len(table.RedactedKeys)) + for i, k := range table.RedactedKeys { + quotedKeys[i] = "'" + strings.ReplaceAll(k, "'", "''") + "'" + } + inClause := strings.Join(quotedKeys, ", ") + + keyCol := pgx.Identifier{table.RedactKeyColumn}.Sanitize() + redactCol := pgx.Identifier{table.RedactColumn}.Sanitize() + + cols := make([]string, len(columns)) + for i, col := range columns { + quotedCol := pgx.Identifier{col}.Sanitize() + if col == table.RedactColumn { + cols[i] = fmt.Sprintf( + "CASE WHEN %s IN (%s) THEN '' ELSE %s END AS %s", + keyCol, inClause, redactCol, redactCol, + ) + } else { + cols[i] = quotedCol + } + } + return strings.Join(cols, ", ") +} + func (exporter *Exporter) createZipFile(sourceDir string) error { zipFile, err := os.Create(exporter.Config.OutputFile) if err != nil { diff --git a/pkg/export/exporter_test.go b/pkg/export/exporter_test.go index 9555d47..d259679 100644 --- a/pkg/export/exporter_test.go +++ b/pkg/export/exporter_test.go @@ -2,6 +2,7 @@ package export import ( "fmt" + "strings" "testing" "time" ) @@ -194,6 +195,154 @@ func TestExportTables(t *testing.T) { } } +func TestExportTablesIncludesClusterSettings(t *testing.T) { + found := false + for _, table := range exportTables { + if table.Database == "crdb_internal" && table.Name == "cluster_settings" { + found = true + if table.Scope != TenantScopeBoth { + t.Errorf("crdb_internal.cluster_settings should have Scope TenantScopeBoth, got %q", table.Scope) + } + if table.TimeColumn != "" { + t.Errorf("crdb_internal.cluster_settings should have no TimeColumn, got %q", table.TimeColumn) + } + if table.RedactKeyColumn != "variable" { + t.Errorf("crdb_internal.cluster_settings RedactKeyColumn should be \"variable\", got %q", table.RedactKeyColumn) + } + if table.RedactColumn != "value" { + t.Errorf("crdb_internal.cluster_settings RedactColumn should be \"value\", got %q", table.RedactColumn) + } + if len(table.RedactedKeys) == 0 { + 
t.Error("crdb_internal.cluster_settings RedactedKeys should not be empty") + } + } + } + if !found { + t.Error("exportTables should contain crdb_internal.cluster_settings") + } +} + +func TestExportTablesIncludesSystemSettings(t *testing.T) { + found := false + for _, table := range exportTables { + if table.Database == "system" && table.Name == "settings" { + found = true + if table.Scope != TenantScopeBoth { + t.Errorf("system.settings should have Scope TenantScopeBoth, got %q", table.Scope) + } + if table.RedactKeyColumn != "name" { + t.Errorf("system.settings RedactKeyColumn should be \"name\", got %q", table.RedactKeyColumn) + } + if table.RedactColumn != "value" { + t.Errorf("system.settings RedactColumn should be \"value\", got %q", table.RedactColumn) + } + if len(table.RedactedKeys) == 0 { + t.Error("system.settings RedactedKeys should not be empty") + } + } + } + if !found { + t.Error("exportTables should contain system.settings") + } +} + +func TestSensitiveClusterSettings(t *testing.T) { + expected := []string{ + "server.host_based_authentication.configuration", + "server.identity_map.configuration", + "server.jwt_authentication.issuers.custom_ca", + "server.ldap_authentication.domain.custom_ca", + "server.ldap_authentication.client.tls_certificate", + "server.ldap_authentication.client.tls_key", + "server.oidc_authentication.client_id", + "server.oidc_authentication.client_secret", + "server.oidc_authentication.provider.custom_ca", + "sql.override.allow_unsafe_internals.enabled", + "cluster.secret", + "cluster.label", + "enterprise.license", + } + for _, want := range expected { + found := false + for _, got := range sensitiveClusterSettings { + if got == want { + found = true + break + } + } + if !found { + t.Errorf("sensitiveClusterSettings is missing %q", want) + } + } +} + +func TestBuildSelectExpr(t *testing.T) { + columns := []string{"variable", "value", "type", "description"} + + t.Run("no redaction returns star", func(t *testing.T) { + table := 
Table{RedactColumn: "", RedactKeyColumn: "", RedactedKeys: nil} + got := buildSelectExpr(columns, table) + if got != "*" { + t.Errorf("expected \"*\", got %q", got) + } + }) + + t.Run("empty RedactedKeys returns star", func(t *testing.T) { + table := Table{RedactColumn: "value", RedactKeyColumn: "variable", RedactedKeys: []string{}} + got := buildSelectExpr(columns, table) + if got != "*" { + t.Errorf("expected \"*\", got %q", got) + } + }) + + t.Run("redacted column gets CASE expression", func(t *testing.T) { + table := Table{ + RedactColumn: "value", + RedactKeyColumn: "variable", + RedactedKeys: []string{"cluster.secret", "enterprise.license"}, + } + got := buildSelectExpr(columns, table) + // Must contain a CASE expression for the value column + if !contains(got, "CASE WHEN") { + t.Errorf("expected CASE expression in SELECT, got %q", got) + } + // Must contain the redacted key literals + if !contains(got, "'cluster.secret'") { + t.Errorf("expected 'cluster.secret' in SELECT, got %q", got) + } + if !contains(got, "'enterprise.license'") { + t.Errorf("expected 'enterprise.license' in SELECT, got %q", got) + } + // Must contain the redaction placeholder + if !contains(got, "''") { + t.Errorf("expected '' in SELECT, got %q", got) + } + // Non-redacted columns must appear as plain identifiers + if !contains(got, `"variable"`) { + t.Errorf("expected \"variable\" column in SELECT, got %q", got) + } + if !contains(got, `"type"`) { + t.Errorf("expected \"type\" column in SELECT, got %q", got) + } + }) + + t.Run("single-quote in key is escaped", func(t *testing.T) { + table := Table{ + RedactColumn: "value", + RedactKeyColumn: "variable", + RedactedKeys: []string{"it's.a.key"}, + } + got := buildSelectExpr(columns, table) + if !contains(got, "'it''s.a.key'") { + t.Errorf("expected escaped single-quote in SELECT, got %q", got) + } + }) +} + +func contains(s, substr string) bool { + return strings.Contains(s, substr) +} + func TestIsVirtualClusterError(t *testing.T) { 
tests := []struct { name string