Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,13 @@ The export creates a **zip file** containing the following files:

### Configuration
- **`zone_configurations.txt`** - All zone configuration SQL statements
- **`crdb_internal.cluster_settings.csv`** - All cluster settings and their current values
- **`system.settings.csv`** - Cluster settings that have been changed from defaults, including timestamps of when they changed
- ⚠️ Sensitive settings (credentials, keys, PEM data) are automatically redacted to `<redacted>` in both files

In virtualized clusters, settings are exported for each virtual cluster separately:
- **`crdb_internal.cluster_settings.csv`** / **`system.settings.csv`** — application virtual cluster
- **`crdb_internal.cluster_settings.system.csv`** / **`system.settings.system.csv`** — system virtual cluster

## Inspecting the Export

Expand Down Expand Up @@ -241,6 +248,7 @@ ls export-contents/*.schema.txt
## Privacy and Security

- **Passwords are redacted** - Connection string passwords are automatically removed from metadata
- **Sensitive settings are redacted** - Cluster settings containing credentials, keys, or PEM data (e.g. `enterprise.license`, `cluster.secret`, LDAP/OIDC/JWT config) are exported as `<redacted>`
- **No query parameters** - Statement statistics include query fingerprints, not actual parameter values
- **Schema only** - Table schemas are exported, but **no actual table data** is included
- **Read-only** - The tool only reads data and makes no modifications to your cluster
Expand Down
126 changes: 117 additions & 9 deletions pkg/export/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ const (
// such as gossip_nodes. Auto-detection occurs on first failure.
TenantScopeSystem TenantScope = "system"
// TenantScopeBoth routes the query to both virtual clusters.
// The main virtual cluster is always exported. In virtualized clusters, the system
// virtual cluster is also exported with a ".system" filename suffix (e.g.,
// crdb_internal.cluster_settings.system.csv). The system export is best-effort: if the
// system connection cannot be established, it is skipped with a warning.
TenantScopeBoth TenantScope = "both"
)

Expand Down Expand Up @@ -103,6 +106,33 @@ type Table struct {
// Scope indicates which virtual cluster connection to use for this table.
// Defaults to TenantScopeMain when unset.
Scope TenantScope
// RedactKeyColumn is the column used to identify rows whose sensitive column should be redacted.
// Set together with RedactColumn and RedactedKeys.
RedactKeyColumn string
// RedactColumn is the column whose value is replaced with "<redacted>" for matching rows.
RedactColumn string
// RedactedKeys is the set of RedactKeyColumn values for which RedactColumn is redacted.
RedactedKeys []string
}

// sensitiveClusterSettings is the list of cluster setting names whose values are
// redacted in the export to avoid leaking secrets or credentials.
//
// The list is shared, via Table.RedactedKeys, by the crdb_internal.cluster_settings
// and system.settings entries of exportTables. buildSelectExpr compares these names
// with a SQL IN list, so an entry only takes effect when it equals the stored
// setting name exactly.
var sensitiveClusterSettings = []string{
// Sensitive settings (contain credentials, keys, or PEM data).
"server.host_based_authentication.configuration",
"server.identity_map.configuration",
"server.jwt_authentication.issuers.custom_ca",
"server.ldap_authentication.domain.custom_ca",
"server.ldap_authentication.client.tls_certificate",
"server.ldap_authentication.client.tls_key",
"server.oidc_authentication.client_id",
"server.oidc_authentication.client_secret",
"server.oidc_authentication.provider.custom_ca",
"sql.override.allow_unsafe_internals.enabled",
// Non-reportable settings (always redacted in telemetry).
"cluster.secret",
"cluster.label",
"enterprise.license",
}

var exportTables = []Table{
Expand All @@ -112,6 +142,24 @@ var exportTables = []Table{
{Database: "crdb_internal", Name: "gossip_nodes", TimeColumn: "", Optional: true, Scope: TenantScopeSystem},
{Database: "", Name: "crdb_internal.table_indexes", TimeColumn: "", Scope: TenantScopeMain}, // Use "" to query across all databases
{Database: "system", Name: "table_statistics", TimeColumn: "", Scope: TenantScopeMain},
{
Database: "crdb_internal",
Name: "cluster_settings",
TimeColumn: "",
Scope: TenantScopeBoth,
RedactKeyColumn: "variable",
RedactColumn: "value",
RedactedKeys: sensitiveClusterSettings,
},
{
Database: "system",
Name: "settings",
TimeColumn: "",
Scope: TenantScopeBoth,
RedactKeyColumn: "name",
RedactColumn: "value",
RedactedKeys: sensitiveClusterSettings,
},
}

// NewExporter creates a new Exporter instance with the given configuration.
Expand Down Expand Up @@ -188,8 +236,11 @@ func (exporter *Exporter) Close() error {
// - Database schemas (CREATE statements for all user databases)
// - Zone configurations
// - Statistics tables (statement_statistics, transaction_statistics, transaction_contention_events, gossip_nodes, table_indexes across all databases, system.table_statistics)
// - Cluster settings (crdb_internal.cluster_settings, system.settings) with sensitive values redacted
//
// The statistics tables are filtered by the TimeRange specified in Config.
// In virtualized clusters, tables with TenantScopeBoth are exported once per virtual cluster,
// with the system virtual cluster export using a ".system" filename suffix.
// All exported data is written to the OutputFile specified in Config.
//
// Returns an error if any step of the export process fails.
Expand Down Expand Up @@ -474,37 +525,55 @@ func (exporter *Exporter) userDatabases() ([]string, error) {
// exportTable routes the table export to the appropriate virtual cluster connection
// based on the table's Scope. An unset Scope is treated as TenantScopeMain.
//
//   - TenantScopeMain: the table is exported over exporter.Db.
//   - TenantScopeSystem: the export uses exporter.SystemDb when it is already set,
//     otherwise exporter.Db. If that attempt fails with a virtual cluster error
//     (isVirtualClusterError), a system connection is established through
//     ensureSystemConn and the export is retried on it.
//   - TenantScopeBoth: the table is first exported over exporter.Db with no
//     filename suffix; a failure there is returned. A system connection is then
//     requested through ensureSystemConn and the table is exported a second time
//     with a ".system" filename suffix. If the system connection cannot be
//     obtained, a warning is logged and nil is returned, so the main export still
//     counts as a success.
//
// The returned error is whatever doExportTable reports, except that a failure to
// obtain the system connection during a TenantScopeSystem retry is wrapped with
// additional context.
func (exporter *Exporter) exportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration) error {
	scope := table.Scope
	if scope == "" {
		scope = TenantScopeMain
	}

	if scope == TenantScopeBoth {
		// Always export from the main virtual cluster.
		if err := exporter.doExportTable(ctx, dir, table, aggregationInterval, exporter.Db, ""); err != nil {
			return err
		}
		// Also export from the system virtual cluster (best-effort): a connection
		// failure must not fail the export that already succeeded above.
		systemConn, err := exporter.ensureSystemConn(ctx)
		if err != nil {
			logrus.WithError(err).Warnf("skipping system virtual cluster export for %s.%s (could not connect to system virtual cluster)", table.Database, table.Name)
			return nil
		}
		return exporter.doExportTable(ctx, dir, table, aggregationInterval, systemConn, ".system")
	}

	conn := exporter.Db
	if scope == TenantScopeSystem && exporter.SystemDb != nil {
		conn = exporter.SystemDb
	}

	err := exporter.doExportTable(ctx, dir, table, aggregationInterval, conn, "")
	if err != nil && scope == TenantScopeSystem && isVirtualClusterError(err) {
		// The first attempt reported a virtual cluster error, so retry on a
		// system connection.
		systemConn, connErr := exporter.ensureSystemConn(ctx)
		if connErr != nil {
			return fmt.Errorf("failed to connect to system virtual cluster: %w", connErr)
		}
		return exporter.doExportTable(ctx, dir, table, aggregationInterval, systemConn, "")
	}
	return err
}

// doExportTable performs the actual table export using the provided connection.
func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration, conn *pgx.Conn) error {
// filenameSuffix is appended before the ".csv" extension (e.g. ".system" produces
// "crdb_internal.cluster_settings.system.csv"). Pass an empty string for no suffix.
func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration, conn *pgx.Conn, filenameSuffix string) error {
// Create filename - if database is empty, just use table name
var filename string
if table.Database == "" {
filename = fmt.Sprintf("%s.csv", table.Name)
filename = fmt.Sprintf("%s%s.csv", table.Name, filenameSuffix)
} else {
filename = fmt.Sprintf("%s.%s.csv", table.Database, table.Name)
filename = fmt.Sprintf("%s.%s%s.csv", table.Database, table.Name, filenameSuffix)
}
dataFile := filepath.Join(dir, filename)

Expand Down Expand Up @@ -558,9 +627,13 @@ func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table T
endTime(exporter.Config.TimeRange.End).Format("2006-01-02 15:04:05"),
)
}

// Build SELECT expression, applying column-level redaction when configured.
selectExpr := buildSelectExpr(headers, table)

copyQuery := fmt.Sprintf(
"COPY (SELECT * FROM %s %s) TO STDOUT WITH CSV",
tableRef, where)
"COPY (SELECT %s FROM %s %s) TO STDOUT WITH CSV",
selectExpr, tableRef, where)
logrus.Info(copyQuery)
_, err = conn.PgConn().CopyTo(ctx, file, copyQuery)
if err != nil {
Expand All @@ -570,6 +643,41 @@ func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table T
return nil
}

// buildSelectExpr returns the column expression placed after SELECT in the COPY query.
//
// If the table has no RedactColumn or no RedactedKeys, the result is "*".
// Otherwise every name in columns is emitted as a quoted identifier, in order,
// except the one equal to table.RedactColumn. That column is emitted as a CASE
// expression, aliased back to its own name, which yields '<redacted>' for rows
// whose RedactKeyColumn value is one of RedactedKeys and the stored value for
// all other rows.
//
// Keys are embedded as SQL string literals with single quotes doubled. If
// table.RedactColumn does not occur in columns, no CASE expression is produced
// and the result is simply the quoted column list.
func buildSelectExpr(columns []string, table Table) string {
	redactionConfigured := table.RedactColumn != "" && len(table.RedactedKeys) > 0
	if !redactionConfigured {
		return "*"
	}

	// Render each redacted key as a single-quoted SQL literal.
	literals := make([]string, 0, len(table.RedactedKeys))
	for _, key := range table.RedactedKeys {
		literals = append(literals, "'"+strings.ReplaceAll(key, "'", "''")+"'")
	}

	// The CASE expression is identical for every occurrence of the redacted
	// column, so it is assembled once up front.
	sensitive := pgx.Identifier{table.RedactColumn}.Sanitize()
	redactedExpr := fmt.Sprintf(
		"CASE WHEN %s IN (%s) THEN '<redacted>' ELSE %s END AS %s",
		pgx.Identifier{table.RedactKeyColumn}.Sanitize(),
		strings.Join(literals, ", "),
		sensitive,
		sensitive,
	)

	exprs := make([]string, 0, len(columns))
	for _, name := range columns {
		if name == table.RedactColumn {
			exprs = append(exprs, redactedExpr)
			continue
		}
		exprs = append(exprs, pgx.Identifier{name}.Sanitize())
	}
	return strings.Join(exprs, ", ")
}

func (exporter *Exporter) createZipFile(sourceDir string) error {
zipFile, err := os.Create(exporter.Config.OutputFile)
if err != nil {
Expand Down
149 changes: 149 additions & 0 deletions pkg/export/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package export

import (
"fmt"
"strings"
"testing"
"time"
)
Expand Down Expand Up @@ -194,6 +195,154 @@ func TestExportTables(t *testing.T) {
}
}

// TestExportTablesIncludesClusterSettings verifies that exportTables has an entry
// for crdb_internal.cluster_settings and that every such entry is scoped to both
// virtual clusters, has no time column, and redacts "value" keyed by "variable"
// with a non-empty key list.
func TestExportTablesIncludesClusterSettings(t *testing.T) {
	matches := 0
	for _, entry := range exportTables {
		if entry.Database != "crdb_internal" || entry.Name != "cluster_settings" {
			continue
		}
		matches++

		if got := entry.Scope; got != TenantScopeBoth {
			t.Errorf("crdb_internal.cluster_settings should have Scope TenantScopeBoth, got %q", got)
		}
		if got := entry.TimeColumn; got != "" {
			t.Errorf("crdb_internal.cluster_settings should have no TimeColumn, got %q", got)
		}
		if got := entry.RedactKeyColumn; got != "variable" {
			t.Errorf("crdb_internal.cluster_settings RedactKeyColumn should be \"variable\", got %q", got)
		}
		if got := entry.RedactColumn; got != "value" {
			t.Errorf("crdb_internal.cluster_settings RedactColumn should be \"value\", got %q", got)
		}
		if len(entry.RedactedKeys) == 0 {
			t.Error("crdb_internal.cluster_settings RedactedKeys should not be empty")
		}
	}

	if matches == 0 {
		t.Error("exportTables should contain crdb_internal.cluster_settings")
	}
}

// TestExportTablesIncludesSystemSettings verifies that exportTables has an entry
// for system.settings and that every such entry is scoped to both virtual
// clusters and redacts "value" keyed by "name" with a non-empty key list.
func TestExportTablesIncludesSystemSettings(t *testing.T) {
	matches := 0
	for _, entry := range exportTables {
		if entry.Database != "system" || entry.Name != "settings" {
			continue
		}
		matches++

		if got := entry.Scope; got != TenantScopeBoth {
			t.Errorf("system.settings should have Scope TenantScopeBoth, got %q", got)
		}
		if got := entry.RedactKeyColumn; got != "name" {
			t.Errorf("system.settings RedactKeyColumn should be \"name\", got %q", got)
		}
		if got := entry.RedactColumn; got != "value" {
			t.Errorf("system.settings RedactColumn should be \"value\", got %q", got)
		}
		if len(entry.RedactedKeys) == 0 {
			t.Error("system.settings RedactedKeys should not be empty")
		}
	}

	if matches == 0 {
		t.Error("exportTables should contain system.settings")
	}
}

// TestSensitiveClusterSettings guards against accidental removal of a setting
// from sensitiveClusterSettings: every name listed here must still be present.
// Additional entries in sensitiveClusterSettings are allowed.
func TestSensitiveClusterSettings(t *testing.T) {
	// Index the real list once so each required name is a single lookup.
	present := make(map[string]struct{}, len(sensitiveClusterSettings))
	for _, name := range sensitiveClusterSettings {
		present[name] = struct{}{}
	}

	required := []string{
		"server.host_based_authentication.configuration",
		"server.identity_map.configuration",
		"server.jwt_authentication.issuers.custom_ca",
		"server.ldap_authentication.domain.custom_ca",
		"server.ldap_authentication.client.tls_certificate",
		"server.ldap_authentication.client.tls_key",
		"server.oidc_authentication.client_id",
		"server.oidc_authentication.client_secret",
		"server.oidc_authentication.provider.custom_ca",
		"sql.override.allow_unsafe_internals.enabled",
		"cluster.secret",
		"cluster.label",
		"enterprise.license",
	}
	for _, want := range required {
		if _, ok := present[want]; !ok {
			t.Errorf("sensitiveClusterSettings is missing %q", want)
		}
	}
}

// TestBuildSelectExpr covers the SELECT expression produced by buildSelectExpr:
// the "*" fallback when redaction is not fully configured, the CASE expression
// emitted for the redacted column, and escaping of single quotes in key literals.
func TestBuildSelectExpr(t *testing.T) {
	columns := []string{"variable", "value", "type", "description"}

	// Without a redact column, or without any keys, every column is selected as-is.
	starCases := []struct {
		name  string
		table Table
	}{
		{
			name:  "no redaction returns star",
			table: Table{RedactColumn: "", RedactKeyColumn: "", RedactedKeys: nil},
		},
		{
			name:  "empty RedactedKeys returns star",
			table: Table{RedactColumn: "value", RedactKeyColumn: "variable", RedactedKeys: []string{}},
		},
	}
	for _, tc := range starCases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			if got := buildSelectExpr(columns, tc.table); got != "*" {
				t.Errorf("expected \"*\", got %q", got)
			}
		})
	}

	t.Run("redacted column gets CASE expression", func(t *testing.T) {
		got := buildSelectExpr(columns, Table{
			RedactColumn:    "value",
			RedactKeyColumn: "variable",
			RedactedKeys:    []string{"cluster.secret", "enterprise.license"},
		})

		// Each fragment must appear somewhere in the generated expression.
		fragments := []struct {
			substr string
			what   string
		}{
			{"CASE WHEN", "CASE expression"},
			{"'cluster.secret'", "'cluster.secret'"},
			{"'enterprise.license'", "'enterprise.license'"},
			{"'<redacted>'", "'<redacted>'"},
			{`"variable"`, `"variable" column`},
			{`"type"`, `"type" column`},
		}
		for _, f := range fragments {
			if !contains(got, f.substr) {
				t.Errorf("expected %s in SELECT, got %q", f.what, got)
			}
		}
	})

	t.Run("single-quote in key is escaped", func(t *testing.T) {
		got := buildSelectExpr(columns, Table{
			RedactColumn:    "value",
			RedactKeyColumn: "variable",
			RedactedKeys:    []string{"it's.a.key"},
		})
		if !contains(got, "'it''s.a.key'") {
			t.Errorf("expected escaped single-quote in SELECT, got %q", got)
		}
	})
}

// contains reports whether substr occurs within s. It is a thin test helper
// that delegates to strings.Contains.
func contains(s, substr string) bool {
return strings.Contains(s, substr)
}

func TestIsVirtualClusterError(t *testing.T) {
tests := []struct {
name string
Expand Down
Loading