diff --git a/README.md b/README.md index 410ac65..3660a67 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,7 @@ The export creates a **zip file** containing the following files: - **`crdb_internal.transaction_statistics.csv`** - Transaction execution stats - **`crdb_internal.transaction_contention_events.csv`** - Lock contention events - **`crdb_internal.gossip_nodes.csv`** - Node information and topology +- **`crdb_internal.node_cpu_mem.csv`** - Per-node vCPU count and total memory (derived from `kv_node_status`) - **`crdb_internal.table_indexes.csv`** - Table and index descriptor IDs across all databases - **`system.table_statistics.csv`** - Optimizer table statistics (column-level stats used by the query planner) diff --git a/pkg/export/exporter.go b/pkg/export/exporter.go index 2abc8f6..bae4348 100644 --- a/pkg/export/exporter.go +++ b/pkg/export/exporter.go @@ -113,6 +113,11 @@ type Table struct { RedactColumn string // RedactedKeys is the set of RedactKeyColumn values for which RedactColumn is redacted. RedactedKeys []string + // Query overrides the default SELECT for this table. When set, the query is used as-is + // for both column discovery and data export. TimeColumn, RedactKeyColumn, RedactColumn, + // and RedactedKeys are ignored when Query is set. The output filename is still derived + // from Database and Name. + Query string } // sensitiveClusterSettings is the list of cluster setting names whose values are @@ -140,6 +145,19 @@ var exportTables = []Table{ {Database: "crdb_internal", Name: "transaction_statistics", TimeColumn: "aggregated_ts", Scope: TenantScopeMain}, {Database: "crdb_internal", Name: "transaction_contention_events", TimeColumn: "collection_ts", Scope: TenantScopeMain}, {Database: "crdb_internal", Name: "gossip_nodes", TimeColumn: "", Optional: true, Scope: TenantScopeSystem}, + { + Database: "crdb_internal", + Name: "node_cpu_mem", + Optional: true, + Scope: TenantScopeSystem, + Query: `SELECT node_id, address,` + + ` ROUND(` + + `((metrics->>'sys.cpu.user.percent')::FLOAT + (metrics->>'sys.cpu.sys.percent')::FLOAT)` + + ` / NULLIF((metrics->>'sys.cpu.combined.percent-normalized')::FLOAT, 0)` + + `)::INT AS num_vcpus,` + + ` ROUND((metrics->>'sys.totalmem')::FLOAT / 1073741824, 1) AS total_mem_gib` + + ` FROM crdb_internal.kv_node_status`, + }, {Database: "", Name: "crdb_internal.table_indexes", TimeColumn: "", Scope: TenantScopeMain}, // Use "" to query across all databases {Database: "system", Name: "table_statistics", TimeColumn: "", Scope: TenantScopeMain}, { @@ -235,7 +253,7 @@ func (exporter *Exporter) Close() error { // - Cluster metadata (version, ID, name, organization, settings) // - Database schemas (CREATE statements for all user databases) // - Zone configurations -// - Statistics tables (statement_statistics, transaction_statistics, transaction_contention_events, gossip_nodes, table_indexes across all databases, system.table_statistics) +// - Statistics tables (statement_statistics, transaction_statistics, transaction_contention_events, gossip_nodes, node_cpu_mem, table_indexes across all databases, system.table_statistics) // - Cluster settings (crdb_internal.cluster_settings, system.settings) with sensitive values redacted // // The statistics tables are filtered by the TimeRange specified in Config. @@ -599,7 +617,13 @@ func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table T } // Get column names - rows, err := conn.Query(ctx, fmt.Sprintf("SELECT * FROM %s LIMIT 0", tableRef)) + var colProbeSQL string + if table.Query != "" { + colProbeSQL = fmt.Sprintf("SELECT * FROM (%s) AS q LIMIT 0", table.Query) + } else { + colProbeSQL = fmt.Sprintf("SELECT * FROM %s LIMIT 0", tableRef) + } + rows, err := conn.Query(ctx, colProbeSQL) if err != nil { return err } @@ -618,22 +642,28 @@ func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table T return err } - // Use a SQL query to export data in CSV format - var where string - if table.TimeColumn != "" { - where = fmt.Sprintf("WHERE %s BETWEEN '%s' and '%s'", - pgx.Identifier{table.TimeColumn}.Sanitize(), - startTime(exporter.Config.TimeRange.Start).Format("2006-01-02 15:04:05"), // offset for aggregation interval -- TODO - endTime(exporter.Config.TimeRange.End).Format("2006-01-02 15:04:05"), - ) - } + // Build and run the COPY query. + var copyQuery string + if table.Query != "" { + copyQuery = fmt.Sprintf("COPY (%s) TO STDOUT WITH CSV", table.Query) + } else { + // Use a SQL query to export data in CSV format + var where string + if table.TimeColumn != "" { + where = fmt.Sprintf("WHERE %s BETWEEN '%s' and '%s'", + pgx.Identifier{table.TimeColumn}.Sanitize(), + startTime(exporter.Config.TimeRange.Start).Format("2006-01-02 15:04:05"), // offset for aggregation interval -- TODO + endTime(exporter.Config.TimeRange.End).Format("2006-01-02 15:04:05"), + ) + } - // Build SELECT expression, applying column-level redaction when configured. - selectExpr := buildSelectExpr(headers, table) + // Build SELECT expression, applying column-level redaction when configured. + selectExpr := buildSelectExpr(headers, table) - copyQuery := fmt.Sprintf( - "COPY (SELECT %s FROM %s %s) TO STDOUT WITH CSV", - selectExpr, tableRef, where) + copyQuery = fmt.Sprintf( + "COPY (SELECT %s FROM %s %s) TO STDOUT WITH CSV", + selectExpr, tableRef, where) + } logrus.Info(copyQuery) _, err = conn.PgConn().CopyTo(ctx, file, copyQuery) if err != nil { diff --git a/pkg/export/exporter_test.go b/pkg/export/exporter_test.go index d259679..f39a7a0 100644 --- a/pkg/export/exporter_test.go +++ b/pkg/export/exporter_test.go @@ -195,6 +195,33 @@ func TestExportTables(t *testing.T) { } } +func TestExportTablesIncludesNodeCPUMem(t *testing.T) { + found := false + for _, table := range exportTables { + if table.Database == "crdb_internal" && table.Name == "node_cpu_mem" { + found = true + if table.Scope != TenantScopeSystem { + t.Errorf("crdb_internal.node_cpu_mem should have Scope TenantScopeSystem, got %q", table.Scope) + } + if !table.Optional { + t.Error("crdb_internal.node_cpu_mem should be Optional") + } + if table.Query == "" { + t.Error("crdb_internal.node_cpu_mem should have a custom Query") + } + if !strings.Contains(table.Query, "num_vcpus") { + t.Error("crdb_internal.node_cpu_mem Query should select num_vcpus") + } + if !strings.Contains(table.Query, "total_mem_gib") { + t.Error("crdb_internal.node_cpu_mem Query should select total_mem_gib") + } + } + } + if !found { + t.Error("exportTables should contain crdb_internal.node_cpu_mem") + } +} + func TestExportTablesIncludesClusterSettings(t *testing.T) { found := false for _, table := range exportTables {