Skip to content

Commit 7f7510d

Browse files
authored
feat: support virtual clusters (#6)
1 parent 38d2bd1 commit 7f7510d

4 files changed

Lines changed: 235 additions & 12 deletions

File tree

.claude/rules/development.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,15 @@
66

77
1. Add a `Table` entry to `exportTables` in `pkg/export/exporter.go`:
88
```go
9-
Table{Database: "system", Name: "my_table", TimeColumn: "created_at"},
9+
Table{Database: "system", Name: "my_table", TimeColumn: "created_at", Scope: TenantScopeMain},
1010
```
1111
- Set `TimeColumn` to the timestamp column if time-range filtering is needed, or `""` for no filtering.
1212
- Set `Optional: true` if the table may not exist in all cluster configurations (e.g. Cloud virtual clusters).
1313
- Use `Database: ""` with a dotted `Name` (e.g. `"crdb_internal.table_indexes"`) to query across all databases.
14+
- Set `Scope` to indicate which virtual cluster connection to use:
15+
- `TenantScopeMain` — application virtual cluster (default for most tables)
16+
- `TenantScopeSystem` — system virtual cluster only (e.g. `gossip_nodes`); auto-detects virtual cluster mode on first failure
17+
- `TenantScopeBoth` — reserved for future use
1418

1519
2. Add or update a test in `pkg/export/exporter_test.go`.
1620

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ Flags:
195195
The export creates a **zip file** containing the following files:
196196

197197
### Metadata
198-
- **`metadata.json`** - Cluster version, ID, name, organization, and export configuration
198+
- **`metadata.json`** - Cluster version, ID, name, organization, export configuration, and whether the cluster is a virtual cluster
199199
- ⚠️ Note: Connection string password is automatically redacted
200200

201201
### Statistics (CSV format, time-filtered)
@@ -257,6 +257,12 @@ ls export-contents/*.schema.txt
257257
- Read access to user databases (for schema export)
258258
- *Recommended:* Admin role for simplest setup
259259

260+
### Virtual Cluster Support
261+
262+
Virtualized CockroachDB clusters (those with a system virtual cluster and one or more application virtual clusters) are supported. Connect using the URL for the application virtual cluster (typically named `main`) — the tool automatically detects the virtualized deployment and opens a second connection to the system virtual cluster to retrieve cluster-wide data such as node topology.
263+
264+
> **Note:** The user account must have access to **both** the system virtual cluster and the application virtual cluster. If your user exists only in the application virtual cluster, system-level data (such as `gossip_nodes`) will be unavailable.
265+
260266
### Grant Permissions
261267

262268
For simplest setup, grant admin role:

pkg/export/exporter.go

Lines changed: 118 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,35 @@ const ExporterVersion = "1.0.0"
2323

2424
var systemDatabases = []string{"system", "crdb_internal", "postgres"}
2525

26+
// TenantScope indicates which virtual cluster a table or query should be routed to.
27+
// In non-virtualized clusters, all queries use the single connection regardless of scope.
28+
type TenantScope string
29+
30+
const (
31+
// TenantScopeMain routes the query to the main (application) virtual cluster.
32+
// This is the default when no scope is specified.
33+
TenantScopeMain TenantScope = "main"
34+
// TenantScopeSystem routes the query to the system virtual cluster.
35+
// Used for cluster-wide data not available in application virtual clusters,
36+
// such as gossip_nodes. Auto-detection occurs on first failure.
37+
TenantScopeSystem TenantScope = "system"
38+
// TenantScopeBoth routes the query to both virtual clusters.
39+
// Reserved for future use (e.g., cluster settings available in both tenants).
40+
TenantScopeBoth TenantScope = "both"
41+
)
42+
2643
// Exporter handles the export of workload data from a CockroachDB cluster.
2744
// It manages database connections and coordinates the export of statistics,
2845
// schemas, and configurations into a zip file.
2946
type Exporter struct {
3047
// Config contains the export configuration settings
3148
Config Config
32-
// Db is the active database connection to the CockroachDB cluster
49+
// Db is the active database connection to the CockroachDB cluster (main virtual cluster)
3350
Db *pgx.Conn
51+
// SystemDb is a connection to the system virtual cluster, established lazily when a
52+
// TenantScopeSystem query fails against Db with a virtual cluster error. Nil in
53+
// non-virtualized clusters.
54+
SystemDb *pgx.Conn
3455
// CleanConnectionString is the connection string with password redacted
3556
CleanConnectionString string
3657
}
@@ -65,6 +86,7 @@ type Metadata struct {
6586
Organization string `json:"organization"`
6687
SqlStatsAggregationInterval time.Duration `json:"sql.stats.aggregation.interval"`
6788
SqlStatsFlushInterval time.Duration `json:"sql.stats.flush.interval"`
89+
VirtualCluster bool `json:"virtual_cluster"`
6890
}
6991

7092
// Table represents a CockroachDB table to be exported with optional time-based filtering.
@@ -78,15 +100,18 @@ type Table struct {
78100
// Optional indicates that export failures should be logged as warnings rather than errors.
79101
// Use this for tables that may not be available in all cluster configurations (e.g. Cloud virtual clusters).
80102
Optional bool
103+
// Scope indicates which virtual cluster connection to use for this table.
104+
// Defaults to TenantScopeMain when unset.
105+
Scope TenantScope
81106
}
82107

83108
var exportTables = []Table{
84-
Table{Database: "crdb_internal", Name: "statement_statistics", TimeColumn: "aggregated_ts"},
85-
Table{Database: "crdb_internal", Name: "transaction_statistics", TimeColumn: "aggregated_ts"},
86-
Table{Database: "crdb_internal", Name: "transaction_contention_events", TimeColumn: "collection_ts"},
87-
Table{Database: "crdb_internal", Name: "gossip_nodes", TimeColumn: "", Optional: true},
88-
Table{Database: "", Name: "crdb_internal.table_indexes", TimeColumn: ""}, // Use "" to query across all databases
89-
Table{Database: "system", Name: "table_statistics", TimeColumn: ""},
109+
{Database: "crdb_internal", Name: "statement_statistics", TimeColumn: "aggregated_ts", Scope: TenantScopeMain},
110+
{Database: "crdb_internal", Name: "transaction_statistics", TimeColumn: "aggregated_ts", Scope: TenantScopeMain},
111+
{Database: "crdb_internal", Name: "transaction_contention_events", TimeColumn: "collection_ts", Scope: TenantScopeMain},
112+
{Database: "crdb_internal", Name: "gossip_nodes", TimeColumn: "", Optional: true, Scope: TenantScopeSystem},
113+
{Database: "", Name: "crdb_internal.table_indexes", TimeColumn: "", Scope: TenantScopeMain}, // Use "" to query across all databases
114+
{Database: "system", Name: "table_statistics", TimeColumn: "", Scope: TenantScopeMain},
90115
}
91116

92117
// NewExporter creates a new Exporter instance with the given configuration.
@@ -148,6 +173,9 @@ func NewExporter(config Config) (*Exporter, error) {
148173
// }
149174
// defer exporter.Close()
150175
func (exporter *Exporter) Close() error {
176+
if exporter.SystemDb != nil {
177+
_ = exporter.SystemDb.Close(context.Background())
178+
}
151179
if exporter.Db != nil {
152180
return exporter.Db.Close(context.Background())
153181
}
@@ -263,6 +291,8 @@ func (exporter *Exporter) Export() error {
263291
}
264292
logrus.Info("finished table export")
265293

294+
metadata.VirtualCluster = exporter.SystemDb != nil
295+
266296
metadataFile := filepath.Join(tempDir, "metadata.json")
267297
metadataJSON, err := json.MarshalIndent(metadata, "", " ")
268298
if err != nil {
@@ -441,7 +471,34 @@ func (exporter *Exporter) userDatabases() ([]string, error) {
441471
return databases, nil
442472
}
443473

474+
// exportTable routes the table export to the appropriate virtual cluster connection
475+
// based on the table's Scope. For TenantScopeSystem tables, it first attempts the
476+
// export using the main connection; if CockroachDB returns a virtual cluster error,
477+
// it establishes a system connection and retries automatically.
444478
func (exporter *Exporter) exportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration) error {
479+
scope := table.Scope
480+
if scope == "" {
481+
scope = TenantScopeMain
482+
}
483+
484+
conn := exporter.Db
485+
if scope == TenantScopeSystem && exporter.SystemDb != nil {
486+
conn = exporter.SystemDb
487+
}
488+
489+
err := exporter.doExportTable(ctx, dir, table, aggregationInterval, conn)
490+
if err != nil && scope == TenantScopeSystem && isVirtualClusterError(err) {
491+
systemConn, connErr := exporter.ensureSystemConn(ctx)
492+
if connErr != nil {
493+
return fmt.Errorf("failed to connect to system virtual cluster: %w", connErr)
494+
}
495+
return exporter.doExportTable(ctx, dir, table, aggregationInterval, systemConn)
496+
}
497+
return err
498+
}
499+
500+
// doExportTable performs the actual table export using the provided connection.
501+
func (exporter *Exporter) doExportTable(ctx context.Context, dir string, table Table, aggregationInterval time.Duration, conn *pgx.Conn) error {
445502
// Create filename - if database is empty, just use table name
446503
var filename string
447504
if table.Database == "" {
@@ -463,7 +520,6 @@ func (exporter *Exporter) exportTable(ctx context.Context, dir string, table Tab
463520
}
464521
}(file)
465522

466-
// Get column names
467523
// Construct table reference - handle empty database for cross-database queries
468524
var tableRef string
469525
if table.Database == "" {
@@ -473,7 +529,8 @@ func (exporter *Exporter) exportTable(ctx context.Context, dir string, table Tab
473529
tableRef = fmt.Sprintf("%s.%s", pgx.Identifier{table.Database}.Sanitize(), pgx.Identifier{table.Name}.Sanitize())
474530
}
475531

476-
rows, err := exporter.Db.Query(ctx, fmt.Sprintf("SELECT * FROM %s LIMIT 0", tableRef))
532+
// Get column names
533+
rows, err := conn.Query(ctx, fmt.Sprintf("SELECT * FROM %s LIMIT 0", tableRef))
477534
if err != nil {
478535
return err
479536
}
@@ -505,7 +562,7 @@ func (exporter *Exporter) exportTable(ctx context.Context, dir string, table Tab
505562
"COPY (SELECT * FROM %s %s) TO STDOUT WITH CSV",
506563
tableRef, where)
507564
logrus.Info(copyQuery)
508-
_, err = exporter.Db.PgConn().CopyTo(ctx, file, copyQuery)
565+
_, err = conn.PgConn().CopyTo(ctx, file, copyQuery)
509566
if err != nil {
510567
return err
511568
}
@@ -625,6 +682,57 @@ func enableUnsafeInternalsIfNeeded(ctx context.Context, conn *pgx.Conn) error {
625682
return nil
626683
}
627684

685+
// isVirtualClusterError returns true if the error indicates an operation is unsupported
686+
// within an application virtual cluster. When this occurs for a TenantScopeSystem table,
687+
// the exporter will retry the query against the system virtual cluster.
688+
//
689+
// The error string "operation is unsupported within a virtual cluster" is produced by
690+
// CockroachDB when an application tenant attempts to access system-only resources.
691+
func isVirtualClusterError(err error) bool {
692+
return err != nil && strings.Contains(err.Error(), "operation is unsupported within a virtual cluster")
693+
}
694+
695+
// buildSystemConnectionString derives a system virtual cluster connection string from
696+
// an existing connection string by appending options=-ccluster=system. If the connection
697+
// string already contains an options parameter, the cluster option is appended to it.
698+
func buildSystemConnectionString(connStr string) (string, error) {
699+
u, err := url.Parse(connStr)
700+
if err != nil {
701+
return "", fmt.Errorf("failed to parse connection string: %w", err)
702+
}
703+
q := u.Query()
704+
if existing := q.Get("options"); existing != "" {
705+
q.Set("options", existing+" -ccluster=system")
706+
} else {
707+
q.Set("options", "-ccluster=system")
708+
}
709+
u.RawQuery = q.Encode()
710+
return u.String(), nil
711+
}
712+
713+
// ensureSystemConn returns the system virtual cluster connection, creating it if needed.
714+
// It is called lazily when a TenantScopeSystem query fails with a virtual cluster error.
715+
func (exporter *Exporter) ensureSystemConn(ctx context.Context) (*pgx.Conn, error) {
716+
if exporter.SystemDb != nil {
717+
return exporter.SystemDb, nil
718+
}
719+
systemConnStr, err := buildSystemConnectionString(exporter.Config.ConnectionString)
720+
if err != nil {
721+
return nil, fmt.Errorf("failed to build system connection string: %w", err)
722+
}
723+
logrus.Info("detected virtual cluster, connecting to system virtual cluster")
724+
conn, err := pgx.Connect(ctx, systemConnStr)
725+
if err != nil {
726+
return nil, fmt.Errorf("failed to connect to system virtual cluster: %w", err)
727+
}
728+
if err := enableUnsafeInternalsIfNeeded(ctx, conn); err != nil {
729+
_ = conn.Close(ctx)
730+
return nil, err
731+
}
732+
exporter.SystemDb = conn
733+
return conn, nil
734+
}
735+
628736
// parseMajorVersion extracts the major version number from a CockroachDB version string.
629737
// Example: "CockroachDB CCL v26.1.0-beta.3 ..." -> 26
630738
func parseMajorVersion(versionStr string) (int, error) {

pkg/export/exporter_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package export
22

33
import (
4+
"fmt"
45
"testing"
56
"time"
67
)
@@ -175,12 +176,116 @@ func TestExportTables(t *testing.T) {
175176
t.Error("exportTables should not be empty")
176177
}
177178

179+
validScopes := map[TenantScope]bool{
180+
TenantScopeMain: true,
181+
TenantScopeSystem: true,
182+
TenantScopeBoth: true,
183+
}
184+
178185
for i, table := range exportTables {
179186
// Database can be empty for cross-database queries (e.g., "".crdb_internal.table_indexes)
180187
// but Name must always be present
181188
if table.Name == "" {
182189
t.Errorf("exportTables[%d].Name should not be empty", i)
183190
}
191+
if !validScopes[table.Scope] {
192+
t.Errorf("exportTables[%d] (%s.%s) has invalid or missing Scope %q", i, table.Database, table.Name, table.Scope)
193+
}
194+
}
195+
}
196+
197+
func TestIsVirtualClusterError(t *testing.T) {
198+
tests := []struct {
199+
name string
200+
err error
201+
expected bool
202+
}{
203+
{
204+
name: "nil error",
205+
err: nil,
206+
expected: false,
207+
},
208+
{
209+
name: "virtual cluster error",
210+
err: fmt.Errorf("ERROR: operation is unsupported within a virtual cluster (SQLSTATE XXUUU)"),
211+
expected: true,
212+
},
213+
{
214+
name: "wrapped virtual cluster error",
215+
err: fmt.Errorf("failed to query gossip_nodes: operation is unsupported within a virtual cluster"),
216+
expected: true,
217+
},
218+
{
219+
name: "unrelated error",
220+
err: fmt.Errorf("connection refused"),
221+
expected: false,
222+
},
223+
{
224+
name: "permission denied error",
225+
err: fmt.Errorf("ERROR: permission denied for table gossip_nodes"),
226+
expected: false,
227+
},
228+
}
229+
230+
for _, tt := range tests {
231+
t.Run(tt.name, func(t *testing.T) {
232+
got := isVirtualClusterError(tt.err)
233+
if got != tt.expected {
234+
t.Errorf("isVirtualClusterError(%v) = %v, want %v", tt.err, got, tt.expected)
235+
}
236+
})
237+
}
238+
}
239+
240+
func TestBuildSystemConnectionString(t *testing.T) {
241+
tests := []struct {
242+
name string
243+
input string
244+
expected string
245+
wantErr bool
246+
}{
247+
{
248+
name: "basic URL",
249+
input: "postgresql://user@localhost:26257/defaultdb",
250+
expected: "postgresql://user@localhost:26257/defaultdb?options=-ccluster%3Dsystem",
251+
wantErr: false,
252+
},
253+
{
254+
name: "URL with existing query params",
255+
input: "postgresql://user@localhost:26257/defaultdb?sslmode=verify-full",
256+
expected: "postgresql://user@localhost:26257/defaultdb?options=-ccluster%3Dsystem&sslmode=verify-full",
257+
wantErr: false,
258+
},
259+
{
260+
name: "URL with existing options param",
261+
input: "postgresql://user@localhost:26257/defaultdb?options=-csomething%3Dvalue",
262+
expected: "postgresql://user@localhost:26257/defaultdb?options=-csomething%3Dvalue+-ccluster%3Dsystem",
263+
wantErr: false,
264+
},
265+
{
266+
name: "URL with password is preserved",
267+
input: "postgresql://user:secret@localhost:26257/defaultdb",
268+
expected: "postgresql://user:secret@localhost:26257/defaultdb?options=-ccluster%3Dsystem",
269+
wantErr: false,
270+
},
271+
{
272+
name: "invalid URL",
273+
input: "://invalid",
274+
wantErr: true,
275+
},
276+
}
277+
278+
for _, tt := range tests {
279+
t.Run(tt.name, func(t *testing.T) {
280+
got, err := buildSystemConnectionString(tt.input)
281+
if (err != nil) != tt.wantErr {
282+
t.Errorf("buildSystemConnectionString() error = %v, wantErr %v", err, tt.wantErr)
283+
return
284+
}
285+
if got != tt.expected {
286+
t.Errorf("buildSystemConnectionString() = %q, want %q", got, tt.expected)
287+
}
288+
})
184289
}
185290
}
186291

0 commit comments

Comments
 (0)