Skip to content

Commit 4d4dc28

Browse files
roachtest: add split-file import test
Add an import roachtest that splits a dataset's data files into two halves and imports them via two separate IMPORT jobs into the same table. This verifies that successive imports produce a correct, complete dataset. Epic: None Release note: None Co-Authored-By: roachdev-claude <roachdev-claude-bot@cockroachlabs.com>
1 parent 5430025 commit 4d4dc28

1 file changed

Lines changed: 43 additions & 0 deletions

File tree

pkg/cmd/roachtest/tests/import.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,13 @@ var tests = []importTestSpec{
348348
datasetNames: FromFunc(anyDataset),
349349
importRunner: importPauseRunner,
350350
},
351+
// Test importing a table in two separate IMPORT jobs (split files).
352+
{
353+
subtestName: "split",
354+
nodes: []int{4},
355+
datasetNames: FromFunc(anyDataset),
356+
importRunner: splitImportRunner,
357+
},
351358
}
352359

353360
// importTestTimeout is the timeout for import roachtests. This is
@@ -885,6 +892,42 @@ func importPauseRunner(
885892
return nil
886893
}
887894

895+
// splitImportRunner imports a table's data in two separate IMPORT jobs,
896+
// splitting the data files at a random point and in a randomized order.
897+
// This verifies that successive imports into the same table produce a
898+
// correct, complete dataset.
899+
func splitImportRunner(
900+
ctx context.Context, t test.Test, c cluster.Cluster, l *logger.Logger, rng *rand.Rand, ds dataset,
901+
) error {
902+
conn := c.Conn(ctx, l, 1)
903+
defer conn.Close()
904+
905+
urls := slices.Clone(ds.getDataURLs())
906+
rng.Shuffle(len(urls), func(i, j int) { urls[i], urls[j] = urls[j], urls[i] })
907+
// Split at a random point, ensuring each half gets at least one file.
908+
splitIdx := rng.Intn(len(urls)-1) + 1
909+
first := urls[:splitIdx]
910+
second := urls[splitIdx:]
911+
912+
t.WorkerStatus(fmt.Sprintf("importing first batch (%d files) of %s",
913+
len(first), ds.getTableName()))
914+
importStmt := formatImportStmt(ds.getTableName(), first, false)
915+
l.Printf("first import: %s", importStmt)
916+
if _, err := conn.ExecContext(ctx, importStmt); err != nil {
917+
return errors.Wrapf(err, "%s", importStmt)
918+
}
919+
920+
t.WorkerStatus(fmt.Sprintf("importing second batch (%d files) of %s",
921+
len(second), ds.getTableName()))
922+
importStmt = formatImportStmt(ds.getTableName(), second, false)
923+
l.Printf("second import: %s", importStmt)
924+
if _, err := conn.ExecContext(ctx, importStmt); err != nil {
925+
return errors.Wrapf(err, "%s", importStmt)
926+
}
927+
928+
return nil
929+
}
930+
888931
// makeColumnFamilies() is a pre-test hook that changes the tables
889932
// in import_test to use column families. To do this, we iterate the
890933
// tables in the database, reading the schema for each table, modifying

0 commit comments

Comments
 (0)