Skip to content

Commit 8aeaf37

Browse files
sravotto and claude committed
generate-import-fixtures: add region and nation tables
Add the two remaining TPC-H dimension tables (region with 5 rows, nation with 25 rows) so the tool can generate complete fixture sets. Because dbgen emits each of these small tables as a single file (e.g. region.tbl) rather than as sharded files (e.g. region.tbl.1 through region.tbl.8), extract the shard logic into a shardFiles() function that returns one entry for single-file tables and 8 entries for sharded tables.

Epic: CRDB-62435
Release note: None
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f5223f7 commit 8aeaf37

2 files changed

Lines changed: 53 additions & 7 deletions

File tree

pkg/cmd/generate-import-fixtures/main.go

Lines changed: 36 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -88,30 +88,29 @@ func main() {
8888
}
8989
fmt.Printf("processing table: %s\n", tableName)
9090

91-
for shardIdx := 1; shardIdx <= 8; shardIdx++ {
92-
fileName := fmt.Sprintf("%s.tbl.%d", tableName, shardIdx)
93-
filePath := filepath.Join(*inputDir, fileName)
91+
for _, s := range shardFiles(tableName) {
92+
filePath := filepath.Join(*inputDir, s.fileName)
9493

9594
f, err := os.Open(filePath)
9695
if err != nil {
9796
log.Fatalf("opening %s: %v", filePath, err)
9897
}
9998

100-
writer, err := format.NewWriter(tableDef, *outputDir, shardIdx)
99+
writer, err := format.NewWriter(tableDef, *outputDir, s.idx)
101100
if err != nil {
102101
f.Close()
103-
log.Fatalf("creating writer for %s shard %d: %v", tableName, shardIdx, err)
102+
log.Fatalf("creating writer for %s shard %d: %v", tableName, s.idx, err)
104103
}
105104

106105
if _, err := processShard(f, tableDef, writer); err != nil {
107106
_ = writer.Close()
108107
f.Close()
109-
log.Fatalf("processing %s shard %d: %v", tableName, shardIdx, err)
108+
log.Fatalf("processing %s shard %d: %v", tableName, s.idx, err)
110109
}
111110

112111
if err := writer.Close(); err != nil {
113112
f.Close()
114-
log.Fatalf("closing writer for %s shard %d: %v", tableName, shardIdx, err)
113+
log.Fatalf("closing writer for %s shard %d: %v", tableName, s.idx, err)
115114
}
116115
f.Close()
117116
}
@@ -128,6 +127,36 @@ func availableFormats() string {
128127
return strings.Join(names, ", ")
129128
}
130129

130+
// shardFile identifies a single input file and the shard index associated
// with it.
type shardFile struct {
	// fileName is the bare file name (no directory component), e.g.
	// "region.tbl" or "lineitem.tbl.3".
	fileName string
	// idx is the 1-based shard index of the file.
	idx int
}

// dbgenShardCount is the number of per-shard input files
// (<table>.tbl.1 through <table>.tbl.8) expected for every table that is not
// listed in singleFileTables.
const dbgenShardCount = 8

// singleFileTables lists tables where dbgen produces a single unsuffixed file
// instead of per-shard files (e.g. region.tbl instead of region.tbl.1..8).
var singleFileTables = map[string]bool{
	"region": true,
	"nation": true,
}

// shardFiles returns the list of input files for the given table, ordered by
// ascending shard index.
//
// Tables listed in singleFileTables (region, nation) yield exactly one entry,
// "<table>.tbl", with shard index 1. Every other table yields dbgenShardCount
// entries named "<table>.tbl.<idx>" with idx running from 1 through
// dbgenShardCount.
func shardFiles(tableName string) []shardFile {
	if singleFileTables[tableName] {
		return []shardFile{{fileName: tableName + ".tbl", idx: 1}}
	}

	// Iterate over the 1-based shard index directly so the file suffix and
	// the recorded index cannot drift apart.
	shards := make([]shardFile, 0, dbgenShardCount)
	for idx := 1; idx <= dbgenShardCount; idx++ {
		shards = append(shards, shardFile{
			fileName: fmt.Sprintf("%s.tbl.%d", tableName, idx),
			idx:      idx,
		})
	}
	return shards
}
159+
131160
// processShard reads a pipe-delimited TPC-H file and streams parsed rows to the
132161
// writer in fixed-size batches. It returns the total number of rows processed.
133162
func processShard(r io.Reader, table TableDef, writer FormatWriter) (int, error) {

pkg/cmd/generate-import-fixtures/tpch.go

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,23 @@ var tpchTables = map[string]TableDef{
156156
stringCol("s_comment"),
157157
},
158158
},
159+
"region": {
160+
Name: "region",
161+
Columns: []ColumnDef{
162+
longCol("r_regionkey"),
163+
stringCol("r_name"),
164+
stringCol("r_comment"),
165+
},
166+
},
167+
"nation": {
168+
Name: "nation",
169+
Columns: []ColumnDef{
170+
longCol("n_nationkey"),
171+
stringCol("n_name"),
172+
longCol("n_regionkey"),
173+
stringCol("n_comment"),
174+
},
175+
},
159176
}
160177

161178
// allTPCHTables returns the names of all defined TPC-H tables.

0 commit comments

Comments
 (0)