Skip to content

Commit 43fa30c

Browse files
committed
Add: self-ref FK, circular dep, fix: varchar sampling
1 parent 646abf7 commit 43fa30c

11 files changed

Lines changed: 432 additions & 237 deletions

File tree

README.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,8 @@ General:
478478
- [ ] better datetime random generation. It should be flexible over its range
479479
- [x] use more gofakeit generators with regexes to generate "legit" data when possible
480480
- [ ] helpers to get schema (generate pgdump/mysqldump commands, get index stats, ...)
481-
- [ ] protect against foreign key cycles. Both explicits and implicits (avoid generating implicits that would end up causing loops)
482-
- [ ] detect selfpointing foreign keys
481+
- [x] protect against foreign key cycles. Both explicits and implicits (avoid generating implicits that would end up causing loops)
482+
- [x] detect selfpointing foreign keys
483483
- [ ] have some graph to show --coin-flip-percent with --bulk-size
484484
- [x] using --values-freq-map to make query parameters work
485485

@@ -497,9 +497,13 @@ Without clear plan:
497497
## Version history
498498

499499
#### 0.2.3
500-
- NULL and/or fixed values can be injected at fixed rates
500+
- NULL and/or fixed values can be injected at tunable rates
501501
- --rows can be overridden per table
502502
- improved virtual join handling to enable columns used for many foreign keys
503+
- query parameters are inserted at tunable frequencies so that the query can work as is
504+
- protection against circular dependencies
505+
- self-referencing tables are handled by splitting the table in two: half of the rows will reference the other half
506+
503507

504508
#### 0.2.0
505509
- Support for postgres

cmd/run.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ type RunCmd struct {
2121

2222
Table string `help:"Table to insert to. When using --query, --table will be used to restrict the tables to insert to."`
2323
Rows int64 `name:"rows" required:"true" help:"Number of rows to insert"`
24-
RowsPerTable map[string]int64 `name:"rows-per-table" help:"Number of rows to insert per-table. Will have priority over --rows"`
24+
RowsPerTable map[string]int64 `name:"rows-per-table" help:"Number of rows to insert per-table. Will have priority over --rows. Format is \"{table}=X\"" default:""`
2525
BulkSize int64 `name:"bulk-size" help:"Number of rows per insert statement" default:"1000"`
2626
DryRun bool `name:"dry-run" help:"Print queries to the standard output instead of inserting them into the db"`
2727
Quiet bool `name:"quiet" help:"Do not print progress bar"`
@@ -95,8 +95,27 @@ func (cmd *RunCmd) Run() error {
9595

9696
tables = append(tables, table)
9797
}
98+
// now we have the full table list, we check for any loops
99+
for _, table := range tables {
100+
copiedTable, err := table.IdentifyAndResolveSelfReferencingConstraintLoop()
101+
if err != nil {
102+
return err
103+
}
104+
if copiedTable != nil {
105+
rows, ok := cmd.RowsPerTable[table.Name]
106+
if !ok {
107+
rows = cmd.Rows
108+
}
109+
log.Info().Str("table", table.Name).Int64("rows", rows/2).Msg("table has a self-referencing foreign key. Setting --rows to half for this table since we will insert twice to it to resolve the dependency.")
110+
cmd.RowsPerTable[table.Name] = rows / 2
111+
tables = append([]*db.Table{copiedTable}, tables...)
112+
113+
} else if table.HasAnyConstraintLoop() {
114+
return errors.Errorf("table %s has a foreign key loop", table.Name)
115+
}
116+
}
98117

99-
// now we have the full table list, we can autocomplete foreign keys
118+
// we can autocomplete foreign keys
100119
joins = append(joins, cmd.AddForeignKeys...)
101120
if len(joins) > 0 {
102121
db.AddVirtualFKs(tables, joins)

db/constraint.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
package db
2+
3+
import (
4+
"strings"
5+
6+
"slices"
7+
8+
"github.com/brianvoe/gofakeit/v7"
9+
"github.com/pkg/errors"
10+
"github.com/rs/zerolog/log"
11+
"github.com/ylacancellera/random-data-load/query"
12+
)
13+
14+
// Constraint holds Foreign Keys information
15+
type Constraint struct {
16+
ConstraintName string
17+
ReferencedTableSchema string
18+
ReferencedTableName string
19+
ColumnsName []string // sorted by ordinal_position
20+
ReferencedColumnsName []string
21+
Fields []Field
22+
ReferencedFields []Field
23+
ReferencedTable *Table
24+
willBeInsertedDuringThisRun bool
25+
}
26+
27+
// Constraints is a list of foreign keys; see the Fields method for collecting
// the local fields of every entry.
type Constraints []*Constraint
28+
29+
func NewConstraintFromVirtualFK(table *Table, left query.VirtualJoinPart, right query.VirtualJoinPart) (*Constraint, error) {
30+
31+
constraint := &Constraint{
32+
ConstraintName: "VirtualFK_" + strings.Join(right.Columns, "_") + gofakeit.ID(), // an ID to prevent collisions
33+
ReferencedTableSchema: table.Schema, // assuming the schema is the same, good enough for now
34+
ReferencedTableName: left.Table,
35+
ColumnsName: right.Columns,
36+
ReferencedColumnsName: left.Columns,
37+
}
38+
constraint.populateFields(table)
39+
err := constraint.loadReferencedTable()
40+
return constraint, errors.Wrap(err, "NewConstraintFromVirtualFK")
41+
}
42+
43+
func (c *Constraint) IsLooping() bool {
44+
return c.constraintLoopTraverser([]string{})
45+
}
46+
47+
func (c *Constraint) constraintLoopTraverser(traversedTables []string) bool {
48+
if slices.Contains(traversedTables, c.ReferencedTable.Name) {
49+
return true
50+
}
51+
for _, childConstraints := range c.ReferencedTable.Constraints {
52+
isLooping := childConstraints.constraintLoopTraverser(append(traversedTables, c.ReferencedTable.Name))
53+
if isLooping {
54+
return true
55+
}
56+
}
57+
return false
58+
}
59+
60+
func (cs Constraints) Fields() []Field {
61+
fields := []Field{}
62+
for _, c := range cs {
63+
fields = append(fields, c.Fields...)
64+
}
65+
return fields
66+
}
67+
68+
func (c *Constraint) populateFields(targetTable *Table) error {
69+
70+
for _, colname := range c.ColumnsName {
71+
72+
field := targetTable.FieldByName(colname)
73+
if field == nil {
74+
return errors.Errorf("could not find column %s from table %s", colname, targetTable.Name)
75+
}
76+
c.Fields = append(c.Fields, *field)
77+
}
78+
return nil
79+
}
80+
81+
func (c *Constraint) loadReferencedTable() error {
82+
83+
var err error
84+
c.ReferencedTable, err = LoadTable(c.ReferencedTableSchema, c.ReferencedTableName)
85+
if err != nil {
86+
return errors.Wrapf(err, "using schema %s, table %s", c.ReferencedTableSchema, c.ReferencedTableName)
87+
}
88+
for _, colname := range c.ReferencedColumnsName {
89+
90+
refField := c.ReferencedTable.FieldByName(colname)
91+
if refField == nil {
92+
return errors.Errorf("could not find column %s from table %s", colname, c.ReferencedTable.Name)
93+
}
94+
c.ReferencedFields = append(c.ReferencedFields, *refField)
95+
}
96+
return nil
97+
}
98+
99+
func shouldSkipVirtualFK(tables []*Table, vfk query.VirtualJoin) bool {
100+
101+
// source and target is in the order of the written query, not necessarily in the logical order
102+
// source would be the parent table
103+
// target would be the child, which could have had an actual FOREIGN KEY object
104+
// so the current t *Table should be the target: it points to a dependency
105+
106+
for _, table := range tables {
107+
for _, constraint := range table.Constraints {
108+
log.Debug().
109+
Interface("left", vfk.Left).Interface("right", vfk.Right).Str("loopCurrentTable", table.Name).
110+
Str("loopReferencedTable", constraint.ReferencedTableName).Strs("loopReferencedColumnsName", constraint.ReferencedColumnsName).Strs("loopConstraintColumnsName", constraint.ColumnsName).
111+
Msg("filtering virtual keys")
112+
113+
switch {
114+
// TODO: we could "supplement" existing FKs with virtual ones, I'm not sure if that's a real use case yet
115+
case strings.ToLower(vfk.Left.Table) == strings.ToLower(table.Name) &&
116+
strings.ToLower(vfk.Right.Table) == strings.ToLower(constraint.ReferencedTableName) &&
117+
isSliceSimilar(constraint.ColumnsName, vfk.Left.Columns) &&
118+
isSliceSimilar(constraint.ReferencedColumnsName, vfk.Right.Columns):
119+
return true
120+
121+
// flipped
122+
case strings.ToLower(vfk.Right.Table) == strings.ToLower(table.Name) &&
123+
strings.ToLower(vfk.Left.Table) == strings.ToLower(constraint.ReferencedTableName) &&
124+
isSliceSimilar(constraint.ColumnsName, vfk.Right.Columns) &&
125+
isSliceSimilar(constraint.ReferencedColumnsName, vfk.Left.Columns):
126+
127+
return true
128+
}
129+
130+
}
131+
}
132+
return false
133+
}
134+
135+
func AddVirtualFKs(tables []*Table, fkeys []query.VirtualJoin) error {
136+
log.Debug().Interface("fkeys", fkeys).Str("func", "AddVirtualFKs2").Msg("adding virtual foreign keys")
137+
138+
for _, virtualJoin := range fkeys {
139+
140+
if shouldSkipVirtualFK(tables, virtualJoin) {
141+
log.Debug().Str("left", virtualJoin.Left.Table).Str("right", virtualJoin.Right.Table).Str("func", "AddVirtualFKs").Msg("already handled by schema's constraint, skipping")
142+
continue
143+
}
144+
145+
// left is parent, right is child. Constraints are on child side
146+
tableIdx := slices.IndexFunc(tables, func(t *Table) bool { return strings.ToLower(t.Name) == strings.ToLower(virtualJoin.Right.Table) })
147+
if tableIdx == -1 {
148+
log.Debug().Str("left", virtualJoin.Left.Table).Str("right", virtualJoin.Right.Table).Str("func", "AddVirtualFKs").Msg("table not loaded")
149+
continue
150+
}
151+
table := tables[tableIdx]
152+
153+
constraint, err := NewConstraintFromVirtualFK(table, virtualJoin.Left, virtualJoin.Right)
154+
if err != nil {
155+
log.Error().Str("left", virtualJoin.Left.Table).Str("right", virtualJoin.Right.Table).Str("func", "AddVirtualFKs").Err(err).Msg("could not add a virtual foreign key, skipping")
156+
return errors.Wrap(err, "AddVirtualFKs")
157+
}
158+
159+
if constraint.IsLooping() {
160+
constraint, err = NewConstraintFromVirtualFK(table, virtualJoin.Right, virtualJoin.Left)
161+
if err != nil {
162+
log.Error().Str("left", virtualJoin.Right.Table).Str("right", virtualJoin.Left.Table).Str("func", "AddVirtualFKs").Err(err).Msg("could not add a (flipped) virtual foreign key, skipping")
163+
return errors.Wrap(err, "AddVirtualFKs")
164+
}
165+
if constraint.IsLooping() {
166+
log.Debug().Str("left", virtualJoin.Left.Table).Str("right", virtualJoin.Right.Table).Str("func", "AddVirtualFKs").Msg("could not add a virtual foreign key without creating a loop, skipping")
167+
}
168+
}
169+
170+
table.Constraints = append(table.Constraints, constraint)
171+
172+
log.Debug().Str("left", virtualJoin.Left.Table).Str("right", virtualJoin.Right.Table).Str("func", "AddVirtualFKs").Msg("virtual foreign key added")
173+
}
174+
175+
return nil
176+
}

0 commit comments

Comments
 (0)