Skip to content

Commit 382a8fb

Browse files
committed
Fix cardinality and nullability config to be at foreign key target level and not top level of foreign keys
1 parent 4854f09 commit 382a8fb

24 files changed

Lines changed: 937 additions & 1830 deletions

File tree

api/src/main/java/io/github/datacatering/datacaterer/javaapi/api/PlanRun.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ public ForeignKeyRelation foreignField(String dataSource, String step, String fi
240240
* @return A ForeignKeyRelation instance.
241241
*/
242242
public ForeignKeyRelation foreignField(String dataSource, String step, List<String> fields) {
243-
return new ForeignKeyRelation(dataSource, step, toScalaList(fields));
243+
return new ForeignKeyRelation(dataSource, step, toScalaList(fields), scala.Option.empty(), scala.Option.empty(), scala.Option.empty());
244244
}
245245

246246
/**
@@ -255,7 +255,8 @@ public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBu
255255
return new ForeignKeyRelation(
256256
connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(),
257257
connectionTaskBuilder.getStep().step().name(),
258-
toScalaList(List.of(field))
258+
toScalaList(List.of(field)),
259+
scala.Option.empty(), scala.Option.empty(), scala.Option.empty()
259260
);
260261
}
261262

@@ -271,7 +272,8 @@ public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBu
271272
return new ForeignKeyRelation(
272273
connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(),
273274
connectionTaskBuilder.getStep().step().name(),
274-
toScalaList(fields)
275+
toScalaList(fields),
276+
scala.Option.empty(), scala.Option.empty(), scala.Option.empty()
275277
);
276278
}
277279

@@ -284,7 +286,7 @@ public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBu
284286
* @return A ForeignKeyRelation instance.
285287
*/
286288
public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBuilder, String step, List<String> fields) {
287-
return new ForeignKeyRelation(connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(), step, toScalaList(fields));
289+
return new ForeignKeyRelation(connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(), step, toScalaList(fields), scala.Option.empty(), scala.Option.empty(), scala.Option.empty());
288290
}
289291

290292
/**

api/src/main/scala/io/github/datacatering/datacaterer/api/SinkOptionsBuilder.scala

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -83,55 +83,70 @@ case class SinkOptionsBuilder(sinkOptions: SinkOptions = SinkOptions()) {
8383
def foreignKey(foreignKey: ForeignKeyRelation,
8484
generationLinks: List[ForeignKeyRelation],
8585
deleteLinks: List[ForeignKeyRelation],
86-
relationshipType: Option[String],
87-
cardinality: Option[CardinalityConfig] = None,
88-
nullability: Option[NullabilityConfig] = None,
89-
generationMode: Option[String] = None): SinkOptionsBuilder =
86+
relationshipType: Option[String]): SinkOptionsBuilder =
9087
this.modify(_.sinkOptions.foreignKeys)(_ ++ List(ForeignKey(
9188
foreignKey, generationLinks, deleteLinks,
92-
relationshipType, cardinality, nullability, generationMode
89+
relationshipType
9390
)))
9491

9592
/**
96-
* Define a foreign key relationship with cardinality configuration.
93+
* DEPRECATED: Cardinality is now configured per-target on ForeignKeyRelation.
94+
* Use target.copy(cardinality = Some(CardinalityConfig(...))) instead.
9795
*
9896
* @param foreignKey Base foreign key
9997
* @param generationLinks Foreign key relations for data generation
10098
* @param cardinality Cardinality configuration builder
10199
* @return SinkOptionsBuilder
102100
*/
101+
@deprecated("Configure cardinality on individual ForeignKeyRelation targets instead", "0.18.0")
103102
def foreignKey(foreignKey: ForeignKeyRelation,
104103
generationLinks: List[ForeignKeyRelation],
105-
cardinality: CardinalityConfigBuilder): SinkOptionsBuilder =
106-
this.foreignKey(foreignKey, generationLinks, List(), None, Some(cardinality.config), None, None)
104+
cardinality: CardinalityConfigBuilder): SinkOptionsBuilder = {
105+
// Apply cardinality to all targets
106+
val targetsWithCard = generationLinks.map(_.copy(cardinality = Some(cardinality.config)))
107+
this.foreignKey(foreignKey, targetsWithCard, List())
108+
}
107109

108110
/**
109-
* Define a foreign key relationship with nullability configuration.
111+
* DEPRECATED: Nullability is now configured per-target on ForeignKeyRelation.
112+
* Use target.copy(nullability = Some(NullabilityConfig(...))) instead.
110113
*
111114
* @param foreignKey Base foreign key
112115
* @param generationLinks Foreign key relations for data generation
113116
* @param nullability Nullability configuration builder
114117
* @return SinkOptionsBuilder
115118
*/
119+
@deprecated("Configure nullability on individual ForeignKeyRelation targets instead", "0.18.0")
116120
def foreignKey(foreignKey: ForeignKeyRelation,
117121
generationLinks: List[ForeignKeyRelation],
118-
nullability: NullabilityConfigBuilder): SinkOptionsBuilder =
119-
this.foreignKey(foreignKey, generationLinks, List(), None, None, Some(nullability.config), None)
122+
nullability: NullabilityConfigBuilder): SinkOptionsBuilder = {
123+
// Apply nullability to all targets
124+
val targetsWithNull = generationLinks.map(_.copy(nullability = Some(nullability.config)))
125+
this.foreignKey(foreignKey, targetsWithNull, List())
126+
}
120127

121128
/**
122-
* Define a foreign key relationship with cardinality and nullability configuration.
129+
* DEPRECATED: Cardinality and nullability are now configured per-target on ForeignKeyRelation.
130+
* Use target.copy(cardinality = Some(...), nullability = Some(...)) instead.
123131
*
124132
* @param foreignKey Base foreign key
125133
* @param generationLinks Foreign key relations for data generation
126134
* @param cardinality Cardinality configuration builder
127135
* @param nullability Nullability configuration builder
128136
* @return SinkOptionsBuilder
129137
*/
138+
@deprecated("Configure cardinality and nullability on individual ForeignKeyRelation targets instead", "0.18.0")
130139
def foreignKey(foreignKey: ForeignKeyRelation,
131140
generationLinks: List[ForeignKeyRelation],
132141
cardinality: CardinalityConfigBuilder,
133-
nullability: NullabilityConfigBuilder): SinkOptionsBuilder =
134-
this.foreignKey(foreignKey, generationLinks, List(), None, Some(cardinality.config), Some(nullability.config), None)
142+
nullability: NullabilityConfigBuilder): SinkOptionsBuilder = {
143+
// Apply both to all targets
144+
val targetsWithConfig = generationLinks.map(_.copy(
145+
cardinality = Some(cardinality.config),
146+
nullability = Some(nullability.config)
147+
))
148+
this.foreignKey(foreignKey, targetsWithConfig, List())
149+
}
135150

136151
/**
137152
* Define a many-to-many relationship using junction table pattern.
@@ -149,9 +164,12 @@ case class SinkOptionsBuilder(sinkOptions: SinkOptions = SinkOptions()) {
149164
leftCardinality: Option[CardinalityConfig] = None,
150165
rightCardinality: Option[CardinalityConfig] = None): SinkOptionsBuilder = {
151166
// Create two foreign key relationships: left->junction and right->junction
152-
// Extract fields from junction table relation (assumes format like "junction_table_fields")
167+
// Apply cardinality to junction table targets
168+
val leftJunction = leftCardinality.map(c => junctionTable.copy(cardinality = Some(c))).getOrElse(junctionTable)
169+
val rightJunction = rightCardinality.map(c => junctionTable.copy(cardinality = Some(c))).getOrElse(junctionTable)
170+
153171
this
154-
.foreignKey(leftSource, List(junctionTable), List(), None, leftCardinality, None, None)
155-
.foreignKey(rightSource, List(junctionTable), List(), None, rightCardinality, None, None)
172+
.foreignKey(leftSource, List(leftJunction), List())
173+
.foreignKey(rightSource, List(rightJunction), List())
156174
}
157175
}

api/src/main/scala/io/github/datacatering/datacaterer/api/model/PlanModels.scala

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ case class SinkOptions(
3131
case class ForeignKeyRelation(
3232
dataSource: String = DEFAULT_DATA_SOURCE_NAME,
3333
step: String = DEFAULT_STEP_NAME,
34-
fields: List[String] = List()
34+
fields: List[String] = List(),
35+
cardinality: Option[CardinalityConfig] = None,
36+
nullability: Option[NullabilityConfig] = None,
37+
generationMode: Option[String] = None
3538
) {
3639

3740
def this(dataSource: String, step: String, field: String) = this(dataSource, step, List(field))
@@ -43,10 +46,7 @@ case class ForeignKey(
4346
source: ForeignKeyRelation = ForeignKeyRelation(),
4447
generate: List[ForeignKeyRelation] = List(),
4548
delete: List[ForeignKeyRelation] = List(),
46-
relationshipType: Option[String] = None,
47-
cardinality: Option[CardinalityConfig] = None,
48-
nullability: Option[NullabilityConfig] = None,
49-
generationMode: Option[String] = None
49+
relationshipType: Option[String] = None
5050
)
5151

5252
/**
@@ -64,7 +64,6 @@ case class CardinalityConfig(
6464
ratio: Option[Double] = None,
6565
distribution: String = "uniform"
6666
) {
67-
def this() = this(None, None, None, "uniform")
6867
}
6968

7069
/**
@@ -78,8 +77,6 @@ case class NullabilityConfig(
7877
nullPercentage: Double = 0.0,
7978
strategy: String = "random"
8079
) {
81-
def this() = this(0.0, "random")
82-
8380
require(nullPercentage >= 0.0 && nullPercentage <= 1.0, "nullPercentage must be between 0.0 and 1.0")
8481
}
8582

@@ -99,7 +96,6 @@ case class ManyToManyRelation(
9996
leftCardinality: Option[CardinalityConfig] = None,
10097
rightCardinality: Option[CardinalityConfig] = None
10198
) {
102-
def this() = this(ForeignKeyRelation(), ForeignKeyRelation(), ForeignKeyRelation(), None, None)
10399
}
104100

105101
@JsonIgnoreProperties(ignoreUnknown = true)

app/src/integrationTest/scala/io/github/datacatering/datacaterer/core/foreignkey/ForeignKeyEndToEndIntegrationTest.scala

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
2020
// ========================================================================================
2121

2222
test("E2E: Ratio-based cardinality with uniform distribution - full flow") {
23-
// Step 1: Define plan with cardinality
23+
// Step 1: Define plan with cardinality at target level
2424
val foreignKeys = List(ForeignKey(
2525
ForeignKeyRelation("accounts", "accounts_table", List("account_id")),
26-
List(ForeignKeyRelation("transactions", "transactions_table", List("account_id"))),
27-
List(),
28-
cardinality = Some(CardinalityConfig(ratio = Some(5.0), distribution = "uniform"))
26+
List(ForeignKeyRelation("transactions", "transactions_table", List("account_id"),
27+
cardinality = Some(CardinalityConfig(ratio = Some(5.0), distribution = "uniform")))),
28+
List()
2929
))
3030

3131
val sinkOptions = SinkOptions(Some("12345"), None, foreignKeys)
@@ -58,13 +58,15 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
5858
val (adjustedPlan, adjustedTasks, _) = processor.apply(plan, tasks, validations)
5959

6060
// Verify count was adjusted: 3 accounts * 5 ratio = 15 transactions
61+
// With perField, records is set to SOURCE count (3), and perField count is set to ratio (5)
62+
// This generates 3 * 5 = 15 total records
6163
val adjustedTransactionStep = adjustedTasks
6264
.find(_.name == "transaction_task")
6365
.flatMap(_.steps.headOption)
6466
.get
6567

66-
assert(adjustedTransactionStep.count.records.contains(15),
67-
s"Transaction count should be adjusted to 15, got ${adjustedTransactionStep.count.records}")
68+
assert(adjustedTransactionStep.count.records.contains(3),
69+
s"Transaction count should be set to source count (3), got ${adjustedTransactionStep.count.records}")
6870

6971
// Verify perField was set
7072
assert(adjustedTransactionStep.count.perField.isDefined,
@@ -115,7 +117,6 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
115117
val result = ForeignKeyUtil.getDataFramesWithForeignKeys(
116118
adjustedPlan,
117119
dfMap,
118-
useV2 = true,
119120
executableTasks = Some(executableTasks)
120121
)
121122

@@ -143,12 +144,12 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
143144
}
144145

145146
test("E2E: Bounded cardinality (min/max) - full flow") {
146-
// Step 1: Define plan with bounded cardinality
147+
// Step 1: Define plan with bounded cardinality at target level
147148
val foreignKeys = List(ForeignKey(
148149
ForeignKeyRelation("authors", "authors_table", List("author_id")),
149-
List(ForeignKeyRelation("articles", "articles_table", List("author_id"))),
150-
List(),
151-
cardinality = Some(CardinalityConfig(min = Some(2), max = Some(4), distribution = "uniform"))
150+
List(ForeignKeyRelation("articles", "articles_table", List("author_id"),
151+
cardinality = Some(CardinalityConfig(min = Some(2), max = Some(4), distribution = "uniform")))),
152+
List()
152153
))
153154

154155
val sinkOptions = SinkOptions(Some("12346"), None, foreignKeys)
@@ -190,8 +191,8 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
190191
assert(adjustedArticleStep.count.perField.isDefined, "PerField should be set")
191192
val perField = adjustedArticleStep.count.perField.get
192193
assert(perField.fieldNames.contains("author_id"), "PerField should include author_id")
193-
assert(perField.options.get("min") == Some(2), "PerField min should be 2")
194-
assert(perField.options.get("max") == Some(4), "PerField max should be 4")
194+
assert(perField.options.get("min").contains(2), "PerField min should be 2")
195+
assert(perField.options.get("max").contains(4), "PerField max should be 4")
195196

196197
// Step 3: Simulate data generation with perField grouping (varying counts 2-4)
197198
val authorsDf = sparkSession.createDataFrame(Seq(
@@ -227,7 +228,6 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
227228
val result = ForeignKeyUtil.getDataFramesWithForeignKeys(
228229
adjustedPlan,
229230
dfMap,
230-
useV2 = true,
231231
executableTasks = Some(executableTasks)
232232
)
233233

@@ -257,10 +257,10 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
257257
test("E2E: Cardinality with all-exist mode - all FKs valid, cardinality preserved") {
258258
val foreignKeys = List(ForeignKey(
259259
ForeignKeyRelation("customers", "customers_table", List("customer_id")),
260-
List(ForeignKeyRelation("orders", "orders_table", List("customer_id"))),
261-
List(),
262-
cardinality = Some(CardinalityConfig(ratio = Some(2.0), distribution = "uniform")),
263-
generationMode = Some("all-exist")
260+
List(ForeignKeyRelation("orders", "orders_table", List("customer_id"),
261+
cardinality = Some(CardinalityConfig(ratio = Some(2.0), distribution = "uniform")),
262+
generationMode = Some("all-exist"))),
263+
List()
264264
))
265265

266266
val sinkOptions = SinkOptions(Some("12347"), None, foreignKeys)
@@ -313,7 +313,6 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
313313
val result = ForeignKeyUtil.getDataFramesWithForeignKeys(
314314
adjustedPlan,
315315
dfMap,
316-
useV2 = true,
317316
executableTasks = Some(executableTasks)
318317
)
319318

@@ -339,11 +338,11 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
339338
test("E2E: Cardinality with partial mode - introduces violations while preserving cardinality") {
340339
val foreignKeys = List(ForeignKey(
341340
ForeignKeyRelation("products", "products_table", List("product_id")),
342-
List(ForeignKeyRelation("reviews", "reviews_table", List("product_id"))),
343-
List(),
344-
cardinality = Some(CardinalityConfig(ratio = Some(3.0), distribution = "uniform")),
345-
nullability = Some(NullabilityConfig(0.25, "random")),
346-
generationMode = Some("partial")
341+
List(ForeignKeyRelation("reviews", "reviews_table", List("product_id"),
342+
cardinality = Some(CardinalityConfig(ratio = Some(3.0), distribution = "uniform")),
343+
nullability = Some(NullabilityConfig(0.25, "random")),
344+
generationMode = Some("partial"))),
345+
List()
347346
))
348347

349348
val sinkOptions = SinkOptions(Some("1"), None, foreignKeys)
@@ -403,7 +402,6 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
403402
val result = ForeignKeyUtil.getDataFramesWithForeignKeys(
404403
adjustedPlan,
405404
dfMap,
406-
useV2 = true,
407405
executableTasks = Some(executableTasks)
408406
)
409407

@@ -441,9 +439,9 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
441439
test("E2E: Composite key cardinality - full flow") {
442440
val foreignKeys = List(ForeignKey(
443441
ForeignKeyRelation("locations", "locations_table", List("country", "state")),
444-
List(ForeignKeyRelation("stores", "stores_table", List("country", "state"))),
445-
List(),
446-
cardinality = Some(CardinalityConfig(ratio = Some(3.0), distribution = "uniform"))
442+
List(ForeignKeyRelation("stores", "stores_table", List("country", "state"),
443+
cardinality = Some(CardinalityConfig(ratio = Some(3.0), distribution = "uniform")))),
444+
List()
447445
))
448446

449447
val sinkOptions = SinkOptions(Some("12348"), None, foreignKeys)
@@ -496,7 +494,6 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
496494
val result = ForeignKeyUtil.getDataFramesWithForeignKeys(
497495
adjustedPlan,
498496
dfMap,
499-
useV2 = true,
500497
executableTasks = Some(executableTasks)
501498
)
502499

@@ -525,9 +522,9 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
525522
test("E2E: FK with nullability (no cardinality) - standard processing") {
526523
val foreignKeys = List(ForeignKey(
527524
ForeignKeyRelation("stores", "stores_table", List("store_id")),
528-
List(ForeignKeyRelation("sales", "sales_table", List("store_id"))),
529-
List(),
530-
nullability = Some(NullabilityConfig(0.2, "random"))
525+
List(ForeignKeyRelation("sales", "sales_table", List("store_id"),
526+
nullability = Some(NullabilityConfig(0.2, "random")))),
527+
List()
531528
))
532529

533530
val sinkOptions = SinkOptions(Some("12349"), None, foreignKeys)
@@ -585,7 +582,6 @@ class ForeignKeyEndToEndIntegrationTest extends SparkSuite {
585582
val result = ForeignKeyUtil.getDataFramesWithForeignKeys(
586583
adjustedPlan,
587584
dfMap,
588-
useV2 = true,
589585
executableTasks = None // No perField
590586
)
591587

app/src/integrationTest/scala/io/github/datacatering/datacaterer/core/generator/EnhancedForeignKeyIntegrationTest.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@ class EnhancedForeignKeyIntegrationTest extends SparkSuite with Matchers with Be
7171
val customerIds = customersData.map(_.getAs[String]("customer_id")).toSet
7272
val profileCustomerIds = profilesData.map(_.getAs[String]("customer_id")).toSet
7373

74-
profileCustomerIds shouldBe customerIds
74+
assert(profileCustomerIds.forall(customerIds.contains))
75+
profileCustomerIds.foreach(x => assert(customerIds.contains(x)))
76+
customerIds.foreach(x => assert(profileCustomerIds.contains(x)))
77+
assert(customerIds.forall(profileCustomerIds.contains))
7578
profilesData.groupBy(_.getAs[String]("customer_id")).foreach { case (_, profiles) =>
7679
profiles.length shouldBe 1
7780
}

0 commit comments

Comments
 (0)