Skip to content

Commit 72eccba

Browse files
fix: address all PR #31 review comments
1. hash.rs: Sort tables by (order, table_name) for deterministic hashing when multiple tables share the same order value.
2. db.rs (MySQL): Use SHA-256 generated column (row_key_hash) for the row tracking table primary key instead of row_key(255) prefix index, preventing key collisions for JSON keys exceeding 255 bytes.
3. docs/seeding.md: Clarify that reconcile mode only reconciles when the rendered spec changes (hash mismatch), and that out-of-band DB changes are not corrected until a spec change triggers reconciliation.
4. executor.rs: Add runtime guard for --reconcile-all — rejects seed sets where any table lacks unique_key, preventing wrong-row updates/deletes from identical row keys.
5. executor.rs: Skip hash-based fast path for seed sets containing @ref: expressions, since resolved reference targets can change without affecting the hash (e.g., upstream auto_id row reinserted).
6. executor.rs: Dry-run mode now treats @ref: as literals via resolve_value_dry_run(), preventing failures when refs haven't been populated (auto_id + refs within same seed set).
7. schema.rs: Reconcile validation now rejects empty/whitespace unique_key entries, reserved column names (_ref) in unique_key, and rows missing required unique_key columns.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3eacddd commit 72eccba

6 files changed

Lines changed: 175 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515
- Content hash (`content_hash` column) on the seed tracking table for fast "anything changed?" checks before row-by-row comparison.
1616
- Automatic migration of existing tracking tables: the `content_hash` column is added transparently on first run. Existing seed sets remain in `once` mode with no behavior change.
1717

18+
### Changed
19+
- Reconcile hash-skip now only applies to seed sets without `@ref:` expressions. Seed sets containing `@ref:` references always run row-level reconciliation to prevent stale foreign keys when upstream auto-generated IDs shift.
20+
- Hash computation sorts tables by `(order, table_name)` instead of just `order` for deterministic hashing when multiple tables share the same order value.
21+
- Dry-run mode treats `@ref:` expressions as literals to avoid failures when references haven't been populated yet (e.g., auto_id + refs within the same seed set).
22+
23+
### Fixed
24+
- `--reconcile-all` now rejects seed sets where any table is missing `unique_key`, preventing reconciliation from generating identical row keys and updating/deleting wrong rows.
25+
- Reconcile mode validation now rejects empty/whitespace-only `unique_key` entries and reserved column names like `_ref`.
26+
- Reconcile mode validation now checks that every row contains all `unique_key` columns, preventing incomplete row keys during reconciliation.
27+
- The MySQL row tracking table now uses a SHA-256 generated column (`row_key_hash`) in its primary key instead of a `row_key(255)` prefix index, preventing key collisions for JSON row keys longer than 255 bytes.
28+
1829
## [1.1.0] - 2026-02-26
1930

2031
### Added

docs/seeding.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ rows:
217217

218218
### Reconcile Mode
219219

220-
By default, seed sets are applied once and never modified (`mode: once`). Reconcile mode makes seeding declarative: the spec becomes the source of truth, and initium converges the database to match it on every run.
220+
By default, seed sets are applied once and never modified (`mode: once`). Reconcile mode makes seeding declarative: the rendered spec becomes the source of truth, and initium reconciles the database to match it whenever the rendered spec changes.
221+
222+
If the rendered spec has not changed since the last run (content hash match), initium treats the seed set as already reconciled and skips it. Out-of-band database changes are not corrected until a spec change triggers reconciliation again.
221223

222224
Enable reconcile mode per seed set:
223225

src/seed/db.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1318,7 +1318,8 @@ impl Database for MysqlDb {
13181318
row_key TEXT NOT NULL,
13191319
row_values TEXT NOT NULL,
13201320
applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
1321-
PRIMARY KEY (seed_set, table_name, row_key(255))
1321+
row_key_hash BINARY(32) GENERATED ALWAYS AS (UNHEX(SHA2(row_key, 256))) STORED,
1322+
PRIMARY KEY (seed_set, table_name, row_key_hash)
13221323
)",
13231324
safe
13241325
);

src/seed/executor.rs

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,17 @@ impl<'a> SeedExecutor<'a> {
195195
);
196196

197197
if is_reconcile {
198+
// Guard: reconcile requires unique_key on every table.
199+
// Schema validation catches this for mode: reconcile, but --reconcile-all
200+
// can force reconcile on mode: once seed sets that lack unique_key.
201+
for ts in &ss.tables {
202+
if ts.unique_key.is_empty() {
203+
return Err(format!(
204+
"cannot reconcile seed set '{}': table '{}' has no unique_key (required for reconcile mode)",
205+
name, ts.table
206+
));
207+
}
208+
}
198209
return self.reconcile_seed_set(ss);
199210
}
200211

@@ -361,9 +372,20 @@ impl<'a> SeedExecutor<'a> {
361372
// Compute hash of current spec (resolve env vars, keep @ref: as literals)
362373
let current_hash = compute_seed_set_hash(ss, &|val| self.resolve_value(val))?;
363374

364-
// Check stored hash for quick skip
375+
// Check stored hash for quick skip.
376+
// Only skip if the seed set has no @ref: expressions, because
377+
// compute_seed_set_hash treats @ref: values as literals. Resolved
378+
// reference targets can change without affecting the hash (e.g.,
379+
// upstream auto_id row deleted/reinserted), and skipping could leave
380+
// stale foreign keys.
365381
let stored_hash = self.db.get_seed_hash(&self.tracking_table, name)?;
366-
if stored_hash.as_deref() == Some(current_hash.as_str()) {
382+
let has_refs = ss.tables.iter().any(|ts| {
383+
ts.rows.iter().any(|row| {
384+
row.values()
385+
.any(|v| v.as_str().map(|s| s.starts_with("@ref:")).unwrap_or(false))
386+
})
387+
});
388+
if !has_refs && stored_hash.as_deref() == Some(current_hash.as_str()) {
367389
self.log.info(
368390
"seed set unchanged (hash match), skipping",
369391
&[("seed_set", name)],
@@ -633,6 +655,15 @@ impl<'a> SeedExecutor<'a> {
633655
Ok(())
634656
}
635657

658+
/// Resolve a value for dry-run: treats `@ref:` as literals to avoid failures
659+
/// when refs haven't been populated (common with auto_id + refs in same seed set).
660+
fn resolve_value_dry_run(&self, val: &serde_yaml::Value) -> Result<String, String> {
661+
match val {
662+
serde_yaml::Value::String(s) if s.starts_with("@ref:") => Ok(s.clone()),
663+
_ => self.resolve_value(val),
664+
}
665+
}
666+
636667
/// Dry-run: compute what reconciliation would do without modifying the DB.
637668
fn dry_run_reconcile_tables(&mut self, ss: &SeedSet) -> Result<(), String> {
638669
let mut tables: Vec<&TableSeed> = ss.tables.iter().collect();
@@ -659,7 +690,7 @@ impl<'a> SeedExecutor<'a> {
659690
if key == "_ref" {
660691
continue;
661692
}
662-
let resolved = self.resolve_value(val)?;
693+
let resolved = self.resolve_value_dry_run(val)?;
663694
columns.push(key.clone());
664695
values.push(resolved.clone());
665696
if ts.unique_key.contains(key) {
@@ -2362,4 +2393,36 @@ phases:
23622393
assert!(result.is_err());
23632394
assert!(result.unwrap_err().contains("invalid mode"));
23642395
}
2396+
2397+
#[test]
2398+
fn test_reconcile_all_rejects_missing_unique_key() {
2399+
let dir = tempfile::TempDir::new().unwrap();
2400+
let db_path = dir.path().join("test.db");
2401+
let db_path_str = db_path.to_str().unwrap();
2402+
2403+
let sqlite = SqliteDb::connect(db_path_str).unwrap();
2404+
setup_db_with_tables(&sqlite);
2405+
2406+
// mode: once with no unique_key + reconcile_all should error
2407+
let yaml = r#"
2408+
database:
2409+
driver: sqlite
2410+
url: ":memory:"
2411+
phases:
2412+
- name: phase1
2413+
seed_sets:
2414+
- name: no_uk
2415+
tables:
2416+
- table: departments
2417+
rows:
2418+
- name: Engineering
2419+
"#;
2420+
let plan = SeedPlan::from_yaml(yaml).unwrap();
2421+
let log = test_logger();
2422+
let mut exec = SeedExecutor::new(&log, Box::new(sqlite), "initium_seed".into(), false)
2423+
.with_reconcile_all(true);
2424+
let result = exec.execute(&plan);
2425+
assert!(result.is_err());
2426+
assert!(result.unwrap_err().contains("no unique_key"));
2427+
}
23652428
}

src/seed/hash.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub fn compute_seed_set_hash(
1616
let mut hasher = Sha256::new();
1717

1818
let mut tables: Vec<_> = ss.tables.iter().collect();
19-
tables.sort_by_key(|t| t.order);
19+
tables.sort_by(|a, b| a.order.cmp(&b.order).then_with(|| a.table.cmp(&b.table)));
2020

2121
for ts in &tables {
2222
hasher.update(ts.table.as_bytes());

src/seed/schema.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,35 @@ impl SeedPlan {
238238
ts.table, ss.name
239239
));
240240
}
241+
if ss.is_reconcile() {
242+
if ts.unique_key.iter().any(|k| k.trim().is_empty()) {
243+
return Err(format!(
244+
"table '{}' in seed_set '{}' has empty or whitespace-only entries in unique_key when mode is 'reconcile'",
245+
ts.table, ss.name
246+
));
247+
}
248+
let reserved_keys = ["_ref"];
249+
if let Some(reserved) = ts
250+
.unique_key
251+
.iter()
252+
.find(|k| reserved_keys.contains(&k.as_str()))
253+
{
254+
return Err(format!(
255+
"table '{}' in seed_set '{}' uses reserved column '{}' in unique_key when mode is 'reconcile'",
256+
ts.table, ss.name, reserved
257+
));
258+
}
259+
for (row_idx, row) in ts.rows.iter().enumerate() {
260+
for uk in &ts.unique_key {
261+
if !row.contains_key(uk) {
262+
return Err(format!(
263+
"table '{}' in seed_set '{}': row {} is missing unique_key column '{}'",
264+
ts.table, ss.name, row_idx + 1, uk
265+
));
266+
}
267+
}
268+
}
269+
}
241270
}
242271
Ok(())
243272
}
@@ -646,4 +675,67 @@ phases:
646675
let plan = SeedPlan::from_yaml(yaml).unwrap();
647676
assert!(plan.phases[0].seed_sets.is_empty());
648677
}
678+
679+
#[test]
680+
fn test_reconcile_rejects_empty_unique_key_entry() {
681+
let yaml = r#"
682+
database:
683+
driver: sqlite
684+
url: ":memory:"
685+
phases:
686+
- name: p
687+
seed_sets:
688+
- name: s
689+
mode: reconcile
690+
tables:
691+
- table: t
692+
unique_key: ["", "k"]
693+
rows:
694+
- k: a
695+
"#;
696+
let err = SeedPlan::from_yaml(yaml).unwrap_err();
697+
assert!(err.contains("empty or whitespace-only"));
698+
}
699+
700+
#[test]
701+
fn test_reconcile_rejects_reserved_unique_key() {
702+
let yaml = r#"
703+
database:
704+
driver: sqlite
705+
url: ":memory:"
706+
phases:
707+
- name: p
708+
seed_sets:
709+
- name: s
710+
mode: reconcile
711+
tables:
712+
- table: t
713+
unique_key: [_ref]
714+
rows:
715+
- _ref: r1
716+
"#;
717+
let err = SeedPlan::from_yaml(yaml).unwrap_err();
718+
assert!(err.contains("reserved column '_ref'"));
719+
}
720+
721+
#[test]
722+
fn test_reconcile_rejects_row_missing_unique_key_column() {
723+
let yaml = r#"
724+
database:
725+
driver: sqlite
726+
url: ":memory:"
727+
phases:
728+
- name: p
729+
seed_sets:
730+
- name: s
731+
mode: reconcile
732+
tables:
733+
- table: t
734+
unique_key: [email]
735+
rows:
736+
- name: Alice
737+
"#;
738+
let err = SeedPlan::from_yaml(yaml).unwrap_err();
739+
assert!(err.contains("missing unique_key column 'email'"));
740+
}
649741
}

0 commit comments

Comments
 (0)