From b773e8ab6d8d9febc1b8c5965f6455d2f16406a9 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Sun, 10 May 2026 12:19:45 -0700 Subject: [PATCH 1/3] Propagate nested container dirty ids --- CHANGELOG.md | 1 + core-relations/src/containers/mod.rs | 50 ++++++++++++++++++- tests/container-fail.egg | 22 ++++++++ ...files__shared_snapshot_container_fail.snap | 1 + 4 files changed, 73 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07d095030..33d500a20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## [Unreleased] - ReleaseDate +- Fix seminaive matching after nested containers rebuild in place by propagating dirty container ids through parent containers. - Desugar `relation`s to `constructor`s to simplify the language and implementation. Relations no longer return unit `()` values. - Refactored API to use [`TermId`] more consistently instead of `Term` where possible, simplifying egglog code. - **Typed primitive surface for seminaive safety (#772).** Custom primitives now pick one of `PurePrim` / `ReadPrim` / `WritePrim` / `FullPrim` based on what the body needs, and register via the matching `add_*_primitive`. Rust enforces capability bounds via the state wrapper passed to the body; the egglog typechecker enforces context bounds. See the `egglog::exec_state` module docs and the `*Prim` trait docs for the full picture. Migration: `rust_rule` callbacks now take `&mut WriteState` (replacing `RustRuleContext`); a new `rust_rule_full` gives action callbacks read access. Higher-order primitives over `unstable-fn` values dispatch via `state.apply_function(&fc, args)`. diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index 3c23a4f1b..497e20cf0 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -81,6 +81,8 @@ pub struct ContainerValues { /// `dirty_ids` is narrower: it records container ids whose semantics changed /// while their stored outer id stayed stable. Ordinary table rebuild already /// handles changed-id cases; these ids need a follow-up parent-row refresh. +/// This includes containers that changed directly and containers whose +/// contained containers changed in place. /// /// For example, `l(vec-of(w(k(b))))` can rebuild to `l(vec-of(k(b)))` without /// changing the `Vec` id. The row is now newly matchable, but seminaive will @@ -174,7 +176,7 @@ impl ContainerValues { // We may attempt an incremental rebuild. self.subset_tracker.recent_updates(table_id, table) }); - if parallelize_inter_container_op(self.data.next_id().index()) { + let mut summary = if parallelize_inter_container_op(self.data.next_id().index()) { self.data .iter_mut() .zip(std::iter::repeat_with(|| exec_state.clone())) @@ -202,6 +204,38 @@ impl ContainerValues { )); } summary + }; + self.expand_dirty_id_closure(&mut summary); + summary + } + + /// Add ancestor containers to the dirty-id set until it is transitively closed. + /// + /// A rebuild can change a container's semantics in place without changing + /// its id. If that container is itself stored inside another container, + /// the parent container has also changed semantically even though no direct + /// rebuild touched its contents. For example, with + /// `(p (vec-of (vec-of (w (b)))))` and `(rewrite (w x) x)`, the inner + /// `Vec` rebuilds in place to `vec-of (b)`. Without this closure, only the + /// inner `Vec` id is dirty; the outer `Vec` row is not retimestamped, so a + /// later rule like `(rewrite (p (vec-of (vec-of (b)))) (b))` can miss the + /// newly matchable parent row. + fn expand_dirty_id_closure(&self, summary: &mut ContainerRebuildSummary) { + let mut seen = summary.dirty_ids.clone(); + let mut frontier = seen.clone(); + + while !frontier.is_empty() { + let mut next = IndexSet::default(); + for (_, env) in self.data.iter() { + env.extend_containers_containing(&frontier, &mut next); + } + frontier.clear(); + for value in next { + if seen.insert(value) { + summary.note_dirty_id(value); + frontier.insert(value); + } + } } } @@ -252,6 +286,12 @@ pub trait DynamicContainerEnv: Any + dyn_clone::DynClone + Send + Sync { subset: Option, exec_state: &mut ExecutionState, ) -> ContainerRebuildSummary; + /// Add ids for containers in this environment that contain any `values`. + /// + /// This uses the container content index populated from + /// [`ContainerValue::iter`] and lets callers climb from dirty child ids to + /// all directly containing parent container ids. + fn extend_containers_containing(&self, values: &IndexSet, out: &mut IndexSet); } // Implements `Clone` for `Box`. @@ -302,6 +342,14 @@ impl DynamicContainerEnv for ContainerEnv { } self.apply_rebuild_nonincremental(rebuilder, exec_state) } + + fn extend_containers_containing(&self, values: &IndexSet, out: &mut IndexSet) { + for value in values { + if let Some(containers) = self.val_index.get(value) { + out.extend(containers.iter().copied()); + } + } + } } impl ContainerEnv { diff --git a/tests/container-fail.egg b/tests/container-fail.egg index f6379b89a..be9505467 100644 --- a/tests/container-fail.egg +++ b/tests/container-fail.egg @@ -20,3 +20,25 @@ (let $resolved (l (vec-of (k (b))))) (run-schedule (saturate (run))) (check (= $resolved (b))) + +;; Regression for nested container dirty propagation. The first case starts +;; with `p(vec-of(vec-of(w(b))))`. Rebuilding `w(b)` to `b` dirties the inner +;; Vec, but the outer Vec still contains the same inner Vec id. Without +;; transitive dirty-id closure, the `p(outer)` row is not revisited, so the +;; rewrite below misses the newly matchable `p(vec-of(vec-of(b)))`. +(sort VVE (Vec VE)) +(constructor p (VVE) E) + +(rewrite (p (vec-of (vec-of (b)))) (b)) + +(push) +(let $nested (p (vec-of (vec-of (w (b)))))) +(run-schedule (saturate (run))) +(check (= $nested (b))) +(pop) + +;; Control: the rewrite itself works when the outer Vec is inserted in its +;; already-rebuilt shape. +(let $nested-resolved (p (vec-of (vec-of (b))))) +(run-schedule (saturate (run))) +(check (= $nested-resolved (b))) diff --git a/tests/snapshots/files__shared_snapshot_container_fail.snap b/tests/snapshots/files__shared_snapshot_container_fail.snap index 10e25a614..108601dba 100644 --- a/tests/snapshots/files__shared_snapshot_container_fail.snap +++ b/tests/snapshots/files__shared_snapshot_container_fail.snap @@ -5,4 +5,5 @@ expression: snapshot_content_across_treatments ((b 1) (k 1) (l 1) + (p 1) (w 0)) From edbd6f30e0d1bfd7d541343377a2db30f308f313 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 26 May 2026 14:25:02 -0700 Subject: [PATCH 2/3] Split nested container dirty propagation test --- tests/container-fail.egg | 22 ------------------- tests/nested-container-dirty-propagation.egg | 19 ++++++++++++++++ .../files__proof_unsupported_files.snap | 1 + ...files__shared_snapshot_container_fail.snap | 1 - ...ot_nested_container_dirty_propagation.snap | 7 ++++++ 5 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 tests/nested-container-dirty-propagation.egg create mode 100644 tests/snapshots/files__shared_snapshot_nested_container_dirty_propagation.snap diff --git a/tests/container-fail.egg b/tests/container-fail.egg index be9505467..f6379b89a 100644 --- a/tests/container-fail.egg +++ b/tests/container-fail.egg @@ -20,25 +20,3 @@ (let $resolved (l (vec-of (k (b))))) (run-schedule (saturate (run))) (check (= $resolved (b))) - -;; Regression for nested container dirty propagation. The first case starts -;; with `p(vec-of(vec-of(w(b))))`. Rebuilding `w(b)` to `b` dirties the inner -;; Vec, but the outer Vec still contains the same inner Vec id. Without -;; transitive dirty-id closure, the `p(outer)` row is not revisited, so the -;; rewrite below misses the newly matchable `p(vec-of(vec-of(b)))`. -(sort VVE (Vec VE)) -(constructor p (VVE) E) - -(rewrite (p (vec-of (vec-of (b)))) (b)) - -(push) -(let $nested (p (vec-of (vec-of (w (b)))))) -(run-schedule (saturate (run))) -(check (= $nested (b))) -(pop) - -;; Control: the rewrite itself works when the outer Vec is inserted in its -;; already-rebuilt shape. -(let $nested-resolved (p (vec-of (vec-of (b))))) -(run-schedule (saturate (run))) -(check (= $nested-resolved (b))) diff --git a/tests/nested-container-dirty-propagation.egg b/tests/nested-container-dirty-propagation.egg new file mode 100644 index 000000000..9a7da8f82 --- /dev/null +++ b/tests/nested-container-dirty-propagation.egg @@ -0,0 +1,19 @@ +;; Regression for PR #888, which extends the #831/#832 container refresh fix to +;; nested containers. Rebuilding `w(b)` to `b` dirties the inner Vec, but the +;; outer Vec still contains the same inner Vec id. Without transitive dirty-id +;; closure, the `p(outer)` row is not revisited, so the rewrite below misses the +;; newly matchable `p(vec-of(vec-of(b)))`. +(sort E) +(sort VE (Vec E)) +(sort VVE (Vec VE)) + +(constructor b () E) +(constructor w (E) E) +(constructor p (VVE) E) + +(rewrite (w x) x) +(rewrite (p (vec-of (vec-of (b)))) (b)) + +(let $nested (p (vec-of (vec-of (w (b)))))) +(run-schedule (saturate (run))) +(check (= $nested (b))) diff --git a/tests/snapshots/files__proof_unsupported_files.snap b/tests/snapshots/files__proof_unsupported_files.snap index 71e826188..d4fc6e54f 100644 --- a/tests/snapshots/files__proof_unsupported_files.snap +++ b/tests/snapshots/files__proof_unsupported_files.snap @@ -37,6 +37,7 @@ map.egg math.egg merge_read.egg multiset.egg +nested-container-dirty-propagation.egg pair.egg prims.egg print-function.egg diff --git a/tests/snapshots/files__shared_snapshot_container_fail.snap b/tests/snapshots/files__shared_snapshot_container_fail.snap index 108601dba..10e25a614 100644 --- a/tests/snapshots/files__shared_snapshot_container_fail.snap +++ b/tests/snapshots/files__shared_snapshot_container_fail.snap @@ -5,5 +5,4 @@ expression: snapshot_content_across_treatments ((b 1) (k 1) (l 1) - (p 1) (w 0)) diff --git a/tests/snapshots/files__shared_snapshot_nested_container_dirty_propagation.snap b/tests/snapshots/files__shared_snapshot_nested_container_dirty_propagation.snap new file mode 100644 index 000000000..b9793ed2d --- /dev/null +++ b/tests/snapshots/files__shared_snapshot_nested_container_dirty_propagation.snap @@ -0,0 +1,7 @@ +--- +source: tests/files.rs +expression: snapshot_content_across_treatments +--- +((b 1) + (p 1) + (w 1)) From 194d3ef96797a7605071e52a466af921b227f7cf Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 26 May 2026 14:30:22 -0700 Subject: [PATCH 3/3] Avoid redundant dirty-id closure clone --- core-relations/src/containers/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index 497e20cf0..64a78d24b 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -221,8 +221,8 @@ impl ContainerValues { /// later rule like `(rewrite (p (vec-of (vec-of (b)))) (b))` can miss the /// newly matchable parent row. fn expand_dirty_id_closure(&self, summary: &mut ContainerRebuildSummary) { - let mut seen = summary.dirty_ids.clone(); - let mut frontier = seen.clone(); + let mut frontier = summary.dirty_ids.clone(); + let mut seen = frontier.iter().copied().collect::>(); while !frontier.is_empty() { let mut next = IndexSet::default();