Skip to content

Commit 00bdc82

Browse files
fix: restrict prune() to non-cascade Diagrams
prune() removes tables with zero matching rows from the diagram. On a cascade Diagram (used for delete) this is unsafe: between the cascade computation and the actual DELETE, concurrent inserts could add rows to a pruned table, causing foreign-key errors during the delete. Zero-count tables in the cascade are harmless, because delete_quick() on an empty result is a no-op. prune() now raises DataJointError on cascade-produced Diagrams. It remains valid after restrict() (export subsetting) and on unrestricted Diagrams (showing only populated tables). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 384f026 commit 00bdc82

File tree

2 files changed

+17
-19
lines changed

2 files changed

+17
-19
lines changed

src/datajoint/diagram.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -648,8 +648,10 @@ def prune(self):
648648
Remove tables with zero matching rows from the diagram.
649649
650650
Without prior restrictions, removes physically empty tables.
651-
With restrictions (``cascade()`` or ``restrict()``), removes
652-
tables where the restricted query yields zero rows.
651+
After ``restrict()``, removes tables where the restricted query
652+
yields zero rows. Cannot be used on a cascade Diagram (cascade
653+
is for delete, where zero-count tables must remain in the graph
654+
to handle concurrent inserts safely).
653655
654656
Returns
655657
-------
@@ -658,16 +660,20 @@ def prune(self):
658660
"""
659661
from .table import FreeTable
660662

663+
if self._cascade_restrictions:
664+
raise DataJointError(
665+
"prune() cannot be used on a Diagram produced by Diagram.cascade(). "
666+
"Cascade diagrams must retain all descendant tables for safe deletion."
667+
)
668+
661669
result = Diagram(self)
662-
restrictions = result._cascade_restrictions or result._restrict_conditions
663670

664-
if restrictions:
665-
# Restricted: check row counts under restriction
666-
for node in list(restrictions):
671+
if result._restrict_conditions:
672+
for node in list(result._restrict_conditions):
667673
if node.isdigit():
668674
continue
669675
if len(result._restricted_table(node)) == 0:
670-
restrictions.pop(node)
676+
result._restrict_conditions.pop(node)
671677
result._restriction_attrs.pop(node, None)
672678
result.nodes_to_show.discard(node)
673679
else:

tests/integration/test_erd.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,19 +124,11 @@ def test_prune_after_restrict(schema_simp_pop):
124124
assert table not in pruned._restrict_conditions, f"{table} had 0 rows but was not pruned"
125125

126126

127-
def test_prune_after_cascade(schema_simp_pop):
128-
"""Prune after cascade removes tables with zero matching rows."""
127+
def test_prune_raises_on_cascade(schema_simp_pop):
128+
"""prune() raises on a cascade Diagram — cascade must retain all tables for safe deletion."""
129129
cascaded = dj.Diagram.cascade(A & "id_a=0")
130-
counts = cascaded.counts()
131-
132-
pruned = cascaded.prune()
133-
pruned_counts = pruned.counts()
134-
135-
assert all(c > 0 for c in pruned_counts.values())
136-
137-
for table, count in counts.items():
138-
if count == 0:
139-
assert table not in pruned._cascade_restrictions, f"{table} had 0 rows but was not pruned"
130+
with _pytest.raises(dj.DataJointError, match="prune.*cannot be used.*cascade"):
131+
cascaded.prune()
140132

141133

142134
def test_prune_idempotent(schema_simp_pop):

0 commit comments

Comments
 (0)