up

metascroy · metascroy · commit 92c541ad4d83 · 2026-04-08T17:22:56.000-07:00
diff --git a/backends/mlx/builder/slot_manager.py b/backends/mlx/builder/slot_manager.py
@@ -30,18 +30,25 @@ class IdSpace(Enum):
     Temp = auto()
 
 
-@dataclass(frozen=True)
+@dataclass(eq=False, frozen=True)
 class Slot:
+    """Represents an allocated tensor or symbolic int slot.
+
+    Uses identity-based equality and hashing (not field-based) so that
+    two Slots with the same (id_type, id_space, idx) — which can happen
+    when the delete-as-you-go allocator recycles an idx — remain distinct
+    in sets and dicts during build().
+    """
+
     id_type: IdType
     id_space: IdSpace
     idx: Optional[int] = None
-    # Unique allocation ID — ensures Slots with the same (id_type, id_space, idx)
-    # remain distinct in sets/dicts after an idx is freed and reused.
-    # Without this, the delete-as-you-go allocator can free idx=5, then
-    # make_tmp_slot reuses idx=5, and the new Slot equals the old one.
-    # In build()'s _collect_used_slots (a set) and _create_slot_mappings (a dict),
-    # they merge into one entry and get the same global Tid — causing aliasing.
-    alloc_id: Optional[int] = None
+
+    def __eq__(self, other):
+        return self is other
+
+    def __hash__(self):
+        return id(self)
 
 
 class IdManager:
@@ -66,13 +73,6 @@ def __init__(self):
         self.tid_managers: Dict[IdSpace, IdManager] = defaultdict(IdManager)
         self.vid_managers: Dict[IdSpace, IdManager] = defaultdict(IdManager)
         self.name_to_slot: Dict[str, Slot] = {}
-        self._next_alloc_id: int = 0
-
-    def _alloc_id(self) -> int:
-        """Return a globally unique allocation ID."""
-        aid = self._next_alloc_id
-        self._next_alloc_id += 1
-        return aid
 
     def set_slot(self, node_or_name: Union[Node, str], slot: Slot):
         if isinstance(node_or_name, Node):
@@ -124,9 +124,7 @@ def make_constant_slot(self, name: str) -> Slot:
         id_space = IdSpace.Constant
         manager = self.tid_managers[id_space]
         idx = manager.get_id()
-        slot = Slot(
-            id_type=IdType.Tensor, id_space=id_space, idx=idx, alloc_id=self._alloc_id()
-        )
+        slot = Slot(id_type=IdType.Tensor, id_space=id_space, idx=idx)
         self.name_to_slot[name] = slot
         return slot
 
@@ -135,9 +133,7 @@ def make_tmp_slot(self) -> Tuple[str, Slot]:
         id_space = IdSpace.Temp
         manager = self.tid_managers[id_space]
         idx = manager.get_id()
-        slot = Slot(
-            id_type=IdType.Tensor, id_space=id_space, idx=idx, alloc_id=self._alloc_id()
-        )
+        slot = Slot(id_type=IdType.Tensor, id_space=id_space, idx=idx)
         self.name_to_slot[name] = slot
         return name, slot
 
@@ -147,9 +143,7 @@ def make_tmp_value_slot(self) -> Tuple[str, Slot]:
         id_space = IdSpace.Temp
         manager = self.vid_managers[id_space]
         idx = manager.get_id()
-        slot = Slot(
-            id_type=IdType.SymInt, id_space=id_space, idx=idx, alloc_id=self._alloc_id()
-        )
+        slot = Slot(id_type=IdType.SymInt, id_space=id_space, idx=idx)
         self.name_to_slot[name] = slot
         return name, slot
 
@@ -182,14 +176,7 @@ def make_or_get_slots(
             else:
                 manager = self.vid_managers[id_space]
             idx = manager.get_id()
-            slots.append(
-                Slot(
-                    id_type=id_type,
-                    id_space=id_space,
-                    idx=idx,
-                    alloc_id=self._alloc_id(),
-                )
-            )
+            slots.append(Slot(id_type=id_type, id_space=id_space, idx=idx))
         slots = tuple(slots)
 
         # Store in the format that matches the node's output structure
diff --git a/backends/mlx/model_ops/gated_delta_rule.py b/backends/mlx/model_ops/gated_delta_rule.py
@@ -34,11 +34,6 @@
 from torch.fx.node import Node
 
 
-# ---------------------------------------------------------------------------
-# Custom op definition
-# ---------------------------------------------------------------------------
-
-
 @torch.library.custom_op("mlx::gated_delta_rule", mutates_args=("state",))
 def gated_delta_rule(
     q: Tensor,  # [B, T, Hk, Dk]
@@ -96,11 +91,6 @@ def gated_delta_rule_fake(
 
 
 from executorch.backends.mlx.builder.op_helpers import torch_dtype_to_scalar_type
-
-# ---------------------------------------------------------------------------
-# Pattern handler
-# ---------------------------------------------------------------------------
-
 from executorch.backends.mlx.builder.op_registry import PatternHandler, REGISTRY
 from executorch.backends.mlx.builder.program_builder import MLXProgramBuilder
 from executorch.backends.mlx.builder.slot_manager import Slot
@@ -311,12 +301,9 @@ def _emit_metal_kernel(self, P: MLXProgramBuilder, n: Node) -> Slot:
         b_iov = P.to_int_or_vid(b_val)
         t_iov = P.to_int_or_vid(t_val)
 
-        # Output slot for y
-        existing = P.slot_manager.get_slot(self.getitem_0)
-        if existing is not None:
-            out = existing if not isinstance(existing, tuple) else existing[0]
-        else:
-            _, out = P.make_tmp_slot()
+        # Output slot for y — use existing IO slot if getitem_0 is a graph output,
+        # otherwise create a new temp slot.
+        out = P.make_or_get_slot(self.getitem_0)
 
         # Output slot for state_out (carry)
         _, carry = P.make_tmp_slot()
@@ -449,9 +436,6 @@ def _emit_metal_kernel(self, P: MLXProgramBuilder, n: Node) -> Slot:
     def _emit_scan(self, P: MLXProgramBuilder, n: Node) -> Slot:
         """Emit ScanNode decomposition of the gated delta recurrence."""
 
-        # With alloc_id on Slot, slot_map's _mark_read can safely free
-        # and reuse idx values — each allocation remains distinct in
-        # build()'s used_slots set and _create_slot_mappings dict.
         q_slot, k_slot, v_slot, g_slot, beta_slot, state_slot = P.slot_map(
             [
                 self.q_node,
@@ -475,15 +459,7 @@ def _emit_scan(self, P: MLXProgramBuilder, n: Node) -> Slot:
         _, beta_s = P.make_tmp_slot()
 
         # Output slot for the recurrence output.
-        # getitem_0 already has an Output slot from _make_io_slots (it's a
-        # USER_OUTPUT). Use that existing slot so the ScanNode writes directly
-        # into the output slot. Don't call make_or_get_slots on auto_func_node
-        # (deferred body node must not have slots per _verify_build).
-        existing = P.slot_manager.get_slot(self.getitem_0)
-        if existing is not None:
-            out = existing if not isinstance(existing, tuple) else existing[0]
-        else:
-            _, out = P.make_tmp_slot()
+        out = P.make_or_get_slot(self.getitem_0)
 
         # Body temp slots
         _, t0 = P.make_tmp_slot()
@@ -575,10 +551,6 @@ def _emit_scan(self, P: MLXProgramBuilder, n: Node) -> Slot:
         return carry
 
 
-# ---------------------------------------------------------------------------
-# Registration
-# ---------------------------------------------------------------------------
-
 _registered = False