1818use crate :: memory_pool:: reclaimer:: reclaimer_state;
1919use crate :: memory_pool:: {
2020 MemoryConsumer , MemoryLimit , MemoryPool , MemoryReclaimer , MemoryReservation ,
21- human_readable_size,
21+ ReclaimerHandle , human_readable_size,
2222} ;
2323use datafusion_common:: HashMap ;
2424use datafusion_common:: { DataFusionError , Result , resources_datafusion_err} ;
@@ -343,13 +343,13 @@ struct TrackedConsumer {
343343 reserved : AtomicUsize ,
344344 peak : AtomicUsize ,
345345 reclaimer : Option < Arc < dyn MemoryReclaimer > > ,
346- /// Tri-state eligibility flag for [`reclaimer`], encoded per
347- /// [`reclaimer_state`]. The pool flips ` AVAILABLE` ↔ `IN_FLIGHT`
348- /// for dedup; the reclaimer's owner may sticky-set `DISABLED` once
349- /// it can no longer free memory. Shared `Arc` so the reclaimer
350- /// side and the pool see the same cell. `None` reclaimer ⇒ flag
351- /// is unused but still allocated .
352- reclaimer_state : Arc < AtomicU8 > ,
346+ /// Eligibility handle for [`reclaimer`]. The pool flips
347+ /// `AVAILABLE` ↔ `IN_FLIGHT` on its internal cell for dedup; the
348+ /// reclaimer's owner sticky-sets `DISABLED` via [`ReclaimerHandle`].
349+ /// For consumers without a reclaimer the pool still allocates a
350+ /// fresh handle so the `self_guard` path in `try_grow_async` is
351+ /// uniform (cheap: one `Arc<AtomicU8>`).
352+ handle : ReclaimerHandle ,
353353}
354354
355355/// RAII guard for the [`IN_FLIGHT`] slot of a [`TrackedConsumer`]'s
@@ -628,14 +628,15 @@ impl<I: MemoryPool> MemoryPool for TrackConsumersPool<I> {
628628 self . inner . register ( consumer) ;
629629
630630 let reclaimer = consumer. reclaimer ( ) . cloned ( ) ;
631- // Reuse the reclaimer's own flag when it provides one — that
632- // way the reclaimer side can sticky-set `DISABLED` and the
633- // pool sees it on the next filter pass. Otherwise allocate a
634- // fresh `AVAILABLE` flag for in-flight dedup only.
635- let state = reclaimer
636- . as_ref ( )
637- . and_then ( |r| r. reclaimer_state ( ) )
638- . unwrap_or_else ( || Arc :: new ( AtomicU8 :: new ( reclaimer_state:: AVAILABLE ) ) ) ;
631+ // Use the operator-supplied handle when one is attached, so a
632+ // sticky `DISABLED` flip on the operator side is visible to
633+ // the pool on the next filter pass. For consumers without a
634+ // reclaimer, allocate a private handle so the in-flight guard
635+ // path stays uniform.
636+ let handle = consumer
637+ . reclaimer_handle ( )
638+ . cloned ( )
639+ . unwrap_or_else ( ReclaimerHandle :: new) ;
639640
640641 let mut guard = self . tracked_consumers . write ( ) ;
641642 let existing = guard. insert (
@@ -646,7 +647,7 @@ impl<I: MemoryPool> MemoryPool for TrackConsumersPool<I> {
646647 reserved : Default :: default ( ) ,
647648 peak : Default :: default ( ) ,
648649 reclaimer,
649- reclaimer_state : state ,
650+ handle ,
650651 } ,
651652 ) ;
652653
@@ -738,23 +739,36 @@ impl<I: MemoryPool> MemoryPool for TrackConsumersPool<I> {
738739 . tracked_consumers
739740 . read ( )
740741 . get ( & requestor_id)
741- . and_then ( |tc| ReclaimerStateGuard :: try_acquire ( & tc. reclaimer_state ) ) ;
742+ . and_then ( |tc| ReclaimerStateGuard :: try_acquire ( tc. handle . state ( ) ) ) ;
742743
743744 let mut retries: usize = 0 ;
744745 loop {
745- // Snapshot reclaimers. Only consumers strictly larger than
746- // the requestor are eligible: smaller-or-equal siblings would
747- // free less than the requestor itself can, so the requestor
748- // should self-spill instead. This rule also breaks the
749- // mutual-reclaim cycle (A targets B while B targets A) — at
750- // most one side of any pair can hold strictly more memory,
751- // so the other side has no candidates and surfaces an error
752- // for the caller's self-spill fallback. Filter out anyone
753- // whose `reclaimer_state` flag is not `AVAILABLE` (in-flight or
754- // sticky-disabled). Also count IN_FLIGHT siblings so we know
755- // whether to wait briefly for them to finish before giving up.
756- // Drop the read guard before awaiting any reclaim.
757- let requestor_reserved = {
746+ // Snapshot reclaimers. Eligibility ranks by *reclaimable*
747+ // bytes (what the reclaimer believes it can free now),
748+ // not by total consumer reservation: a sorter whose
749+ // buffer just spilled may still carry a non-zero merge
750+ // reservation, but its reclaimable size is ~0 and
751+ // picking it would burn a round trip for nothing.
752+ // Reclaimers that don't report a precise bound fall
753+ // back to their tracked `reserved` as a conservative
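754+ // upper bound. NOTE(review): the candidate filter below currently takes `tc.reserved()` for every sibling — confirm a precise per-reclaimer bound is actually wired in.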
755+ //
756+ // Only consumers with *strictly more* reclaimable bytes
757+ // than the requestor are eligible: smaller-or-equal
758+ // siblings would free less than the requestor itself
759+ // could, so the requestor should self-spill instead.
760+ // This rule also breaks the mutual-reclaim cycle (A
761+ // targets B while B targets A) — at most one side of
762+ // any pair can hold strictly more, so the other side
763+ // has no candidates and surfaces an error for the
764+ // caller's self-spill fallback.
765+ //
766+ // Filter out anyone whose handle is not `AVAILABLE`
767+ // (in-flight or sticky-disabled). Also count IN_FLIGHT
768+ // siblings so we know whether to wait briefly for them
769+ // to finish before giving up. Drop the read guard
770+ // before awaiting any reclaim.
771+ let requestor_reclaimable = {
758772 let guard = self . tracked_consumers . read ( ) ;
759773 guard
760774 . get ( & requestor_id)
@@ -776,26 +790,27 @@ impl<I: MemoryPool> MemoryPool for TrackConsumersPool<I> {
776790 }
777791 // Track in-flight siblings (any size) so we can
778792 // decide whether a retry has any chance of helping.
779- let state = tc. reclaimer_state . load ( Ordering :: Acquire ) ;
793+ let state = tc. handle . state ( ) . load ( Ordering :: Acquire ) ;
780794 if state == reclaimer_state:: IN_FLIGHT {
781795 in_flight_seen += 1 ;
782796 }
783797 let reclaimer = tc. reclaimer . as_ref ( ) ?;
784- if tc. reserved ( ) <= requestor_reserved {
798+ let reclaimable = tc. reserved ( ) ;
799+ if reclaimable <= requestor_reclaimable {
785800 return None ;
786801 }
787802 if state != reclaimer_state:: AVAILABLE {
788803 return None ;
789804 }
790805 Some ( (
791- tc . reserved ( ) ,
806+ reclaimable ,
792807 Arc :: clone ( reclaimer) ,
793- Arc :: clone ( & tc. reclaimer_state ) ,
808+ Arc :: clone ( tc. handle . state ( ) ) ,
794809 ) )
795810 } )
796811 . collect ( )
797812 } ;
798- // Order: priority desc, then reservation size desc.
813+ // Order: priority desc, then reclaimable bytes desc.
799814 candidates. sort_by ( |( lr, l, _) , ( rr, r, _) | {
800815 r. priority ( ) . cmp ( & l. priority ( ) ) . then_with ( || rr. cmp ( lr) )
801816 } ) ;
0 commit comments