@@ -462,6 +462,13 @@ def bulk_soft_delete_documents(
462462 ``Corpus.remove_document`` produces, so trashed documents stay in the
463463 trash listing and remain restorable.
464464
465+ Scaling caveat — O(1) in *queries*, not in *memory*: this primitive
466+ still materializes both ``document_ids`` and the locked ``active_paths``
467+ fully into Python lists, so there is **no built-in document-count
468+ ceiling**. That is fine for realistic corpus sizes, but chunk/iterate
469+ the doc set here before exposing an unbounded ``empty_corpus`` / folder
470+ cascade-delete to arbitrarily large corpora (memory follow-up to #1951).
471+
465472 NOTE: This is an internal primitive that performs **no permission
466473 check** — callers (``empty_corpus``, folder cascade-delete) must already
467474 have verified corpus DELETE permission. It does not touch the folder
@@ -505,12 +512,10 @@ def bulk_soft_delete_documents(
505512 # absent here (skipped, never double-trashed) and one added after it
506513 # is left alone — the same semantics as the prior per-document loop.
507514 #
508- # PERF/MEMORY: both ``document_ids`` and ``active_paths`` are fully
509- # materialized in memory, bounded by the in-scope document count —
510- # which the current callers (empty_corpus, folder cascade-delete)
511- # leave unbounded. Fine for realistic corpus sizes; this is the
512- # allocation point to revisit (chunk/iterate the doc set) before
513- # raising any hard document-count ceiling on these paths (#1951).
515+ # PERF/MEMORY: this ``list()`` (plus the caller's ``document_ids``
516+ # snapshot) is the unbounded allocation point behind the docstring
517+ # "Scaling caveat" — chunk/iterate here to add a document-count
518+ # ceiling for the empty_corpus / cascade-delete callers (#1951).
514519 active_paths = list (
515520 DocumentPath .objects .select_for_update (of = ("self" ,))
516521 .select_related ("document" )
@@ -600,10 +605,17 @@ def bulk_soft_delete_documents(
600605 is_public = False
601606 )
602607
603- logger .info (
604- "Bulk soft-deleted %s document(s) in corpus %s by user %s" ,
605- len (trashed_doc_ids ),
606- corpus .id ,
607- user .id ,
608- )
609- return len (trashed_doc_ids )
608+ # Defer the audit log to on_commit so it reports only durably
609+ # committed trashing (never a false "soft-deleted N" on a rolled-back
610+ # block) — the rollback-safety contract the signals above rely on.
611+ trashed_count = len (trashed_doc_ids )
612+ corpus_id , user_id = corpus .id , user .id
613+ transaction .on_commit (
614+ lambda : logger .info (
615+ "Bulk soft-deleted %s document(s) in corpus %s by user %s" ,
616+ trashed_count ,
617+ corpus_id ,
618+ user_id ,
619+ )
620+ )
621+ return trashed_count
0 commit comments