22
33# PyPDFium2 bindings
44from typing import Generator , Tuple
5+ import weakref
56import pypdfium2 as pdfium
67
78from gmft .base import Rect
1718 from gmft .formatters .base import CroppedTable
1819
1920
def _close_document_quietly(doc: "PyPDFium2Document"):
    """
    Best-effort close used by pickle cleanup finalizers.

    Args:
        doc: the document to close, or None (in which case nothing happens).

    Returns:
        None. Any ``Exception`` raised by ``doc.close()`` is deliberately
        swallowed: this runs as a cleanup callback, where there is no caller
        able to handle the error.
    """
    if doc is None:
        return
    try:
        doc.close()
    except Exception:
        # Deliberate best-effort: a failed close must not propagate out of
        # a finalizer callback.
        pass
29+
30+
2031class PyPDFium2Page (BasePage ):
2132 """
2233 Note: This follows PIL's convention of (0, 0) being top left.
@@ -37,6 +48,7 @@ def __init__(
3748 self .width = page .get_width ()
3849 self .height = page .get_height ()
3950 self ._positions_and_text_and_breaks = None
51+ self ._pickle_parent_close_finalizer = None
4052 super ().__init__ (page_no )
4153
4254 def get_positions_and_text (
@@ -186,10 +198,15 @@ def __getstate__(self):
186198 }
187199
def __setstate__(self, state):
    """
    Restore this page from its pickled ``state`` (PyPDFium2Page method).

    A fresh page is built from ``state`` via
    ``PyPDFium2Utils.load_page_from_dict`` and its attributes are swapped
    into ``self``. Loading opens a new document, so a finalizer is registered
    to close that document once this page is no longer alive.

    Args:
        state: the value previously produced by ``__getstate__``.
    """
    # copy-and-swap idiom
    copy = PyPDFium2Utils.load_page_from_dict(state)  # this opens a new document!
    self.__dict__, copy.__dict__ = copy.__dict__, self.__dict__

    # avoid pypdfium2 memory leak #55
    if self.parent is not None:
        # The finalizer keeps a strong reference to the parent document and
        # calls the close helper when this page is garbage collected (or at
        # interpreter exit, whichever comes first).
        self._pickle_parent_close_finalizer = weakref.finalize(
            self, _close_document_quietly, self.parent
        )  # best-effort close document
193210
194211
195212class PyPDFium2Document (BasePDFDocument ):
0 commit comments