@@ -235,3 +235,116 @@ def get_stat():
235235 # we don't actually check.
236236 #
237237 print (f'Not checking results because non-Linux behaviour is too variable.' )
238+
239+
def _test_4751():
    '''
    Checks that repeatedly opening a PDF from memory, extracting text and
    iterating over page widgets does not leak Python-level allocations.

    Uses `tracemalloc` snapshots taken before and after each call of the
    workload and compares them. Growth seen after the first iteration is
    logged but ignored; any growth seen in a later iteration is counted as a
    leak.

    This is intended to be run in a fresh Python process (see `test_4751()`).

    Raises:
        AssertionError: if one or more leaks were detected.
    '''
    import gc
    import tracemalloc

    def analysis(stream_data, do_iter=True):
        '''
        The workload that is checked for leaks.

        Args:
            stream_data: contents of a PDF file, passed as `stream=` to
                `pymupdf.Document()`.
            do_iter: if true, iterate over and print each page's widgets.

        The statements are deliberately kept as they are (index-based page
        access, explicit `del`, `close()`) rather than being made more
        idiomatic, because object lifetimes are what is being tested.
        '''
        pdf_info = pymupdf.Document(stream=stream_data, filetype='pdf')
        tmp_list = range(len(pdf_info))
        for page_num in tmp_list:
            page = pdf_info[page_num]
            # The extracted blocks are not used; the call itself is part of
            # the workload.
            raw_info = page.get_text('rawdict')['blocks']
            page_widgets_list = page.widgets()
            if do_iter:
                for widget_info in page_widgets_list:
                    print(widget_info)
            del page_widgets_list
        pdf_info.close()
        pdf_info = None
        # NOTE(review): presumably this empties MuPDF's store so cached
        # objects are not mistaken for leaks - confirm against the pymupdf
        # documentation for TOOLS.store_shrink().
        pymupdf.TOOLS.store_shrink(100)

    file_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4751.pdf')

    def log(text):
        '''Prints `text` and flushes immediately.'''
        print(text, flush=True)

    # We filter out all allocations where any frame is in tracemalloc
    # itself, or where the leaf-most frame is in this file, because these
    # are not relevant to finding leaks in pymupdf.
    #
    tm_filters = [
        tracemalloc.Filter(inclusive=False, filename_pattern=tracemalloc.__file__, all_frames=True),
        tracemalloc.Filter(inclusive=False, filename_pattern=__file__),
    ]

    def get_snapshot():
        '''
        Wrapper for tracemalloc.take_snapshot() that filters out blocks with
        backtraces that we are not interested in.
        '''
        return tracemalloc.take_snapshot().filter_traces(tm_filters)

    # Check that `analysis()` does not leak.
    #
    num_leaks = 0
    # Read the file before tracing starts so that the buffer itself is not
    # part of any snapshot.
    with open(file_path, 'rb') as f:
        bytes_data = f.read()

    # Keep up to 30 frames per allocation so logged backtraces are useful.
    tracemalloc.start(30)
    try:
        snapshot_prev = get_snapshot()

        for it in range(2):
            log('')
            log(f'{it=}')

            current, peak = tracemalloc.get_traced_memory()
            log(f' {current=} {peak=}')

            analysis(bytes_data)
            gc.collect()
            snapshot = get_snapshot()

            top_stats = snapshot.compare_to(snapshot_prev, 'traceback')
            snapshot_prev = snapshot

            # Largest growth first; only the top ten entries are examined.
            top_stats = sorted(top_stats, key=lambda x: -x.size_diff)
            for block_num, stat in enumerate(top_stats[0:10]):
                if stat.size_diff > 0:
                    log(' Leak detected')
                    log(f' {block_num=} {stat.size_diff=}: {stat}')
                    bt = ''.join(
                        f' {frame.filename}:{frame.lineno}\n'
                        for frame in stat.traceback
                    )
                    log(bt)
                    # We ignore extra allocations in the first iteration.
                    if it != 0:
                        num_leaks += 1
    finally:
        # Always switch tracing off again, so that a failure here (or an
        # in-process call) does not leave tracemalloc enabled for whatever
        # runs next.
        tracemalloc.stop()

    assert not num_leaks, f'{num_leaks=}'
320+
321+
def test_4751():
    '''
    Runs `_test_4751()` in a child Python process.

    The test is skipped (with a message, returning None) when this file is
    being run under the `fitz` alias name, when running on Pyodide, or when
    Python is older than 3.13.

    Raises:
        subprocess.CalledProcessError: if the child process exits with a
            non-zero status, for example because `_test_4751()` failed.
    '''
    # We run the actual test in a child process, because otherwise previous
    # tests seem to affect the leak detection, causing false positives. It's
    # possible that these could be real leaks, but they are not the ones
    # we are testing for here.
    #
    if os.path.basename(__file__).startswith('test_fitz_'):
        # Don't test the `fitz` alias, because we assume our leafname.
        print('test_4751(): Not testing with fitz alias.')
        return

    if os.environ.get('PYODIDE_ROOT'):
        print('test_4751(): not running on Pyodide - cannot run child processes.')
        return

    if sys.version_info[:2] < (3, 13):
        # We see additional leaks with python-3.12.
        print(f'test_4751(): not running because known to fail on python < 3.13: {platform.python_version_tuple()=}')
        return

    import subprocess

    # Make this file's directory importable in the child so that
    # `import test_memory` works. Any existing PYTHONPATH is kept after it,
    # so that modules the parent found via PYTHONPATH are still found.
    tests_dir = os.path.abspath(f'{__file__}/..')
    inherited = os.environ.get('PYTHONPATH')
    pythonpath = f'{tests_dir}{os.pathsep}{inherited}' if inherited else tests_dir
    env_extra = dict(PYTHONPATH=pythonpath)

    # Pass the command as an argument list and do not use a shell, so that an
    # interpreter path containing spaces or shell metacharacters is handled
    # correctly.
    command = [sys.executable, '-c', 'import test_memory; test_memory._test_4751()']
    print('', flush=True)
    print(f'test_4751(): Running: {command!r}', flush=True)
    print(f'test_4751(): With: {env_extra=}', flush=True)
    subprocess.run(command, check=True, env=os.environ | env_extra)
0 commit comments