@@ -591,6 +591,271 @@ def test_exposed_pdf_image_dpi(pdf_image_dpi, expected, monkeypatch):
591591 assert mock_from_image .call_args [0 ][0 ].height == expected
592592
593593
def test_convert_pdf_to_image_no_output_folder():
    """Converting without an output folder yields exactly one in-memory PIL image."""
    pages = layout.convert_pdf_to_image(filename="sample-docs/loremipsum.pdf", dpi=72)
    assert len(pages) == 1
    first_page = pages[0]
    assert isinstance(first_page, Image.Image)
598+
599+
def test_convert_pdf_to_image_output_folder_returns_images(tmp_path):
    """With ``output_folder`` set and ``path_only=False``, images are returned and a PNG is written.

    Checks both halves of the contract: the return value is a one-element list
    of PIL images, and exactly one ``*.png`` file appears in ``tmp_path``.
    """
    conversion_kwargs = {
        "filename": "sample-docs/loremipsum.pdf",
        "dpi": 72,
        "output_folder": tmp_path,
        "path_only": False,
    }
    pages = layout.convert_pdf_to_image(**conversion_kwargs)

    assert len(pages) == 1
    assert isinstance(pages[0], Image.Image)

    written_pngs = list(tmp_path.glob("*.png"))
    assert len(written_pngs) == 1
611+
612+
def test_convert_pdf_to_image_path_only(tmp_path):
    """With ``path_only=True`` the call returns string paths to PNG files that exist on disk.

    The returned paths must also be exactly the set of ``*.png`` files found in
    the output folder (compared in sorted order).
    """
    paths = layout.convert_pdf_to_image(
        filename="sample-docs/loremipsum.pdf",
        dpi=72,
        output_folder=tmp_path,
        path_only=True,
    )
    assert len(paths) == 1

    # Type check first, for every entry, before touching the filesystem.
    for entry in paths:
        assert isinstance(entry, str)
    for entry in paths:
        assert os.path.exists(entry)
        assert entry.endswith(".png")

    pngs_on_disk = [str(png) for png in sorted(tmp_path.glob("*.png"))]
    assert pngs_on_disk == sorted(paths)
627+
628+
def test_convert_pdf_to_image_save_not_under_pdfium_lock(tmp_path):
    """Check that PIL ``save`` (disk I/O) never runs while ``_pdfium_lock`` is held.

    ``Image.Image.save`` is replaced by a wrapper that records the lock's
    ``locked()`` state at the moment of each call and then delegates to the
    real implementation.
    """
    real_save = Image.Image.save
    lock_held_during_save = []

    def recording_save(image, *args, **kwargs):
        # Sample the lock state before doing the actual write.
        held = layout._pdfium_lock.locked()
        lock_held_during_save.append(held)
        return real_save(image, *args, **kwargs)

    with patch.object(Image.Image, "save", recording_save):
        layout.convert_pdf_to_image(
            filename="sample-docs/loremipsum.pdf",
            dpi=72,
            output_folder=tmp_path,
            path_only=True,
        )

    assert lock_held_during_save, "save was never called"
    assert all(
        not held for held in lock_held_during_save
    ), "pil_image.save() was called while _pdfium_lock was held"
647+
648+
def test_convert_pdf_to_image_concurrent_saves_not_serialized(tmp_path):
    """Two concurrent callers must be able to be inside ``save()`` at the same time.

    Every patched ``save()`` call first waits on a two-party
    ``threading.Barrier``. The barrier only releases when both threads are in
    ``save()`` simultaneously; if saves were serialized under ``_pdfium_lock``
    the second thread could not arrive, the barrier would time out, and the
    overlap event would never be set.
    """
    import threading

    real_save = Image.Image.save
    rendezvous = threading.Barrier(2, timeout=5)
    overlap_detected = threading.Event()

    def rendezvous_save(image, *args, **kwargs):
        try:
            rendezvous.wait()
        except threading.BrokenBarrierError:
            # Timed out (or barrier already broken): no overlap observed, but
            # still perform the real save so the output files exist.
            pass
        else:
            overlap_detected.set()
        return real_save(image, *args, **kwargs)

    errors: list[str] = []

    def worker(folder):
        try:
            layout.convert_pdf_to_image(
                filename="sample-docs/loremipsum.pdf",
                dpi=72,
                output_folder=folder,
                path_only=True,
            )
        except Exception as exc:
            errors.append(str(exc))

    dir_a = tmp_path / "a"
    dir_b = tmp_path / "b"
    for out_dir in (dir_a, dir_b):
        out_dir.mkdir()

    with patch.object(Image.Image, "save", rendezvous_save):
        workers = [
            threading.Thread(target=worker, args=(dir_a,)),
            threading.Thread(target=worker, args=(dir_b,)),
        ]
        for thread in workers:
            thread.start()
        for thread in workers:
            thread.join(timeout=10)

    assert not errors, f"threads raised: { errors } "
    assert overlap_detected.is_set(), (
        "saves were serialized under _pdfium_lock — threads could not overlap"
    )
    assert list(dir_a.glob("*.png")), "thread A produced no output"
    assert list(dir_b.glob("*.png")), "thread B produced no output"
703+
704+
def test_render_can_proceed_while_other_thread_saves(tmp_path):
    """Thread B can acquire _pdfium_lock and render while thread A is in save().

    Thread A is parked inside its patched ``save()`` until thread B signals
    completion or 5 seconds elapse. The *result of that wait* is recorded:
    it is True only if B finished while A was still parked in ``save()``.

    Asserting on ``b_done.is_set()`` after the joins is not sufficient,
    because B sets the event in a ``finally`` block and would therefore also
    set it when it only got to run after A's wait timed out.
    """
    import threading

    original_save = Image.Image.save
    a_in_save = threading.Event()
    b_done = threading.Event()
    # One entry per gated save into dir_a: did B finish before the gate timed out?
    b_finished_while_a_blocked: list[bool] = []

    dir_a = tmp_path / "a"
    dir_b = tmp_path / "b"
    dir_a.mkdir()
    dir_b.mkdir()

    def gated_save(self, *args, **kwargs):
        # Accept the destination positionally or as the ``fp`` keyword so the
        # gate cannot be bypassed by the call style.
        target = args[0] if args else kwargs.get("fp", "")
        if str(dir_a) in str(target):
            a_in_save.set()
            # Event.wait() returns True only if the event was set in time.
            b_finished_while_a_blocked.append(b_done.wait(timeout=5))
        return original_save(self, *args, **kwargs)

    errors: list[str] = []

    def run(folder, done_event=None):
        try:
            layout.convert_pdf_to_image(
                filename="sample-docs/loremipsum.pdf",
                dpi=72,
                output_folder=folder,
                path_only=True,
            )
        except Exception as exc:
            errors.append(str(exc))
        finally:
            if done_event:
                done_event.set()

    with patch.object(Image.Image, "save", gated_save):
        t_a = threading.Thread(target=run, args=(dir_a,))
        t_b = threading.Thread(target=run, args=(dir_b, b_done))
        t_a.start()
        a_reached_save = a_in_save.wait(timeout=5)
        # A should now be parked in save(). B should render + save freely.
        t_b.start()
        t_b.join(timeout=10)
        t_a.join(timeout=10)

    assert not t_a.is_alive(), "thread A did not finish within the join timeout"
    assert not t_b.is_alive(), "thread B did not finish within the join timeout"
    assert not errors, f"threads raised: { errors } "
    assert a_reached_save, "Thread A never reached save()"
    assert b_finished_while_a_blocked and all(
        b_finished_while_a_blocked
    ), "Thread B could not complete while A was saving"
    assert list(dir_a.glob("*.png")), "thread A produced no output"
    assert list(dir_b.glob("*.png")), "thread B produced no output"
760+
761+
def test_multi_page_concurrent_output_complete(tmp_path):
    """Two threads converting the multi-page sample each produce a complete set of PNGs.

    Each thread writes into its own folder. Afterwards both folders must
    contain exactly ``expected_pages`` PNG files named ``page_1.png`` through
    ``page_<expected_pages>.png``.
    """
    import threading

    expected_pages = 10  # page count this test expects for the multipage sample
    errors: list[str] = []

    def run(folder):
        try:
            layout.convert_pdf_to_image(
                filename="sample-docs/loremipsum_multipage.pdf",
                dpi=72,
                output_folder=folder,
                path_only=True,
            )
        except Exception as exc:
            errors.append(str(exc))

    dir_a = tmp_path / "a"
    dir_b = tmp_path / "b"
    dir_a.mkdir()
    dir_b.mkdir()

    t1 = threading.Thread(target=run, args=(dir_a,))
    t2 = threading.Thread(target=run, args=(dir_b,))
    t1.start()
    t2.start()
    t1.join(timeout=60)
    t2.join(timeout=60)

    # join(timeout=...) returns silently on timeout, so check liveness explicitly.
    assert not t1.is_alive(), "thread A did not finish within the join timeout"
    assert not t2.is_alive(), "thread B did not finish within the join timeout"
    assert not errors, f"threads raised: { errors } "

    for label, folder in (("A", dir_a), ("B", dir_b)):
        produced = sorted(folder.glob("*.png"))
        assert len(produced) == expected_pages, (
            f"thread {label} produced {len(produced)} files, expected {expected_pages}"
        )
        for page_number in range(1, expected_pages + 1):
            # No whitespace inside the name: it must match the file on disk exactly.
            page_name = f"page_{page_number}.png"
            assert (folder / page_name).exists(), f"thread {label} missing {page_name}"
799+
800+
def test_error_in_one_thread_does_not_block_other(tmp_path):
    """A save failure in one thread must not prevent the other thread from completing.

    ``Image.Image.save`` is patched so that any save whose first positional
    argument contains thread A's folder raises ``OSError``; every other save
    is delegated to the real implementation.
    """
    import threading

    original_save = Image.Image.save

    dir_a = tmp_path / "a"
    dir_b = tmp_path / "b"
    for out_dir in (dir_a, dir_b):
        out_dir.mkdir()

    def failing_save(image, *args, **kwargs):
        destination = str(args[0]) if args else ""
        if str(dir_a) not in destination:
            return original_save(image, *args, **kwargs)
        raise OSError("simulated disk failure")

    a_error: list[Exception] = []
    b_result: list[str] = []
    b_error: list[Exception] = []

    def convert_into(folder):
        # Shared conversion call; only the destination differs per thread.
        return layout.convert_pdf_to_image(
            filename="sample-docs/loremipsum.pdf",
            dpi=72,
            output_folder=folder,
            path_only=True,
        )

    def run_a():
        try:
            convert_into(dir_a)
        except Exception as exc:
            a_error.append(exc)

    def run_b():
        try:
            b_result.extend(convert_into(dir_b))
        except Exception as exc:
            b_error.append(exc)

    with patch.object(Image.Image, "save", failing_save):
        workers = [threading.Thread(target=run_a), threading.Thread(target=run_b)]
        for thread in workers:
            thread.start()
        for thread in workers:
            thread.join(timeout=10)

    assert a_error, "Thread A should have failed"
    assert not b_error, f"Thread B should have succeeded: { b_error } "
    assert b_result, "Thread B produced no result"
    assert list(dir_b.glob("*.png")), "Thread B produced no output files"
857+
858+
594859@pytest .mark .parametrize (
595860 ("filename" , "img_num" , "should_complete" ),
596861 [
0 commit comments