@@ -67,50 +67,65 @@ def _render_pdf_pages(
6767 last_page : Optional [int ] = None ,
6868 password : Optional [str ] = None ,
6969) -> Union [List [Image .Image ], List [str ]]:
70- """
71- Centralized function to render PDF pages using pypdfium.
72- """
7370 if path_only and not output_folder :
7471 raise ValueError ("output_folder must be specified if path_only is True" )
7572 exactly_one (filename = filename , file = file )
73+
74+ if dpi is None :
75+ dpi = env_config .PDF_RENDER_DPI
76+ scale = dpi / 72.0
77+
78+ if output_folder :
79+ assert Path (output_folder ).exists ()
80+ assert Path (output_folder ).is_dir ()
81+
7682 with _pdfium_lock :
7783 pdf = pdfium .PdfDocument (filename or file , password = password )
78- try :
79- images : dict [int , Image .Image ] = {}
80- if dpi is None :
81- dpi = env_config .PDF_RENDER_DPI
82- scale = dpi / 72.0
83- for i , page in enumerate (pdf , start = 1 ):
84- if first_page is not None and i < first_page :
85- continue
86- if last_page is not None and i > last_page :
87- break
88- bitmap = page .render (
89- scale = scale ,
90- no_smoothtext = False ,
91- no_smoothimage = False ,
92- no_smoothpath = False ,
93- optimize_mode = "print" ,
94- )
84+ n_pages = len (pdf )
85+
86+ try :
87+ images : dict [int , Image .Image ] = {}
88+ filenames : list [str ] = []
89+ for i in range (n_pages ):
90+ page_num = i + 1
91+ if first_page is not None and page_num < first_page :
92+ continue
93+ if last_page is not None and page_num > last_page :
94+ break
95+
96+ with _pdfium_lock :
97+ page = pdf [i ]
9598 try :
96- images [i ] = bitmap .to_pil ()
99+ bitmap = page .render (
100+ scale = scale ,
101+ no_smoothtext = False ,
102+ no_smoothimage = False ,
103+ no_smoothpath = False ,
104+ optimize_mode = "print" ,
105+ )
106+ try :
107+ pil_image = bitmap .to_pil ()
108+ finally :
109+ bitmap .close ()
97110 finally :
98- bitmap .close ()
99- if not output_folder :
100- return list (images .values ())
111+ page .close ()
112+
113+ if output_folder :
114+ fn : str = os .path .join (str (output_folder ), f"page_{ page_num } .png" )
115+ pil_image .save (fn , format = "PNG" , compress_level = 1 , optimize = False )
116+ filenames .append (fn )
117+ if not path_only :
118+ images [page_num ] = pil_image
101119 else :
102- # Save images to output_folder
103- filenames : list [str ] = []
104- assert Path (output_folder ).exists ()
105- assert Path (output_folder ).is_dir ()
106- for i , image in images .items ():
107- fn : str = os .path .join (str (output_folder ), f"page_{ i } .png" )
108- image .save (fn , format = "PNG" , compress_level = 1 , optimize = False )
109- filenames .append (fn )
110- return filenames if path_only else list (images .values ())
111- finally :
120+ images [page_num ] = pil_image
121+ finally :
122+ with _pdfium_lock :
112123 pdf .close ()
113124
125+ if path_only :
126+ return filenames
127+ return list (images .values ())
128+
114129
115130def convert_pdf_to_image (
116131 filename : str ,
0 commit comments