1111from PIL import Image
1212from pypdfium2 import PdfImage , PdfBitmap , PdfDocument
1313
__version__ = '0.2.0'

# Names exported by ``from <module> import *``. Each entry must match a
# module-level name exactly; padding whitespace inside an entry makes the
# star-import fail with AttributeError.
__all__ = [
    'bundle', 'merge_pdfs', 'remove_pages', 'pdf_to_image', 'extract_text',
    'image_to_pdf', 'split_pdf', 'watermark_pdf'
]
1820
1921
def _bundle_add_image(writer, image_source):
    """Append one page to ``writer`` that shows the image read from ``image_source``.

    :param writer: The PdfDocument being assembled.
    :param image_source: Anything accepted by ``Image.open``.
    """
    image = Image.open(image_source)
    try:
        bitmap = PdfBitmap.from_pil(image)
        pdf_image = PdfImage.new(writer)
        pdf_image.set_bitmap(bitmap)
        # Scale the image object to the bitmap's width/height; the page created
        # below uses the same dimensions.
        matrix = pdfium.PdfMatrix().scale(bitmap.width, bitmap.height)
        pdf_image.set_matrix(matrix)
        page = writer.new_page(bitmap.width, bitmap.height)
        page.insert_obj(pdf_image)
        page.gen_content()
        page.close()
        pdf_image.close()
        bitmap.close()
    finally:
        # Release the Pillow image even when the conversion above fails.
        image.close()


def _bundle_add_pdf(writer, pdf_source):
    """Import every page of the PDF ``pdf_source`` into ``writer``.

    :param writer: The PdfDocument being assembled.
    :param pdf_source: Path or binary stream handed to ``PdfDocument``.
    """
    reader = PdfDocument(pdf_source)
    try:
        writer.import_pages(reader)
    finally:
        # The source document is only needed for the import itself.
        reader.close()


def bundle(
    input_files: Sequence[str | bytes | Path | os.PathLike[str] | io.BytesIO],
    output_stream: str | Path | io.BytesIO | io.BufferedWriter
) -> int:
    """Bundle multiple files together.

    Path-like inputs whose name ends with ``.pdf`` (case-insensitive) are imported
    as PDF documents; every other path is opened as an image. ``io.BytesIO`` inputs
    are tried as a PDF first and opened as an image if that fails.

    :param input_files: List of files to bundle together. Each file can be a PDF or an
        image. Supported image formats are those supported by Pillow.
    :param output_stream: Output stream to write to.
    :raises ValueError: If an input is neither path-like nor an ``io.BytesIO``.
    :return: Number of pages in the bundled PDF.
    """
    writer = PdfDocument.new()
    for input_file in input_files:
        log21.info(f'Adding {input_file} ...')
        if isinstance(input_file, (str, bytes, Path, os.PathLike)):
            # os.fsdecode() normalises str, bytes and PathLike to a str path.
            # str() on a bytes path yields "b'...'", which never ends with '.pdf'.
            path = os.fsdecode(input_file)
            if path.lower().endswith('.pdf'):
                # Pass the decoded path: pypdfium2 documents a bytes argument as
                # in-memory PDF data, not as a file path.
                _bundle_add_pdf(writer, path)
            else:
                _bundle_add_image(writer, input_file)
        elif isinstance(input_file, io.BytesIO):
            try:
                _bundle_add_pdf(writer, input_file)
            except Exception:
                # Deliberate best-effort: a stream that cannot be imported as a
                # PDF is treated as an image. A genuinely invalid stream then
                # surfaces as the error raised by Image.open.
                _bundle_add_image(writer, input_file)
        else:
            raise ValueError(f'Unsupported input file type: {type(input_file)} ')
    writer.save(output_stream)
    return len(writer)
76+
77+
2078def merge_pdfs (
2179 input_files : Sequence [str | Path | io .TextIOWrapper ],
2280 output_stream : str | Path | io .BytesIO | io .BufferedWriter
@@ -36,6 +94,41 @@ def merge_pdfs(
3694 return len (writer )
3795
3896
def image_to_pdf(
    input_files: Sequence[str | bytes | Path | os.PathLike[str] | io.BytesIO],
    output_stream: str | Path | io.BytesIO | io.BufferedWriter
) -> int:
    """Convert images to a PDF file.

    Each input image becomes one page whose size equals the width and height of
    the bitmap created from the image.

    :param input_files: List of images to convert.
    :param output_stream: Output stream to write to.
    :return: Number of pages in the output PDF
    """
    writer = PdfDocument.new()
    for input_file in input_files:
        log21.info(f'Adding {input_file} ...')
        # Open the image file
        image = Image.open(input_file)
        try:
            # Create a bitmap from the image
            bitmap = PdfBitmap.from_pil(image)
            # Create a PdfImage object from the bitmap
            pdf_image = PdfImage.new(writer)
            pdf_image.set_bitmap(bitmap)
            # Scale the image object to the bitmap's width/height
            matrix = pdfium.PdfMatrix().scale(bitmap.width, bitmap.height)
            pdf_image.set_matrix(matrix)
            # Create a new page and insert the PdfImage object
            page = writer.new_page(bitmap.width, bitmap.height)
            page.insert_obj(pdf_image)
            page.gen_content()
            # Close the pdfium objects
            page.close()
            pdf_image.close()
            bitmap.close()
        finally:
            # Always release the Pillow image, even if the conversion failed
            image.close()
    writer.save(output_stream)
    return len(writer)
130+
131+
39132def remove_pages (
40133 input_file : str | Path | io .BytesIO | io .TextIOWrapper ,
41134 pages_to_remove : Collection [int ],
@@ -160,41 +253,6 @@ def extract_text(
160253 return text
161254
162255
def image_to_pdf(
    input_files: Sequence[str | bytes | Path | os.PathLike[str] | io.BytesIO],
    output_stream: str | Path | io.BytesIO | io.BufferedWriter
) -> int:
    """Build a PDF in which every input image occupies one page.

    :param input_files: The images to place in the document, in order.
    :param output_stream: Destination the finished document is saved to.
    :return: Page count of the document that was written
    """
    document = PdfDocument.new()

    def append_image_page(source):
        """Add a single page to ``document`` showing the image at ``source``."""
        picture = Image.open(source)
        pixels = PdfBitmap.from_pil(picture)
        width, height = pixels.width, pixels.height

        # Wrap the bitmap in an image object scaled to the bitmap's dimensions.
        image_object = PdfImage.new(document)
        image_object.set_bitmap(pixels)
        image_object.set_matrix(pdfium.PdfMatrix().scale(width, height))

        # The page takes the same dimensions as the bitmap.
        sheet = document.new_page(width, height)
        sheet.insert_obj(image_object)
        sheet.gen_content()

        # Release everything in the same order: page, image object, bitmap, image.
        for resource in (sheet, image_object, pixels, picture):
            resource.close()

    for source in input_files:
        log21.info(f'Adding {source} ...')
        append_image_page(source)

    document.save(output_stream)
    return len(document)
196-
197-
198256def split_pdf (
199257 input_file : str | Path ,
200258 output_directory : str | Path ,
0 commit comments