Skip to content

Commit e91c0a2

Browse files
JorjMcKie authored and julian-smith-artifex-com committed
JM_convert_to_pdf() - also generate links.
1 parent 30f86ec commit e91c0a2

1 file changed

Lines changed: 43 additions & 4 deletions

File tree

src/__init__.py

Lines changed: 43 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3010,7 +3010,7 @@ def __init__(self, filename=None, stream=None, filetype=None, archive=None, rect
30103010
if filetype:
30113011
suffix = filetype
30123012
else:
3013-
suffix = pathlib.Path(filename).suffix
3013+
suffix = pathlib.Path(filename).suffix.strip(".")
30143014
try:
30153015
fz_stream = mupdf.fz_open_file(filename)
30163016
doc = mupdf.fz_open_document_with_stream_and_dir(suffix, fz_stream, archive_parm)
@@ -19105,7 +19105,7 @@ def JM_copy_rectangle(page, area):
1910519105
return s
1910619106

1910719107

19108-
def JM_convert_to_pdf(doc, fp, tp, rotate):
19108+
def JM_convert_to_pdf(doc, fp, tp, rotate) -> bytes:
1910919109
'''
1911019110
Convert any MuPDF document to a PDF
1911119111
Returns bytes object containing the PDF, created via 'write' function.
@@ -19120,7 +19120,8 @@ def JM_convert_to_pdf(doc, fp, tp, rotate):
1912019120
e = fp # ... range
1912119121
rot = JM_norm_rotation(rotate)
1912219122
i = fp
19123-
while 1: # interpret & write document pages as PDF pages
19123+
internal_links = [] # collect PDF-wide internal links here
19124+
while 1: # interpret & write document pages as PDF pages
1912419125
if not _INRANGE(i, s, e):
1912519126
break
1912619127
page = mupdf.fz_load_page(doc, i)
@@ -19131,11 +19132,49 @@ def JM_convert_to_pdf(doc, fp, tp, rotate):
1913119132
dev = None
1913219133
page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
1913319134
mupdf.pdf_insert_page(pdfout, -1, page_obj)
19135+
19136+
# also copy links to the output PDF page
19137+
# get the PDF page we've just created
19138+
pdf_page = mupdf.pdf_load_page(pdfout, i)
19139+
19140+
# loop through source page links
19141+
link = mupdf.fz_load_links(page) # load first link
19142+
while link.m_internal: # break loop when link is None
19143+
uri = link.uri() # URI string
19144+
rect = mupdf.FzRect(link.rect()) # link "from" rectangle
19145+
is_external = mupdf.fz_is_external_link(uri)
19146+
19147+
if is_external: # external links can be copied directly
19148+
mupdf.pdf_create_link(pdf_page, rect, uri)
19149+
else: # internal links done when PDF is complete
19150+
# find target of internal link
19151+
ret, xp, yp = mupdf.fz_resolve_link(doc, uri)
19152+
ilink={"page": i, "ret": ret, "from": rect, "h": rect.y1-rect.y0, "w": rect.x1-rect.x0, "xp": xp, "yp": yp}
19153+
internal_links.append(ilink)
19154+
link = link.next()
19155+
1913419156
i += incr
19157+
1913519158
# PDF created - now write it to Python bytearray
19159+
# insert any internal links collected before:
19160+
for ilink in internal_links:
19161+
pdf_page = mupdf.pdf_load_page(pdfout, ilink["page"])
19162+
ret = ilink["ret"]
19163+
dest = mupdf.fz_link_dest()
19164+
dest.type = 7 # XYZ destination format
19165+
dest.loc.chapter = ret.chapter
19166+
dest.loc.page = ret.page
19167+
dest.h = ilink["h"]
19168+
dest.w = ilink["w"]
19169+
dest.x = ilink["xp"]
19170+
dest.y = ilink["yp"]
19171+
dest.zoom = 0
19172+
rect=ilink["from"]
19173+
uri = mupdf.pdf_new_uri_from_explicit_dest(mupdf.FzLinkDest(dest))
19174+
mupdf.pdf_create_link(pdf_page, rect, uri)
1913619175
# prepare write options structure
1913719176
opts = mupdf.PdfWriteOptions()
19138-
opts.do_garbage = 4
19177+
opts.do_garbage = 3
1913919178
opts.do_compress = 1
1914019179
opts.do_compress_images = 1
1914119180
opts.do_compress_fonts = 1

0 commit comments

Comments
 (0)