diff --git a/src/utils.py b/src/utils.py index e764a5cd1..facb10c86 100644 --- a/src/utils.py +++ b/src/utils.py @@ -2385,7 +2385,7 @@ def insert_htmlbox( oc=0, opacity=1, overlay=True, -) -> float: +) -> tuple: """Insert text with optional HTML tags and stylings into a rectangle. Args: @@ -2448,16 +2448,21 @@ def insert_htmlbox( if not fit.big_enough: # there was no fit return (-1, scale_low) - filled = fit.filled - scale = 1 / fit.parameter # shrink factor - - spare_height = fit.rect.y1 - filled[3] # unused room at rectangle bottom - # Note: due to MuPDF's logic this may be negative even for successful fits. - if scale != 1 or spare_height < 0: # if scaling occurred, set spare_height to 0 + filled = pymupdf.Rect(fit.filled) + # final adjustment if filled rect is wider than fit rect + if filled.width > fit.rect.width: + h = filled.width / fit.rect.width * fit.rect.height + fit.rect.x1 = filled.x1 + fit.rect.y1 = h + fit.parameter = fit.rect.x1 / temp_rect.x1 + + scale = 1 / fit.parameter + spare_height = max((fit.rect.y1 - filled.y1) / fit.parameter, 0) + if scale != 1: # if scaling occurred, set spare_height to 0 spare_height = 0 def rect_function(*args): - return fit.rect, fit.rect, pymupdf.Identity + return fit.rect, fit.rect, None # draw story on temp PDF page doc = story.write_with_links(rect_function) @@ -2477,7 +2482,7 @@ def rect_function(*args): # ------------------------------------------------------------------------- # re-insert links in target rect (show_pdf_page cannot copy annotations) # ------------------------------------------------------------------------- - # scaled center point of fit.rect + # scaled center point of fit rect mp1 = (fit.rect.tl + fit.rect.br) / 2 * scale # center point of target rect diff --git a/tests/test_4613.py b/tests/test_4613.py new file mode 100644 index 000000000..986f7dbfc --- /dev/null +++ b/tests/test_4613.py @@ -0,0 +1,14 @@ +import pymupdf +import string + + +def test_4613(): + text = " ".join([string.ascii_lowercase + " " + string.ascii_uppercase] * 3) + story = pymupdf.Story(text) + doc = pymupdf.open() + page = doc.new_page() + rect = pymupdf.Rect(10, 10, 100, 100) + rc1 = page.insert_htmlbox(rect, story) + + new_text = page.get_text("text", clip=rect).replace("\n", " ") + assert text.strip() == new_text.strip()