Skip to content

Commit 3779d8c

Browse files
docs/ src/ tests/: Page.insert_htmlbox(): also down-scale to fit long words.
We do additional scaling to fit long words that would extend beyond the horizontal bounds of the rect. Fixes #4613.
1 parent eb2b1c9 commit 3779d8c

4 files changed

Lines changed: 119 additions & 25 deletions

File tree

docs/page.rst

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -847,14 +847,17 @@ In a nutshell, this is what you can do with PyMuPDF:
847847

848848
:returns: A tuple of floats `(spare_height, scale)`.
849849

850-
- `spare_height`: -1 if content did not fit, else >= 0. It is the height of the unused (still available) rectangle stripe. Positive only if scale = 1 (no down-scaling happened).
851-
- `scale`: down-scaling factor, 0 < scale <= 1.
850+
- spare_height: The height of the remaining space in `rect` below the
851+
text, or -1 if we failed to fit.
852+
- scale: The scaling required; `0 < scale <= 1`. Will be less than
853+
`scale_low` if we failed to fit.
852854

853-
Please refer to examples in this section of the recipes: :ref:`RecipesText_I_c`.
855+
Please refer to examples in this section of the recipes: :ref:`RecipesText_I_c`.
854856

855857
|history_begin|
856858

857-
* New in v1.23.8; rebased-only.
859+
* Changed in v1.26.5: do additional scaling to fit long words.
860+
* New in v1.23.8: rebased-only.
858861
* New in v1.23.9: `opacity` parameter.
859862

860863
|history_end|

src/__init__.py

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12266,7 +12266,8 @@ def insert_htmlbox(
1226612266
oc=0,
1226712267
opacity=1,
1226812268
overlay=True,
12269-
) -> float:
12269+
_scale_word_width=True,
12270+
) -> tuple:
1227012271
"""Insert text with optional HTML tags and stylings into a rectangle.
1227112272

1227212273
Args:
@@ -12282,14 +12283,16 @@ def insert_htmlbox(
1228212283
oc: (int) the xref of an OCG / OCMD (Optional Content).
1228312284
opacity: (float) set opacity of inserted content.
1228412285
overlay: (bool) put text on top of page content.
12286+
_scale_word_width: internal, for testing only.
1228512287
Returns:
1228612288
A tuple of floats (spare_height, scale).
12287-
spare_height: -1 if content did not fit, else >= 0. It is the height of the
12288-
unused (still available) rectangle stripe. Positive only if
12289-
scale_min = 1 (no down scaling).
12290-
scale: downscaling factor, 0 < scale <= 1. Set to 0 if spare_height = -1 (no fit).
12289+
spare_height:
12290+
The height of the remaining space in <rect> below the
12291+
text, or -1 if we failed to fit.
12292+
scale:
12293+
The scaling required; `0 < scale <= 1`.
12294+
Will be less than `scale_low` if we failed to fit.
1229112295
"""
12292-
1229312296
# normalize rotation angle
1229412297
if not rotate % 90 == 0:
1229512298
raise ValueError("bad rotation angle")
@@ -12321,24 +12324,50 @@ def insert_htmlbox(
1232112324
else:
1232212325
raise ValueError("'text' must be a string or a Story")
1232312326
# ----------------------------------------------------------------
12324-
# Find a scaling factor that lets our story fit in
12327+
# Find a scaling factor that lets our story fit in. Instead of scaling
12328+
# the text smaller, we look at how much bigger the rect needs
12329+
# to be to fit the text, then reverse the scaling to get how much we
12330+
# need to scale down the text.
1232512331
# ----------------------------------------------------------------
12326-
scale_max = None if scale_low == 0 else 1 / scale_low
12332+
rect_scale_max = None if scale_low == 0 else 1 / scale_low
1232712333

12328-
fit = story.fit_scale(temp_rect, scale_min=1, scale_max=scale_max)
12334+
fit = story.fit_scale(temp_rect, scale_min=1, scale_max=rect_scale_max)
12335+
1232912336
if not fit.big_enough: # there was no fit
12330-
return (-1, scale_low)
12331-
12332-
filled = fit.filled
12333-
scale = 1 / fit.parameter # shrink factor
12334-
12335-
spare_height = fit.rect.y1 - filled[3] # unused room at rectangle bottom
12336-
# Note: due to MuPDF's logic this may be negative even for successful fits.
12337-
if scale != 1 or spare_height < 0: # if scaling occurred, set spare_height to 0
12337+
scale = 1/fit.parameter
12338+
return (-1, scale)
12339+
12340+
# fit.filled is a tuple; we convert it in place to a Rect for
12341+
# convenience. (fit.rect is already a Rect.)
12342+
fit.filled = Rect(fit.filled)
12343+
assert (fit.rect.x0, fit.rect.y0) == (0, 0)
12344+
assert (fit.filled.x0, fit.filled.y0) == (0, 0)
12345+
12346+
# Extra scaling for width if any word is wider than the fit rect - in
12347+
# this case fit.filled.x1 will be greater than fit.rect.x1.
12348+
scale_for_width = None
12349+
if _scale_word_width and fit.filled.x1 > fit.rect.x1:
12350+
scale_for_width = fit.filled.x1 / fit.rect.x1
12351+
fit.rect.x1 *= scale_for_width
12352+
fit.rect.y1 *= scale_for_width
12353+
fit.parameter *= scale_for_width
12354+
12355+
scale = 1 / fit.parameter
12356+
if scale < scale_low:
12357+
# Extra scaling for width has reduced scale out of range, so return
12358+
# failure.
12359+
return (-1, scale)
12360+
12361+
if scale != 1 and scale_for_width is None:
12362+
# Scaling occurred but with no final width adjustment, so there
12363+
# will be no spare height.
1233812364
spare_height = 0
12365+
else:
12366+
# There will usually be spare height.
12367+
spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)
1233912368

1234012369
def rect_function(*args):
12341-
return fit.rect, fit.rect, Identity
12370+
return fit.rect, fit.rect, None
1234212371

1234312372
# draw story on temp PDF page
1234412373
doc = story.write_with_links(rect_function)
@@ -16045,15 +16074,17 @@ class FitResult:
1604516074
`big_enough`:
1604616075
`True` if the fit succeeded.
1604716076
`filled`:
16048-
From the last call to `Story.place()`.
16077+
Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
16078+
will be wider than .rect if any single word (which we never split)
16079+
was too wide for .rect.
1604916080
`more`:
1605016081
`False` if the fit succeeded.
1605116082
`numcalls`:
1605216083
Number of calls made to `self.place()`.
1605316084
`parameter`:
1605416085
The successful parameter value, or the largest failing value.
1605516086
`rect`:
16056-
The rect created from `parameter`.
16087+
The pymupdf.Rect created from `parameter`.
1605716088
'''
1605816089
def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
1605916090
self.big_enough = big_enough
@@ -16229,7 +16260,7 @@ def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=Fals
1622916260
Finds smallest value `scale` in range `scale_min..scale_max` where
1623016261
`scale * rect` is large enough to contain the story `self`.
1623116262

16232-
Returns a `Story.FitResult` instance.
16263+
Returns a `Story.FitResult` instance with `.parameter` set to `scale`.
1623316264

1623416265
:arg width:
1623516266
width of rect.

tests/resources/test_4613.png

64.8 KB
Loading

tests/test_textbox.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
"""
88
import pymupdf
99

10+
import gentle_compare
11+
12+
import os
13+
import textwrap
14+
1015
# codespell:ignore-begin
1116
text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.
1217
@@ -286,3 +291,58 @@ def test_4400():
286291
text = '111111111'
287292
print(f'Calling writer.fill_textbox().', flush=1)
288293
writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8)
294+
295+
296+
def test_4613():
297+
print()
298+
text = 3 * 'abcdefghijklmnopqrstuvwxyz\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n'
299+
story = pymupdf.Story(text)
300+
rect = pymupdf.Rect(10, 10, 100, 100)
301+
302+
# Test default operation where we get additional scaling down because of
303+
# the long words in our text.
304+
with pymupdf.open() as doc:
305+
page = doc.new_page()
306+
spare_height, scale = page.insert_htmlbox(rect, story)
307+
print(f'test_4613(): {spare_height=} {scale=}')
308+
# Even though we have scaled down, the additional scaling from word width
309+
# means there is spare vertical space.
310+
assert abs(spare_height - 50.53) < 0.1
311+
assert abs(scale - 0.4009) < 0.01
312+
313+
page.draw_rect(rect, (1, 0, 0))
314+
path = os.path.normpath(f'{__file__}/../../tests/test_4613.pdf')
315+
doc.save(path)
316+
317+
path_pixmap = os.path.normpath(f'{__file__}/../../tests/test_4613.png')
318+
path_pixmap_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4613.png')
319+
pixmap = page.get_pixmap(dpi=300)
320+
pixmap.save(path_pixmap)
321+
322+
new_text = page.get_text('text', clip=rect)
323+
print(f'test_4613(): new_text:')
324+
print(textwrap.indent(new_text, ' '))
325+
assert new_text == text
326+
327+
rms = gentle_compare.pixmaps_rms(pixmap, path_pixmap_expected)
328+
assert rms == 0, f'{rms=}'
329+
330+
# Check with _scale_word_width=False - ignore too-wide words.
331+
with pymupdf.open() as doc:
332+
page = doc.new_page()
333+
spare_height, scale = page.insert_htmlbox(rect, story, _scale_word_width=False)
334+
print(f'test_4613(): _scale_word_width=False: {spare_height=} {scale=}')
335+
# With _scale_word_width=False there is no additional scaling down from
336+
# word width, so we should have spare_height == 0 and only a small
337+
# amount of down-scaling.
338+
assert spare_height == 0
339+
assert abs(scale - 0.914) < 0.01
340+
341+
# Check that we get no fit if scale_low is not low enough.
342+
with pymupdf.open() as doc:
343+
page = doc.new_page()
344+
scale_low=0.6
345+
spare_height, scale = page.insert_htmlbox(rect, story, scale_low=scale_low)
346+
print(f'test_4613(): {scale_low=}: {spare_height=} {scale=}')
347+
assert spare_height == -1
348+
assert abs(scale - 0.40093) < 0.01

0 commit comments

Comments
 (0)