diff --git a/changes.txt b/changes.txt index 7bd28143a..94271868c 100644 --- a/changes.txt +++ b/changes.txt @@ -2,15 +2,17 @@ Change Log ========== -**Changes in version 1.25.6 ()** +**Changes in version 1.26.0 ()** + +* Use MuPDF-1.26.0. * Fixed issues: - * **Fixed** `4404 <https://github.com/pymupdf/PyMuPDF/issues/4404>`_: - * **Fixed** `4439 <https://github.com/pymupdf/PyMuPDF/issues/4439>`_: - * **Fixed** `4412 <https://github.com/pymupdf/PyMuPDF/issues/4412>`_: - * **Fixed** `4324 <https://github.com/pymupdf/PyMuPDF/issues/4324>`_: - * **Fixed** `4447 <https://github.com/pymupdf/PyMuPDF/issues/4447>`_: + * **Fixed** `4324 <https://github.com/pymupdf/PyMuPDF/issues/4324>`_: cluster_drawings() fails to cluster horizontal and vertical thin lines + * **Fixed** `4404 <https://github.com/pymupdf/PyMuPDF/issues/4404>`_: IndexError in page.get_links() + * **Fixed** `4412 <https://github.com/pymupdf/PyMuPDF/issues/4412>`_: Regression? Spurious error? in insert_pdf in v1.25.4 + * **Fixed** `4439 <https://github.com/pymupdf/PyMuPDF/issues/4439>`_: New Xml class from data does not work - bug in code + * **Fixed** `4447 <https://github.com/pymupdf/PyMuPDF/issues/4447>`_: Stroke color of annotations cannot be correctly set * Other: diff --git a/docs/document.rst b/docs/document.rst index 05b9966ff..e688972fa 100644 --- a/docs/document.rst +++ b/docs/document.rst @@ -1305,7 +1305,7 @@ For details on **embedded files** refer to Appendix 3. pair: join_duplicates; Document.insert_pdf pair: show_progress; Document.insert_pdf - .. method:: insert_pdf(docsrc, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True, widgets=True, join_duplicates=False, show_progress=0, final=1) + .. method:: insert_pdf(docsrc, *, from_page=-1, to_page=-1, start_at=-1, rotate=-1, links=True, annots=True, widgets=True, join_duplicates=False, show_progress=0, final=1) PDF only: Copy the page range **[from_page, to_page]** (including both) of PDF document *docsrc* into the current one. Inserts will start with page number *start_at*. Value -1 indicates default values. All pages thus copied will be rotated as specified. Links, annotations and widgets can be excluded in the target, see below. All page numbers are 0-based.
diff --git a/tests/resources/test_4363.pdf b/tests/resources/test_4363.pdf
new file mode 100644
index 000000000..d15940e3e
Binary files /dev/null and b/tests/resources/test_4363.pdf differ
diff --git a/tests/test_textextract.py b/tests/test_textextract.py
index ff0dfa841..5fb9b4ef5 100644
--- a/tests/test_textextract.py
+++ b/tests/test_textextract.py
@@ -780,3 +780,37 @@ def test_extendable_textpage():

     path3 = os.path.normpath(f'{__file__}/../../tests/test_extendable_textpage3.pdf')
     document.save(path3)
+
+
+def test_4363():
+    """Search page 0 of test_4363.pdf for 'tour', then check its extracted text.
+
+    The search runs before the text extraction, and the extracted text must
+    equal the expected string exactly.
+    NOTE(review): presumably this call order is what reproduces issue 4363 -
+    confirm against the issue report before reordering anything here.
+    """
+    print()
+    print(f'{pymupdf.version=}')
+    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4363.pdf')
+    with pymupdf.open(path) as document:
+        assert len(document) == 1
+        page = document[0]
+        # Search first, then extract (see NOTE in the docstring).
+        t = page.search_for('tour')
+        print(f'{t=}')
+        text = page.get_text()
+    print('text:')
+    print(f'{text=}')
+    text_expected = (
+            'Deal Roadshow SiteTour\n'
+            'We know your process. We know your standard.\n'
+            'Professional Site Tour Video Productions for the Capital Markets.\n'
+            '1\n'
+            )
+    # Carry the diagnostic in the assertion itself so a failure is
+    # self-explanatory even when captured stdout is not shown.
+    assert text == text_expected, (
+            f'Expected:\n {text_expected!r}\n'
+            f'Found:\n {text!r}'
+            )
+