diff --git a/docs/vars.rst b/docs/vars.rst index 78cd7ac2a..7fb7bb10c 100644 --- a/docs/vars.rst +++ b/docs/vars.rst @@ -253,7 +253,7 @@ For the PyMuPDF programmer, some combination (using Python's `|` operator, or si .. py:data:: TEXT_COLLECT_STRUCTURE - 256 -- Not supported. + 256 -- Not supported yet. .. py:data:: TEXT_ACCURATE_BBOXES @@ -264,17 +264,45 @@ For the PyMuPDF programmer, some combination (using Python's `|` operator, or si .. py:data:: TEXT_COLLECT_VECTORS - 1024 -- Not supported. + 1024 -- Not supported yet. .. py:data:: TEXT_IGNORE_ACTUALTEXT - 2048 -- Ignore built-in differences between text appearing in e.g. PDF viewers versus text stored in the PDF. See :ref:`AdobeManual`, page 615 for background. If set, the **stored** ("replacement" text) is ignored in favor of the displayed text. + 2048 -- Ignore built-in differences between text appearing in e.g. PDF viewers versus text stored in the PDF. See :ref:`AdobeManual`, page 615 for background. If set, the **stored** ("replacement") text is ignored in favor of the **displayed** text. .. py:data:: TEXT_STEXT_SEGMENT 4096 -- Attempt to segment page into different regions. -The following constants represent the default combinations of the above for text extraction and searching: +.. py:data:: TEXT_STEXT_PARAGRAPH_BREAK + + 8192 -- Not supported yet. + +.. py:data:: TEXT_STEXT_TABLE_HUNT + + 16384 -- Not supported yet. + +.. py:data:: TEXT_COLLECT_STYLES + + 32768 -- Detect underlined and strikeout text. Also detect and handle faked bold text in most cases. + +.. py:data:: TEXT_GID_FOR_UNKNOWN_UNICODE + + 65536 -- An alternative to `TEXT_CID_FOR_UNKNOWN_UNICODE` that uses the GID (glyph ID) instead of the CID (character ID). The two flags must never be used together, because the results are undefined. + +.. py:data:: TEXT_CLIP_RECT + + 1 << 17 -- Not supported yet. + +.. py:data:: TEXT_ACCURATE_ASCENDERS + + 1 << 18 -- Not supported yet. + +.. 
py:data:: TEXT_ACCURATE_SIDE_BEARINGS + + 1 << 19 -- Not supported yet. + +The following constants represent default combinations of the above for text extraction and searching: .. py:data:: TEXTFLAGS_TEXT diff --git a/src/__init__.py b/src/__init__.py index fbcae3c44..89bc06be3 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -13516,18 +13516,18 @@ def width(self): TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP TEXT_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE -if mupdf_version_tuple >= (1, 25): - TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE - TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES - TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS - TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT - TEXT_STEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT -else: - TEXT_COLLECT_STRUCTURE = 256 - TEXT_ACCURATE_BBOXES = 512 - TEXT_COLLECT_VECTORS = 1024 - TEXT_IGNORE_ACTUALTEXT = 2048 - TEXT_STEXT_SEGMENT = 4096 +TEXT_COLLECT_STRUCTURE = 256 # mupdf.FZ_STEXT_COLLECT_STRUCTURE +TEXT_ACCURATE_BBOXES = 512 # mupdf.FZ_STEXT_ACCURATE_BBOXES +TEXT_COLLECT_VECTORS = 1024 # mupdf.FZ_STEXT_COLLECT_VECTORS +TEXT_IGNORE_ACTUALTEXT = 2048 # mupdf.FZ_STEXT_IGNORE_ACTUALTEXT +TEXT_STEXT_SEGMENT = 4096 # mupdf.FZ_STEXT_SEGMENT +TEXT_STEXT_PARAGRAPH_BREAK = 8192 # mupdf.FZ_STEXT_PARAGRAPH_BREAK +TEXT_STEXT_TABLE_HUNT = 16384 # mupdf.FZ_STEXT_TABLE_HUNT +TEXT_COLLECT_STYLES = 32768 # mupdf.FZ_STEXT_COLLECT_STYLES +TEXT_GID_FOR_UNKNOWN_UNICODE = 65536 # mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE +TEXT_CLIP_RECT = 1 << 17 # mupdf.FZ_STEXT_CLIP_RECT +TEXT_ACCURATE_ASCENDERS = 1 << 18 # mupdf.FZ_STEXT_ACCURATE_ASCENDERS +TEXT_ACCURATE_SIDE_BEARINGS = 1 << 19 # mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS TEXTFLAGS_WORDS = (0 | TEXT_PRESERVE_LIGATURES @@ -13620,14 +13620,6 @@ def width(self): PDF_BM_Screen = "Screen" PDF_BM_SoftLight = "Softlight" -# General text flags -TEXT_FONT_SUPERSCRIPT = 1 
-TEXT_FONT_ITALIC = 2 -TEXT_FONT_SERIFED = 4 -TEXT_FONT_MONOSPACED = 8 -TEXT_FONT_BOLD = 16 - - annot_skel = { "goto1": lambda a, b, c, d, e: f"<>/Rect[{e}]/BS<>/Subtype/Link>>", "goto2": lambda a, b: f"<>/Rect[{b}]/BS<>/Subtype/Link>>",