Skip to content

Commit b9ae4fb

Browse files
committed
fix: resolve CI lint failure - use modern type annotations
- Replace Optional[X] with X | None for type annotations
- Remove unused Optional import from typing
- Apply ruff formatting

This resolves the UP045 lint errors in code-checks / lint (3.12).

Signed-off-by: Br1an67 <932039080@qq.com>
1 parent 760bb23 commit b9ae4fb

1 file changed

Lines changed: 29 additions & 31 deletions

File tree

docling/backend/msword_backend.py

Lines changed: 29 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from copy import deepcopy
44
from io import BytesIO
55
from pathlib import Path
6-
from typing import Any, Callable, Final, Optional, Union
6+
from typing import Any, Callable, Final, Union
77
from urllib.parse import urlparse
88

99
from docling_core.types.doc import (
@@ -82,13 +82,13 @@ def __init__(
8282
self.valid: bool = False
8383
# Initialise the parents for the hierarchy
8484
self.max_levels: int = 10
85-
self.level_at_new_list: Optional[int] = None
86-
self.parents: dict[int, Optional[NodeItem]] = {}
85+
self.level_at_new_list: int | None = None
86+
self.parents: dict[int, NodeItem | None] = {}
8787
self.numbered_headers: dict[int, int] = {}
8888
self.equation_bookends: str = "<eq>{EQ}</eq>"
8989
# Track processed textbox elements to avoid duplication
9090
self.processed_textbox_elements: list[int] = []
91-
self.docx_to_pdf_converter: Optional[Callable] = None
91+
self.docx_to_pdf_converter: Callable | None = None
9292
self.docx_to_pdf_converter_init = False
9393
self.display_drawingml_warning = True
9494

@@ -195,26 +195,26 @@ def load_msword_file(
195195
def _update_history(
196196
self,
197197
name: str,
198-
level: Optional[int],
199-
numid: Optional[int],
200-
ilevel: Optional[int],
198+
level: int | None,
199+
numid: int | None,
200+
ilevel: int | None,
201201
):
202202
self.history["names"].append(name)
203203
self.history["levels"].append(level)
204204

205205
self.history["numids"].append(numid)
206206
self.history["indents"].append(ilevel)
207207

208-
def _prev_name(self) -> Optional[str]:
208+
def _prev_name(self) -> str | None:
209209
return self.history["names"][-1]
210210

211-
def _prev_level(self) -> Optional[int]:
211+
def _prev_level(self) -> int | None:
212212
return self.history["levels"][-1]
213213

214-
def _prev_numid(self) -> Optional[int]:
214+
def _prev_numid(self) -> int | None:
215215
return self.history["numids"][-1]
216216

217-
def _prev_indent(self) -> Optional[int]:
217+
def _prev_indent(self) -> int | None:
218218
return self.history["indents"][-1]
219219

220220
def _get_level(self) -> int:
@@ -366,9 +366,7 @@ def _walk_linear(
366366

367367
return doc, added_elements
368368

369-
def _str_to_int(
370-
self, s: Optional[str], default: Optional[int] = 0
371-
) -> Optional[int]:
369+
def _str_to_int(self, s: str | None, default: int | None = 0) -> int | None:
372370
if s is None:
373371
return None
374372
try:
@@ -386,7 +384,7 @@ def _split_text_and_number(self, input_string: str) -> list[str]:
386384

387385
def _get_numId_and_ilvl(
388386
self, paragraph: Paragraph
389-
) -> tuple[Optional[int], Optional[int]]:
387+
) -> tuple[int | None, int | None]:
390388
# Access the XML element of the paragraph
391389
numPr = paragraph._element.find(
392390
".//w:numPr", namespaces=paragraph._element.nsmap
@@ -559,7 +557,7 @@ def _build_multi_level_marker(
559557

560558
return marker
561559

562-
def _get_outline_level_from_style(self, paragraph: Paragraph) -> Optional[int]:
560+
def _get_outline_level_from_style(self, paragraph: Paragraph) -> int | None:
563561
"""Extract outlineLvl from paragraph's style definition.
564562
565563
In OOXML, outlineLvl is 0-indexed (0-8 for heading levels 1-9).
@@ -584,13 +582,13 @@ def _get_outline_level_from_style(self, paragraph: Paragraph) -> Optional[int]:
584582
pass
585583
return None
586584

587-
def _get_heading_and_level(self, style_label: str) -> tuple[str, Optional[int]]:
585+
def _get_heading_and_level(self, style_label: str) -> tuple[str, int | None]:
588586
parts = self._split_text_and_number(style_label)
589587

590588
if len(parts) == 2:
591589
parts.sort()
592590
label_str: str = ""
593-
label_level: Optional[int] = 0
591+
label_level: int | None = 0
594592
if parts[0].strip().lower() == "heading":
595593
label_str = "Heading"
596594
label_level = self._str_to_int(parts[1], None)
@@ -604,14 +602,14 @@ def _get_heading_and_level(self, style_label: str) -> tuple[str, Optional[int]]:
604602

605603
return style_label, None
606604

607-
def _get_label_and_level(self, paragraph: Paragraph) -> tuple[str, Optional[int]]:
605+
def _get_label_and_level(self, paragraph: Paragraph) -> tuple[str, int | None]:
608606
if paragraph.style is None:
609607
return "Normal", None
610608

611609
label: str = paragraph.style.style_id
612610
name: str = paragraph.style.name or ""
613-
base_style_label: Optional[str] = None
614-
base_style_name: Optional[str] = None
611+
base_style_label: str | None = None
612+
base_style_name: str | None = None
615613
if isinstance(
616614
base_style := getattr(paragraph.style, "base_style", None), ParagraphStyle
617615
):
@@ -653,7 +651,7 @@ def _get_label_and_level(self, paragraph: Paragraph) -> tuple[str, Optional[int]
653651
return label, None
654652

655653
@classmethod
656-
def _get_format_from_run(cls, run: Run) -> Optional[Formatting]:
654+
def _get_format_from_run(cls, run: Run) -> Formatting | None:
657655
# The .bold and .italic properties are booleans, but .underline can be an enum
658656
# like WD_UNDERLINE.THICK (value 6), so we need to convert it to a boolean
659657
is_bold = run.bold or False
@@ -683,7 +681,7 @@ def _get_paragraph_elements(self, paragraph: Paragraph):
683681
return [("", None, None)]
684682

685683
paragraph_elements: list[
686-
tuple[str, Optional[Formatting], Optional[Union[AnyUrl, Path]]]
684+
tuple[str, Formatting | None, Union[AnyUrl, Path] | None]
687685
] = []
688686
group_text = ""
689687
previous_format = None
@@ -990,9 +988,9 @@ def _create_or_reuse_parent(
990988
self,
991989
*,
992990
doc: DoclingDocument,
993-
prev_parent: Optional[NodeItem],
991+
prev_parent: NodeItem | None,
994992
paragraph_elements: list,
995-
) -> Optional[NodeItem]:
993+
) -> NodeItem | None:
996994
return (
997995
doc.add_inline_group(parent=prev_parent, content_layer=self.content_layer)
998996
if len(paragraph_elements) > 1
@@ -1200,7 +1198,7 @@ def _handle_text_elements(
12001198
def _add_heading(
12011199
self,
12021200
doc: DoclingDocument,
1203-
curr_level: Optional[int],
1201+
curr_level: int | None,
12041202
text: str,
12051203
is_numbered_style: bool = False,
12061204
) -> list[RefItem]:
@@ -1499,7 +1497,7 @@ def _handle_tables(
14991497
cell_set.add(cell._tc)
15001498

15011499
spanned_idx = row_idx
1502-
spanned_tc: Optional[CT_Tc] = cell._tc
1500+
spanned_tc: CT_Tc | None = cell._tc
15031501
while spanned_tc == cell._tc:
15041502
spanned_idx += 1
15051503
spanned_tc = (
@@ -1640,8 +1638,8 @@ def _is_rich_table_cell(self, cell: _Cell) -> bool:
16401638
def _handle_pictures(
16411639
self, drawing_blip: Any, doc: DoclingDocument
16421640
) -> list[RefItem]:
1643-
def get_docx_image(image: Any) -> Optional[bytes]:
1644-
image_data: Optional[bytes] = None
1641+
def get_docx_image(image: Any) -> bytes | None:
1642+
image_data: bytes | None = None
16451643
rId = image.get(
16461644
"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
16471645
)
@@ -1655,7 +1653,7 @@ def get_docx_image(image: Any) -> Optional[bytes]:
16551653
if drawing_blip:
16561654
level = self._get_level()
16571655
# Open the BytesIO object with PIL to create an Image
1658-
parent: Optional[NodeItem] = (
1656+
parent: NodeItem | None = (
16591657
self.parents[level - 1]
16601658
if len(drawing_blip) == 1
16611659
else doc.add_group(
@@ -1665,7 +1663,7 @@ def get_docx_image(image: Any) -> Optional[bytes]:
16651663
)
16661664
)
16671665
for image in drawing_blip:
1668-
image_data: Optional[bytes] = get_docx_image(image)
1666+
image_data: bytes | None = get_docx_image(image)
16691667
if image_data is None:
16701668
_log.warning("Warning: image cannot be found")
16711669
p1 = doc.add_picture(

0 commit comments

Comments
 (0)