elements
+ current_type = None
+ current_ol = None
+ for ol_type, start_val, item_text, indent_spaces in list_items:
+ if ol_type != current_type:
+ current_type = ol_type
+ indent_level = indent_spaces // 4
+ current_ol = soup.new_tag(
+ "ol",
+ attrs={
+ "type": ol_type,
+ "data-paren": "true",
+ "data-indent-level": str(indent_level),
+ },
+ )
+ if start_val != 1:
+ current_ol["start"] = str(start_val)
+ li.append(current_ol)
+ new_li = soup.new_tag("li")
+ new_li.string = item_text
+ current_ol.append(new_li)
+
+ # Also handle standalone elements with (a)/(1)/(i) patterns (not inside a list)
+ for p in list(soup.find_all("p", recursive=False)):
+ full_text = p.get_text()
+ if not _PAREN_ITEM_RE.search(full_text):
+ continue
+
+ lines = full_text.split("\n")
+ list_items: list[tuple[str, int, str, int]] = []
+ for line in lines:
+ stripped = line.strip()
+ if not stripped:
+ continue
+ m = _PAREN_ITEM_RE.match(stripped)
+ if m:
+ ol_type, start_val = _detect_paren_type(m.group(1))
+ indent_spaces = len(line) - len(line.lstrip())
+ list_items.append(
+ (ol_type, start_val, stripped[m.end() :], indent_spaces)
+ )
+ if not list_items:
+ continue
+
+ current_type = None
+ current_ol = None
+ for ol_type, start_val, item_text, indent_spaces in list_items:
+ if ol_type != current_type:
+ current_type = ol_type
+ indent_level = indent_spaces // 4
+ current_ol = soup.new_tag(
+ "ol",
+ attrs={
+ "type": ol_type,
+ "data-paren": "true",
+ "data-indent-level": str(indent_level),
+ },
+ )
+ if start_val != 1:
+ current_ol["start"] = str(start_val)
+ p.insert_before(current_ol)
+ new_li = soup.new_tag("li")
+ new_li.string = item_text
+ current_ol.append(new_li)
+ p.decompose()
+
+
+# ---------------------------------------------------------------------------
+# Low-level OOXML numbering helpers
+# ---------------------------------------------------------------------------
+
+
+def _numbering_root(doc):
+    """Return the root element of the document's numbering part.
+
+    The element is read from ``doc.part.numbering_part``.  If that access
+    raises, a throw-away ``List Number`` paragraph is added and immediately
+    removed again (intended to make the document materialise its numbering
+    part) and the lookup is retried once.  An error raised by the retry
+    propagates to the caller.
+    """
+    try:
+        root = doc.part.numbering_part._element
+    except Exception:
+        # No numbering part yet – force creation by adding and removing a
+        # list paragraph, then look the part up again.
+        placeholder = doc.add_paragraph("", style="List Number")
+        placeholder_el = placeholder._element
+        placeholder_el.getparent().remove(placeholder_el)
+        root = doc.part.numbering_part._element
+    return root
+
+
+def _next_abstract_num_id(numbering) -> int:
+    """Return an unused ``w:abstractNumId``: one above the highest in use.
+
+    Only direct ``w:abstractNum`` children of *numbering* are considered.
+    When there are none the result is ``0``.
+    """
+    id_attr = qn("w:abstractNumId")
+    highest = max(
+        (int(el.get(id_attr)) for el in numbering.findall(qn("w:abstractNum"))),
+        default=-1,
+    )
+    return highest + 1
+
+
+def _next_num_id(numbering) -> int:
+    """Return an unused ``w:numId``: one above the highest in use.
+
+    Only direct ``w:num`` children of *numbering* are considered.  When there
+    are none the result is ``1``.
+    """
+    id_attr = qn("w:numId")
+    highest = max(
+        (int(el.get(id_attr)) for el in numbering.findall(qn("w:num"))),
+        default=0,
+    )
+    return highest + 1
+
+
+# Unit check: a twip is 1/20 pt, i.e. 1440 twips per inch, so 504 / 1440 = 0.35 in.
+_LIST_HANGING_INDENT = 504
+"""Hanging indent in twips used for every numbered-list level.
+
+A single value is used for **all** numbering formats so that item text aligns
+at the same column across lists that use different label styles (e.g. ``1.``,
+``(a)``, ``(iii)``). 504 twips (≈ 0.35 in) is wide enough for the widest
+common parenthesized roman label ``(viii)`` while keeping the indent compact.
+"""
+
+
+def _get_or_create_abstract_num(
+    doc, num_fmt: str, lvl_text: str, nesting_levels: int = 3, start: int = 1
+) -> int:
+    """Create a multilevel ``<w:abstractNum>`` for the given format.
+
+    Despite the name, a new definition is always created so that each
+    independent list gets its own ``abstractNumId``.  Sharing an abstract num
+    across multiple ``<w:num>`` elements can cause Word to silently drop
+    numbering on some lists.
+
+    The definition holds *nesting_levels* ``<w:lvl>`` entries (ilvl 0, 1, …)
+    that all use *num_fmt* and whose left indent grows by one hanging-indent
+    step per level.
+
+    Args:
+        doc: Document whose numbering part receives the definition.
+        num_fmt: Value written to ``<w:numFmt>`` of every level.
+        lvl_text: Label template for level 0 (e.g. ``"%1."``); for deeper
+            levels the ``%1`` placeholder is rewritten to ``%2``, ``%3``, …
+        nesting_levels: Number of ``<w:lvl>`` elements to create.
+        start: ``<w:start>`` value for ilvl 0 (deeper levels start at 1).
+            Setting it here keeps the numbering correct for renderers that
+            ignore ``<w:lvlOverride>/<w:startOverride>``.
+
+    Returns:
+        The ``abstractNumId`` of the new definition.
+    """
+    numbering = _numbering_root(doc)
+    abstract_num_id = _next_abstract_num_id(numbering)
+
+    abstract_num = OxmlElement("w:abstractNum")
+    abstract_num.set(qn("w:abstractNumId"), str(abstract_num_id))
+
+    multi_level_type = OxmlElement("w:multiLevelType")
+    multi_level_type.set(qn("w:val"), "multilevel")
+    abstract_num.append(multi_level_type)
+
+    hanging = _LIST_HANGING_INDENT
+
+    for ilvl in range(nesting_levels):
+        lvl = OxmlElement("w:lvl")
+        lvl.set(qn("w:ilvl"), str(ilvl))
+
+        # Simple ``w:val`` children, listed in the order the CT_Lvl schema
+        # sequence requires: start, numFmt, suff, lvlText, lvlJc (then pPr).
+        # ``suff=tab`` forces a tab after the label so text aligns at the
+        # left-indent position regardless of label width.
+        simple_children = (
+            ("w:start", str(start if ilvl == 0 else 1)),
+            ("w:numFmt", num_fmt),
+            ("w:suff", "tab"),
+            # Use the ilvl+1 placeholder for each level (e.g. %1, %2, %3)
+            ("w:lvlText", lvl_text.replace("%1", f"%{ilvl + 1}")),
+            ("w:lvlJc", "left"),
+        )
+        for tag, value in simple_children:
+            child = OxmlElement(tag)
+            child.set(qn("w:val"), value)
+            lvl.append(child)
+
+        left = hanging * (ilvl + 1)
+        ppr = OxmlElement("w:pPr")
+
+        # Explicit tab stop at the text position so the tab after the label
+        # lands exactly at the left indent.  Within <w:pPr> the schema puts
+        # <w:tabs> before <w:ind>.
+        tabs = OxmlElement("w:tabs")
+        tab = OxmlElement("w:tab")
+        tab.set(qn("w:val"), "num")
+        tab.set(qn("w:pos"), str(left))
+        tabs.append(tab)
+        ppr.append(tabs)
+
+        ind = OxmlElement("w:ind")
+        ind.set(qn("w:left"), str(left))
+        ind.set(qn("w:hanging"), str(hanging))
+        ppr.append(ind)
+
+        lvl.append(ppr)
+        abstract_num.append(lvl)
+
+    # OOXML requires all <w:abstractNum> elements before any <w:num>.
+    # Insert before the first <w:num> so Word doesn't silently ignore it.
+    first_num = numbering.find(qn("w:num"))
+    if first_num is not None:
+        first_num.addprevious(abstract_num)
+    else:
+        numbering.append(abstract_num)
+    return abstract_num_id
+
+
+def _create_num(
+    doc, abstract_num_id: int, start_override: int | None = None, level: int = 0
+) -> int:
+    """Append a new ``<w:num>`` that references *abstract_num_id*.
+
+    If *start_override* is provided, a ``<w:lvlOverride>`` for *level*
+    containing a ``<w:startOverride>`` is added so that numbering starts at
+    the given value rather than continuing.
+
+    Returns:
+        The ``numId`` assigned to the new element.
+    """
+    root = _numbering_root(doc)
+    new_id = _next_num_id(root)
+
+    num_el = OxmlElement("w:num")
+    num_el.set(qn("w:numId"), str(new_id))
+
+    ref = OxmlElement("w:abstractNumId")
+    ref.set(qn("w:val"), str(abstract_num_id))
+    num_el.append(ref)
+
+    if start_override is not None:
+        override = OxmlElement("w:lvlOverride")
+        override.set(qn("w:ilvl"), str(level))
+
+        restart = OxmlElement("w:startOverride")
+        restart.set(qn("w:val"), str(start_override))
+        override.append(restart)
+
+        num_el.append(override)
+
+    root.append(num_el)
+    return new_id
+
+
+def _apply_numbering(paragraph, num_id: int, level: int = 0) -> None:
+    """Apply numbering properties to a paragraph at the given nesting level.
+
+    Ensures the paragraph's ``<w:pPr>`` holds a ``<w:numPr>`` whose
+    ``<w:ilvl>`` is *level* and whose ``<w:numId>`` is *num_id*.  Existing
+    elements are updated in place; missing ones are created.
+    """
+    p_pr = paragraph._p.get_or_add_pPr()
+
+    num_pr = p_pr.find(qn("w:numPr"))
+    if num_pr is None:
+        num_pr = OxmlElement("w:numPr")
+        # NOTE(review): appended at the end of <w:pPr>; this assumes the
+        # paragraph has no properties that must follow <w:numPr> (spacing,
+        # ind, jc, ...) yet — confirm if callers pre-format the paragraph.
+        p_pr.append(num_pr)
+
+    ilvl = num_pr.find(qn("w:ilvl"))
+    if ilvl is None:
+        ilvl = OxmlElement("w:ilvl")
+        # <w:ilvl> is the first child in the numPr schema sequence, so it
+        # must go in front of any <w:numId> that is already present.
+        num_pr.insert(0, ilvl)
+    ilvl.set(qn("w:val"), str(level))
+
+    num_id_el = num_pr.find(qn("w:numId"))
+    if num_id_el is None:
+        num_id_el = OxmlElement("w:numId")
+        # <w:numId> directly follows <w:ilvl> in the schema sequence.
+        ilvl.addnext(num_id_el)
+    num_id_el.set(qn("w:val"), str(num_id))
+
+
+def _patch_abstract_num_level(
+    doc, num_id: int, level: int, num_fmt: str, lvl_text: str
+) -> None:
+    """Patch the abstractNum referenced by *num_id* so that *level* uses the given format.
+
+    When a child list (e.g. ``(a)``) is nested under a parent list (e.g. ``1.``),
+    both must share the same ``numId``.  This function updates the parent's
+    abstract numbering definition so that the child's ``ilvl`` has the correct
+    ``numFmt`` and ``lvlText``, a tab suffix, and the standard indentation
+    with a matching tab stop.
+
+    The call is idempotent: patching the same level repeatedly updates the
+    existing elements instead of adding duplicates.  Nothing happens when
+    *num_id* or its abstract definition cannot be found.
+
+    Args:
+        doc: Document whose numbering part is patched.
+        num_id: ``numId`` of the ``<w:num>`` whose definition is updated.
+        level: Zero-based ``ilvl`` to patch; the level is created if absent.
+        num_fmt: Value for the level's ``<w:numFmt>``.
+        lvl_text: Label template written with a ``%1`` placeholder; it is
+            rewritten to ``%{level + 1}`` for this level.
+    """
+    numbering = _numbering_root(doc)
+
+    # Find the <w:num> for this numId and get its abstractNumId
+    abstract_num_id = None
+    for num_el in numbering.findall(qn("w:num")):
+        if int(num_el.get(qn("w:numId"))) == num_id:
+            abstract_ref = num_el.find(qn("w:abstractNumId"))
+            if abstract_ref is not None:
+                abstract_num_id = int(abstract_ref.get(qn("w:val")))
+            break
+    if abstract_num_id is None:
+        return
+
+    # Find the abstractNum
+    abstract_num = None
+    for candidate in numbering.findall(qn("w:abstractNum")):
+        if int(candidate.get(qn("w:abstractNumId"))) == abstract_num_id:
+            abstract_num = candidate
+            break
+    if abstract_num is None:
+        return
+
+    # Find or create the <w:lvl> for this ilvl
+    target_lvl = None
+    for lvl in abstract_num.findall(qn("w:lvl")):
+        if int(lvl.get(qn("w:ilvl"))) == level:
+            target_lvl = lvl
+            break
+
+    if target_lvl is None:
+        target_lvl = OxmlElement("w:lvl")
+        target_lvl.set(qn("w:ilvl"), str(level))
+        start_el = OxmlElement("w:start")
+        start_el.set(qn("w:val"), "1")
+        target_lvl.append(start_el)
+        abstract_num.append(target_lvl)
+
+    fmt_el = _ensure_ordered_child(target_lvl, "w:numFmt", _LVL_CHILD_SEQUENCE)
+    fmt_el.set(qn("w:val"), num_fmt)
+
+    txt_el = _ensure_ordered_child(target_lvl, "w:lvlText", _LVL_CHILD_SEQUENCE)
+    txt_el.set(qn("w:val"), lvl_text.replace("%1", f"%{level + 1}"))
+
+    # Justification is only defaulted; an existing value is left alone.
+    if target_lvl.find(qn("w:lvlJc")) is None:
+        jc = _ensure_ordered_child(target_lvl, "w:lvlJc", _LVL_CHILD_SEQUENCE)
+        jc.set(qn("w:val"), "left")
+
+    # Ensure suffix is tab-based for consistent text alignment
+    suff = _ensure_ordered_child(target_lvl, "w:suff", _LVL_CHILD_SEQUENCE)
+    suff.set(qn("w:val"), "tab")
+
+    # Ensure indentation
+    hanging = _LIST_HANGING_INDENT
+    left = hanging + (hanging * level)
+    ppr = _ensure_ordered_child(target_lvl, "w:pPr", _LVL_CHILD_SEQUENCE)
+    ind = _ensure_ordered_child(ppr, "w:ind", _PPR_CHILD_SEQUENCE)
+    ind.set(qn("w:left"), str(left))
+    ind.set(qn("w:hanging"), str(hanging))
+
+    # Explicit tab stop at the text position.  Reuse an existing list ("num")
+    # tab stop so repeated patches of one level do not pile up duplicates.
+    tabs = _ensure_ordered_child(ppr, "w:tabs", _PPR_CHILD_SEQUENCE)
+    tab = None
+    for existing in tabs.findall(qn("w:tab")):
+        if existing.get(qn("w:val")) == "num":
+            tab = existing
+            break
+    if tab is None:
+        tab = OxmlElement("w:tab")
+        tab.set(qn("w:val"), "num")
+        tabs.append(tab)
+    tab.set(qn("w:pos"), str(left))
+
+
+# Child order of <w:lvl> as defined by the OOXML schema sequence.
+_LVL_CHILD_SEQUENCE = (
+    "w:start",
+    "w:numFmt",
+    "w:lvlRestart",
+    "w:pStyle",
+    "w:isLgl",
+    "w:suff",
+    "w:lvlText",
+    "w:lvlPicBulletId",
+    "w:legacy",
+    "w:lvlJc",
+    "w:pPr",
+    "w:rPr",
+)
+
+# Child order of <w:pPr> as defined by the OOXML schema sequence.
+_PPR_CHILD_SEQUENCE = (
+    "w:pStyle",
+    "w:keepNext",
+    "w:keepLines",
+    "w:pageBreakBefore",
+    "w:framePr",
+    "w:widowControl",
+    "w:numPr",
+    "w:suppressLineNumbers",
+    "w:pBdr",
+    "w:shd",
+    "w:tabs",
+    "w:suppressAutoHyphens",
+    "w:kinsoku",
+    "w:wordWrap",
+    "w:overflowPunct",
+    "w:topLinePunct",
+    "w:autoSpaceDE",
+    "w:autoSpaceDN",
+    "w:bidi",
+    "w:adjustRightInd",
+    "w:snapToGrid",
+    "w:spacing",
+    "w:ind",
+    "w:contextualSpacing",
+    "w:mirrorIndents",
+    "w:suppressOverlap",
+    "w:jc",
+    "w:textDirection",
+    "w:textAlignment",
+    "w:textboxTightWrap",
+    "w:outlineLvl",
+    "w:divId",
+    "w:cnfStyle",
+    "w:rPr",
+    "w:sectPr",
+    "w:pPrChange",
+)
+
+
+def _ensure_ordered_child(parent, tag: str, sequence: tuple[str, ...]):
+    """Return *parent*'s *tag* child, creating it at its schema position if absent.
+
+    *sequence* lists the allowed child tags of *parent* in schema order.  A
+    new child is inserted before the first existing child that comes later in
+    *sequence*, or appended when no such child exists.
+    """
+    child = parent.find(qn(tag))
+    if child is not None:
+        return child
+
+    child = OxmlElement(tag)
+    for successor_tag in sequence[sequence.index(tag) + 1 :]:
+        successor = parent.find(qn(successor_tag))
+        if successor is not None:
+            successor.addprevious(child)
+            return child
+    parent.append(child)
+    return child
+
+
+def _ol_type_to_numfmt(type_attr: str | None, paren: bool = False) -> tuple[str, str]:
+    """Map an HTML ``<ol type=...>`` value to ``(OOXML numFmt, lvlText)``.
+
+    Recognised types are ``"1"`` (decimal), ``"a"``/``"A"`` (lower/upper
+    letter) and ``"i"``/``"I"`` (lower/upper roman).  A missing type is
+    treated as ``"1"`` and an unrecognised one falls back to decimal.
+
+    Letter and roman types always use the parenthesized label ``(%1)``.
+    Decimal (and the fallback) uses ``%1.`` (standard ``1. 2. 3.``) unless
+    *paren* is True, in which case it is parenthesized as well.
+    """
+    type_attr = type_attr or "1"
+    fmt_map = {
+        "1": "decimal",
+        "a": "lowerLetter",
+        "A": "upperLetter",
+        "i": "lowerRoman",
+        # Without this entry type="I" fell through to "decimal" while still
+        # receiving the parenthesized label meant for roman numerals.
+        "I": "upperRoman",
+    }
+    num_fmt = fmt_map.get(type_attr, "decimal")
+    if paren or type_attr.lower() in ("a", "i"):
+        lvl_text = "(%1)"
+    else:
+        lvl_text = "%1."
+    return num_fmt, lvl_text
+
+
def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
"""Parse markdown text and add elements to Word document"""
# Convert markdown to HTML
- html = markdown.markdown(markdown_text, extensions=["tables", "fenced_code"])
+ html = markdown.markdown(
+ markdown_text, extensions=["tables", "fenced_code", "sane_lists"]
+ )
soup = BeautifulSoup(html, "html.parser")
+ # Post-process: convert (a), (1), (i) text patterns into nested elements
+ _post_process_paren_lists(soup)
+
+ # Track ordered list numbering state for restart/continue semantics
+ list_state: dict[str, Any] = {"ordered": {}}
+
# Process each HTML element in order
for element in soup.find_all(
[
@@ -575,10 +1077,13 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
"blockquote",
"table",
"pre",
- ]
+ ],
+ recursive=False,
):
if element.name.startswith("h"):
- # Handle headings
+ # Handle headings – reset list continuation state so lists
+ # after a heading start fresh
+ list_state["ordered"] = {}
level = int(element.name[1]) # Extract number from h1, h2, etc.
text = element.get_text().strip()
if text:
@@ -590,15 +1095,8 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
_add_formatted_text_to_paragraph(paragraph, element)
elif element.name in ["ul", "ol"]:
- # Handle lists
- is_numbered = element.name == "ol"
- for li in element.find_all("li", recursive=False):
- text = li.get_text().strip()
- if text:
- if is_numbered:
- doc.add_paragraph(text, style="List Number")
- else:
- doc.add_paragraph(text, style="List Bullet")
+ # Handle lists (including nested)
+ _add_list_items(doc, element, level=0, list_state=list_state)
elif element.name == "blockquote":
# Handle blockquotes
@@ -620,10 +1118,125 @@ def parse_markdown_to_docx(doc: Document, markdown_text: str) -> None:
run.font.name = "Courier New"
-def _add_formatted_text_to_paragraph(paragraph, html_element):
+def _add_list_items(
+    doc: Document,
+    list_element,
+    level: int,
+    list_state: dict,
+    parent_num_id: int | None = None,
+) -> None:
+    """Recursively add list items to Word document with proper nesting.
+
+    For bullet lists, uses Word's built-in 'List Bullet' styles.
+    For ordered lists, creates low-level OOXML numbering definitions that
+    support custom formats (decimal, lowerLetter, lowerRoman) and proper
+    restart/continuation semantics.
+
+    Args:
+        doc: Document that receives one paragraph per non-empty list item.
+        list_element: The ``<ul>`` or ``<ol>`` element to render.
+        level: Zero-based recursion depth of *list_element*.
+        list_state: Mutable state shared across calls; ``list_state["ordered"]``
+            maps ``(level, numFmt, lvlText)`` to the last ``numId`` used so a
+            later list whose ``start`` is not 1 can continue it.
+        parent_num_id: Passed when a child ordered list should share the
+            parent's numbering instance so that Word renders all nesting
+            levels under one coherent list.
+    """
+    is_numbered = list_element.name == "ol"
+
+    # Respect original markdown indentation via data-indent-level attribute.
+    # Every 4 leading spaces in the markdown source maps to one indent level.
+    # When data-indent-level is set it already encodes the absolute nesting
+    # depth relative to the top-level list, so we must NOT add `level` (which
+    # the recursive call already incremented) on top of it — that would
+    # double-count the nesting.
+    indent_level = int(list_element.get("data-indent-level", 0))
+    effective_level = indent_level if indent_level > 0 else level
+
+    num_id = None
+    bullet_style = None
+    if is_numbered:
+        num_id = _resolve_ordered_num_id(
+            doc, list_element, level, effective_level, list_state, parent_num_id
+        )
+    else:
+        # Only three bullet styles are used: deeper levels reuse the third.
+        clamped_level = min(effective_level, 2)
+        bullet_style = (
+            "List Bullet" if clamped_level == 0 else f"List Bullet {clamped_level + 1}"
+        )
+
+    # Nested ordered lists inherit the parent numId so Word keeps them
+    # under one coherent multilevel numbering instance.
+    effective_parent = num_id if is_numbered else parent_num_id
+
+    for li in list_element.find_all("li", recursive=False):
+        if _direct_li_text(li):
+            if is_numbered:
+                p = doc.add_paragraph()
+                p.style = doc.styles["List Paragraph"]
+                _apply_numbering(p, num_id=num_id, level=effective_level)
+                _add_formatted_text_to_paragraph(p, li, skip_nested_lists=True)
+            else:
+                doc.add_paragraph(_direct_li_text(li), style=bullet_style)
+
+        # Recurse into nested <ul> or <ol> (direct children of this <li>)
+        for nested_list in li.find_all(["ul", "ol"], recursive=False):
+            _add_list_items(
+                doc,
+                nested_list,
+                level + 1,
+                list_state=list_state,
+                parent_num_id=effective_parent,
+            )
+
+
+def _resolve_ordered_num_id(
+    doc,
+    list_element,
+    level: int,
+    effective_level: int,
+    list_state: dict,
+    parent_num_id: int | None,
+) -> int:
+    """Return the ``numId`` the items of an ``<ol>`` element should use.
+
+    A nested list (``level > 0`` with a parent ``numId``) reuses the parent's
+    ``numId`` after patching the parent's definition for this level.  A list
+    with ``start == 1`` always gets a fresh numbering instance.  Any other
+    start value continues the instance recorded in *list_state* for the same
+    ``(level, numFmt, lvlText)`` key, or creates one starting at that value
+    when none is recorded.  The abstract definition is only created when a
+    new instance is actually needed, so continued lists leave no unused
+    definitions behind.
+    """
+    start = int(list_element.get("start", 1))
+    type_attr = list_element.get("type") or "1"
+    paren = list_element.get("data-paren") == "true"
+    num_fmt, lvl_text = _ol_type_to_numfmt(type_attr, paren=paren)
+
+    if parent_num_id is not None and level > 0:
+        # Child list: reuse parent numId but patch the abstractNum to
+        # have the correct format at this ilvl.
+        _patch_abstract_num_level(
+            doc, parent_num_id, effective_level, num_fmt, lvl_text
+        )
+        return parent_num_id
+
+    # Key for tracking continuation: lists at the same nesting level
+    # with the same format can continue numbering across boundaries
+    key = (effective_level, num_fmt, lvl_text)
+
+    num_id = None if start == 1 else list_state["ordered"].get(key)
+    if num_id is None:
+        abstract_num_id = _get_or_create_abstract_num(
+            doc, num_fmt, lvl_text, start=start
+        )
+        num_id = _create_num(
+            doc, abstract_num_id, start_override=start, level=effective_level
+        )
+
+    list_state["ordered"][key] = num_id
+    return num_id
+
+
+def _direct_li_text(li) -> str:
+    """Return the stripped text of *li*, ignoring nested ``<ul>``/``<ol>`` children."""
+    return "".join(
+        child.get_text() if hasattr(child, "get_text") else str(child)
+        for child in li.children
+        if not (hasattr(child, "name") and child.name in ("ul", "ol"))
+    ).strip()
+
+
+def _add_formatted_text_to_paragraph(
+ paragraph, html_element, skip_nested_lists: bool = False
+):
"""Add formatted text from HTML element to Word paragraph"""
# Handle direct text and formatting
for content in html_element.contents:
+ if (
+ skip_nested_lists
+ and hasattr(content, "name")
+ and content.name in ("ul", "ol")
+ ):
+ continue
if hasattr(content, "name") and content.name:
# This is an HTML tag
if content.name == "strong" or content.name == "b":
@@ -641,7 +1254,9 @@ def _add_formatted_text_to_paragraph(paragraph, html_element):
else:
# Nested elements - recursively process only if it has contents
if hasattr(content, "contents"):
- _add_formatted_text_to_paragraph(paragraph, content)
+ _add_formatted_text_to_paragraph(
+ paragraph, content, skip_nested_lists=skip_nested_lists
+ )
else:
# Just add the text content
text = content.get_text()
@@ -749,15 +1364,21 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
data={
"template_summary": {
"structure": f"{analysis['paragraphs']}p,{analysis['tables']}t",
- "fillable_total": int(analysis["fillable_paragraphs"] + analysis["fillable_cells"]),
+ "fillable_total": int(
+ analysis["fillable_paragraphs"] + analysis["fillable_cells"]
+ ),
"content_elements_hidden": int(
- analysis["total_elements"] - len(fillable_paragraphs) - len(fillable_cells)
+ analysis["total_elements"]
+ - len(fillable_paragraphs)
+ - len(fillable_cells)
),
},
"fillable_paragraphs": fillable_paragraphs,
"fillable_cells": fillable_cells,
"pattern_distribution": pattern_counts,
- "recommended_strategy": "mixed" if len(pattern_counts) > 2 else "single_method",
+ "recommended_strategy": "mixed"
+ if len(pattern_counts) > 2
+ else "single_method",
"template_ready": True,
},
cost_usd=0.0,
@@ -803,7 +1424,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
"markdown_processed": bool(markdown_content),
}
- return await save_and_return_document(result, document_id, context, custom_filename)
+ return await save_and_return_document(
+ result, document_id, context, custom_filename
+ )
@doc_maker.action("add_table")
@@ -861,7 +1484,8 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
# Check if it's an image by extension or content type
is_image_by_extension = any(
- filename.lower().endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"]
+ filename.lower().endswith(ext)
+ for ext in [".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"]
)
is_image_by_content_type = content_type.startswith("image/")
@@ -876,7 +1500,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
paragraph = doc.add_paragraph()
if width and height:
- paragraph.add_run().add_picture(image_file, width=Inches(width), height=Inches(height))
+ paragraph.add_run().add_picture(
+ image_file, width=Inches(width), height=Inches(height)
+ )
elif width:
paragraph.add_run().add_picture(image_file, width=Inches(width))
elif height:
@@ -942,7 +1568,11 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
new_content = update["content"]
# Get paragraph by index using iter_block_items
- paragraphs = [block for block in iter_block_items(doc) if isinstance(block, Paragraph)]
+ paragraphs = [
+ block
+ for block in iter_block_items(doc)
+ if isinstance(block, Paragraph)
+ ]
if paragraph_index < len(paragraphs):
paragraph = paragraphs[paragraph_index]
@@ -961,22 +1591,32 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
new_content = update["content"]
# Get table by index
- tables = [block for block in iter_block_items(doc) if isinstance(block, Table)]
+ tables = [
+ block for block in iter_block_items(doc) if isinstance(block, Table)
+ ]
if table_index < len(tables):
table = tables[table_index]
if row < len(table.rows) and col < len(table.columns):
cell = table.cell(row, col)
cell.text = new_content
- changes_made.append(f"Updated table {table_index} cell ({row},{col})")
+ changes_made.append(
+ f"Updated table {table_index} cell ({row},{col})"
+ )
else:
- changes_made.append(f"Cell ({row},{col}) out of range in table {table_index}")
+ changes_made.append(
+ f"Cell ({row},{col}) out of range in table {table_index}"
+ )
else:
changes_made.append(f"Table {table_index} not found")
# Create LLM-optimized response
successful_updates = [change for change in changes_made if "Updated" in change]
- failed_updates = [change for change in changes_made if "not found" in change or "out of range" in change]
+ failed_updates = [
+ change
+ for change in changes_made
+ if "not found" in change or "out of range" in change
+ ]
original_result = {
"success": len(successful_updates) > 0,
@@ -984,7 +1624,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
"failed": len(failed_updates),
"summary": f"Updated {len(successful_updates)} elements"
+ (f", {len(failed_updates)} failed" if failed_updates else ""),
- "failures": failed_updates[:3] if failed_updates else [], # Limit failure details
+ "failures": failed_updates[:3]
+ if failed_updates
+ else [], # Limit failure details
}
return await save_and_return_document(original_result, document_id, context)
@@ -1000,7 +1642,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
try:
replacements = json.loads(replacements)
except json.JSONDecodeError:
- return ActionError(message="Invalid replacements format: must be array or valid JSON string")
+ return ActionError(
+ message="Invalid replacements format: must be array or valid JSON string"
+ )
case_sensitive = inputs.get("case_sensitive", False)
files = inputs.get("files", [])
@@ -1020,7 +1664,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
for replacement in replacements:
find_text = replacement["find"]
- replace_text = replacement.get("replace", "") # Default to empty string if not provided
+ replace_text = replacement.get(
+ "replace", ""
+ ) # Default to empty string if not provided
replace_all = replacement.get("replace_all", False)
remove_paragraph = replacement.get("remove_paragraph", False)
@@ -1151,7 +1797,11 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
original_text = paragraph.text
is_full_paragraph_match = original_text.strip() == find_text.strip()
- if is_full_paragraph_match and replace_text.strip() == "" and remove_paragraph:
+ if (
+ is_full_paragraph_match
+ and replace_text.strip() == ""
+ and remove_paragraph
+ ):
# Mark paragraph for removal to eliminate spacing
paragraphs_to_remove.append(paragraph)
replacements_count += 1
@@ -1314,12 +1964,18 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
replacement_count += 1
if replacement_count > 0:
- changes_made.append(f"Replaced '{placeholder}' {replacement_count} times")
+ changes_made.append(
+ f"Replaced '{placeholder}' {replacement_count} times"
+ )
# 2. Position-based updates
if "position_data" in template_data:
- paragraphs = [block for block in iter_block_items(doc) if isinstance(block, Paragraph)]
- tables = [block for block in iter_block_items(doc) if isinstance(block, Table)]
+ paragraphs = [
+ block for block in iter_block_items(doc) if isinstance(block, Paragraph)
+ ]
+ tables = [
+ block for block in iter_block_items(doc) if isinstance(block, Table)
+ ]
for position_key, new_content in template_data["position_data"].items():
if position_key.startswith("paragraph_"):
@@ -1327,7 +1983,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
if idx < len(paragraphs):
# Use centralized parser for all content
if has_markdown_formatting(str(new_content)):
- parse_and_apply_markdown_formatting(paragraphs[idx], str(new_content))
+ parse_and_apply_markdown_formatting(
+ paragraphs[idx], str(new_content)
+ )
else:
paragraphs[idx].text = str(new_content)
changes_made.append(f"Updated paragraph {idx}")
@@ -1345,10 +2003,14 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
cell = table.cell(row_idx, col_idx)
# Use centralized parser for all content
if has_markdown_formatting(str(new_content)):
- parse_and_apply_markdown_formatting(cell, str(new_content))
+ parse_and_apply_markdown_formatting(
+ cell, str(new_content)
+ )
else:
cell.text = str(new_content)
- changes_made.append(f"Updated table {table_idx} cell ({row_idx},{col_idx})")
+ changes_made.append(
+ f"Updated table {table_idx} cell ({row_idx},{col_idx})"
+ )
# 3. Search and replace patterns (with safety analysis)
safety_warnings = []
@@ -1392,7 +2054,9 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
# Analyze safety if multiple matches
if len(matches_found) > 1 and not replace_all:
- safety_analysis = analyze_replacement_safety(find_text, matches_found)
+ safety_analysis = analyze_replacement_safety(
+ find_text, matches_found
+ )
if safety_analysis["safety_level"] == "high_risk":
# Block high-risk replacements
@@ -1431,9 +2095,15 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
for paragraph in doc.paragraphs:
if find_text.lower() in paragraph.text.lower():
original_text = paragraph.text
- is_full_match = original_text.strip().lower() == find_text.lower()
+ is_full_match = (
+ original_text.strip().lower() == find_text.lower()
+ )
- if is_full_match and replace_text.strip() == "" and remove_paragraph:
+ if (
+ is_full_match
+ and replace_text.strip() == ""
+ and remove_paragraph
+ ):
paragraphs_to_remove.append(paragraph)
replacement_count += 1
else:
@@ -1485,30 +2155,42 @@ async def execute(self, inputs: Dict[str, Any], context: ExecutionContext):
replacement_count += 1
if replacement_count > 0:
- changes_made.append(f"Found and replaced '{find_text}' {replacement_count} times")
+ changes_made.append(
+ f"Found and replaced '{find_text}' {replacement_count} times"
+ )
# Create LLM-optimized response with prominent safety warnings
- has_critical_warnings = any("CRITICAL_WARNING" in str(warning) for warning in safety_warnings)
+ has_critical_warnings = any(
+ "CRITICAL_WARNING" in str(warning) for warning in safety_warnings
+ )
blocked_operations = len([w for w in safety_warnings if "BLOCKED" in str(w)])
change_summary = {}
for change in changes_made:
if "Replaced" in change:
- change_summary["placeholders"] = change_summary.get("placeholders", 0) + 1
+ change_summary["placeholders"] = (
+ change_summary.get("placeholders", 0) + 1
+ )
elif "Found and replaced" in change:
change_summary["searches"] = change_summary.get("searches", 0) + 1
elif "Updated" in change:
change_summary["positions"] = change_summary.get("positions", 0) + 1
original_result = {
- "SAFETY_STATUS": "CRITICAL_ISSUES_DETECTED" if has_critical_warnings else "OK",
+ "SAFETY_STATUS": "CRITICAL_ISSUES_DETECTED"
+ if has_critical_warnings
+ else "OK",
"success": len(changes_made) > 0 and not has_critical_warnings,
"completed_operations": len(changes_made),
"blocked_operations": blocked_operations,
"safety_warnings": safety_warnings,
"filled_summary": change_summary,
- "template_status": "partially_complete" if blocked_operations > 0 else "complete",
- "action_required": "Review safety warnings and use more specific context" if safety_warnings else "none",
+ "template_status": "partially_complete"
+ if blocked_operations > 0
+ else "complete",
+ "action_required": "Review safety warnings and use more specific context"
+ if safety_warnings
+ else "none",
}
return await save_and_return_document(original_result, document_id, context)
diff --git a/doc-maker/tests/context.py b/doc-maker/tests/context.py
index 6058d41e..d048f14e 100644
--- a/doc-maker/tests/context.py
+++ b/doc-maker/tests/context.py
@@ -4,7 +4,9 @@
# Add paths for imports FIRST
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../dependencies")))
+sys.path.insert(
+ 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../dependencies"))
+)
# Now we can import the doc-maker module
try:
@@ -16,7 +18,9 @@
integration_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
os.chdir(integration_dir)
- spec = importlib.util.spec_from_file_location("doc_maker", os.path.join(integration_dir, "doc_maker.py"))
+ spec = importlib.util.spec_from_file_location(
+ "doc_maker", os.path.join(integration_dir, "doc_maker.py")
+ )
doc_maker_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(doc_maker_module)
@@ -25,7 +29,9 @@
# Export both the integration instance and the module (for unit tests)
doc_maker = doc_maker_module.doc_maker
- doc_maker_functions = doc_maker_module # For accessing utility functions in unit tests
+ doc_maker_functions = (
+ doc_maker_module # For accessing utility functions in unit tests
+ )
except ImportError as e:
print(f"Import error: {e}")
print("Available sys.path entries:")
diff --git a/doc-maker/tests/test_doc_maker_integration.py b/doc-maker/tests/test_doc_maker_integration.py
index b0fbca1e..63ccc8e4 100644
--- a/doc-maker/tests/test_doc_maker_integration.py
+++ b/doc-maker/tests/test_doc_maker_integration.py
@@ -26,7 +26,9 @@
_original_cwd = os.getcwd()
os.chdir(_parent)
-_spec = importlib.util.spec_from_file_location("doc_maker_mod_intg", os.path.join(_parent, "doc_maker.py"))
+_spec = importlib.util.spec_from_file_location(
+ "doc_maker_mod_intg", os.path.join(_parent, "doc_maker.py")
+)
_mod = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_mod)
os.chdir(_original_cwd)
@@ -77,7 +79,9 @@ async def test_create_document_returns_valid_docx_binary(self, live_context):
assert_valid_docx(file_obj)
async def test_create_document_with_title(self, live_context):
- result = await doc_maker.execute_action("create_document", {"title": "My Integration Test Doc"}, live_context)
+ result = await doc_maker.execute_action(
+ "create_document", {"title": "My Integration Test Doc"}, live_context
+ )
assert result.type == ResultType.ACTION
data = result.result.data
@@ -85,8 +89,12 @@ async def test_create_document_with_title(self, live_context):
assert data["paragraph_count"] >= 1
async def test_create_document_with_markdown_content(self, live_context):
- markdown = "# Test Heading\n\nThis is a **bold** paragraph.\n\n- Item one\n- Item two"
- result = await doc_maker.execute_action("create_document", {"markdown_content": markdown}, live_context)
+ markdown = (
+ "# Test Heading\n\nThis is a **bold** paragraph.\n\n- Item one\n- Item two"
+ )
+ result = await doc_maker.execute_action(
+ "create_document", {"markdown_content": markdown}, live_context
+ )
assert result.type == ResultType.ACTION
data = result.result.data
@@ -96,7 +104,9 @@ async def test_create_document_with_markdown_content(self, live_context):
class TestAddTable:
async def test_add_table_to_document(self, live_context):
- create_result = await doc_maker.execute_action("create_document", {}, live_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, live_context
+ )
document_id = create_result.result.data["document_id"]
file_obj = create_result.result.data["file"]
@@ -124,7 +134,9 @@ class TestSaveDocument:
async def test_save_document_returns_valid_docx(self, live_context):
create_result = await doc_maker.execute_action(
"create_document",
- {"markdown_content": "# Integration Test\n\nGenerated by doc-maker integration tests."},
+ {
+ "markdown_content": "# Integration Test\n\nGenerated by doc-maker integration tests."
+ },
live_context,
)
document_id = create_result.result.data["document_id"]
@@ -161,7 +173,9 @@ async def test_save_missing_document_returns_error(self, live_context):
class TestAddMarkdownContent:
async def test_add_markdown_to_existing_document(self, live_context):
- create_result = await doc_maker.execute_action("create_document", {}, live_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, live_context
+ )
document_id = create_result.result.data["document_id"]
file_obj = create_result.result.data["file"]
diff --git a/doc-maker/tests/test_doc_maker_unit.py b/doc-maker/tests/test_doc_maker_unit.py
index f7dd0187..cd420aea 100644
--- a/doc-maker/tests/test_doc_maker_unit.py
+++ b/doc-maker/tests/test_doc_maker_unit.py
@@ -17,7 +17,9 @@
# Load the module from its file location (Integration.load() needs the cwd set)
_original_cwd = os.getcwd()
os.chdir(_parent)
-_spec = importlib.util.spec_from_file_location("doc_maker_mod", os.path.join(_parent, "doc_maker.py"))
+_spec = importlib.util.spec_from_file_location(
+ "doc_maker_mod", os.path.join(_parent, "doc_maker.py")
+)
_mod = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_mod)
os.chdir(_original_cwd)
@@ -28,6 +30,7 @@
is_likely_placeholder_context = _mod.is_likely_placeholder_context
analyze_replacement_safety = _mod.analyze_replacement_safety
_save_document_to_dict = _mod._save_document_to_dict
+parse_markdown_to_docx = _mod.parse_markdown_to_docx
documents = _mod.documents
pytestmark = pytest.mark.unit
@@ -51,7 +54,9 @@ def _make_docx_bytes() -> bytes:
return buf.read()
-def _make_file_item(name: str, data: bytes, content_type: str = "application/octet-stream") -> dict:
+def _make_file_item(
+ name: str, data: bytes, content_type: str = "application/octet-stream"
+) -> dict:
return {
"name": name,
"contentType": content_type,
@@ -101,12 +106,16 @@ def test_whitespace_only(self):
assert is_ph is True
def test_real_content_not_placeholder(self):
- is_ph, pattern = detect_placeholder_patterns("This is a complete sentence with actual content.")
+ is_ph, pattern = detect_placeholder_patterns(
+ "This is a complete sentence with actual content."
+ )
assert is_ph is False
assert pattern == "content"
def test_business_content_not_placeholder(self):
- is_ph, _ = detect_placeholder_patterns("The quarterly revenue exceeded expectations.")
+ is_ph, _ = detect_placeholder_patterns(
+ "The quarterly revenue exceeded expectations."
+ )
assert is_ph is False
@@ -147,10 +156,20 @@ def test_instruction_phrase(self):
assert is_likely_placeholder_context("insert data here", "data") is True
def test_content_sentence_not_placeholder(self):
- assert is_likely_placeholder_context("The project name should be descriptive.", "name") is False
+ assert (
+ is_likely_placeholder_context(
+ "The project name should be descriptive.", "name"
+ )
+ is False
+ )
def test_complete_sentence_not_placeholder(self):
- assert is_likely_placeholder_context("The date for the meeting has been set.", "date") is False
+ assert (
+ is_likely_placeholder_context(
+ "The date for the meeting has been set.", "date"
+ )
+ is False
+ )
class TestAnalyzeReplacementSafety:
@@ -261,7 +280,9 @@ async def test_save_missing_document_returns_action_error(self, mock_context):
@pytest.mark.asyncio
async def test_save_existing_document_succeeds(self, mock_context):
# Create doc first
- create_result = await doc_maker.execute_action("create_document", {}, mock_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, mock_context
+ )
doc_id = create_result.result.data["document_id"]
result = await doc_maker.execute_action(
@@ -322,7 +343,9 @@ async def test_get_elements_response_shape(self, mock_context):
class TestAddTable:
@pytest.mark.asyncio
async def test_add_table_to_document(self, mock_context):
- create_result = await doc_maker.execute_action("create_document", {}, mock_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, mock_context
+ )
doc_id = create_result.result.data["document_id"]
docx_bytes = base64.b64decode(create_result.result.data["file"]["content"])
file_item = _make_file_item(f"{doc_id}.docx", docx_bytes)
@@ -360,7 +383,9 @@ async def test_add_table_missing_document_raises(self, mock_context):
class TestAddMarkdownContent:
@pytest.mark.asyncio
async def test_add_markdown_to_existing_document(self, mock_context):
- create_result = await doc_maker.execute_action("create_document", {}, mock_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, mock_context
+ )
doc_id = create_result.result.data["document_id"]
docx_bytes = base64.b64decode(create_result.result.data["file"]["content"])
file_item = _make_file_item(f"{doc_id}.docx", docx_bytes)
@@ -391,7 +416,9 @@ async def test_add_markdown_missing_document_raises(self, mock_context):
class TestAddPageBreak:
@pytest.mark.asyncio
async def test_add_page_break(self, mock_context):
- create_result = await doc_maker.execute_action("create_document", {}, mock_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, mock_context
+ )
doc_id = create_result.result.data["document_id"]
docx_bytes = base64.b64decode(create_result.result.data["file"]["content"])
file_item = _make_file_item(f"{doc_id}.docx", docx_bytes)
@@ -431,7 +458,9 @@ async def test_update_paragraph_by_position(self, mock_context):
"update_by_position",
{
"document_id": doc_id,
- "updates": [{"type": "paragraph", "index": 0, "content": "Updated content"}],
+ "updates": [
+ {"type": "paragraph", "index": 0, "content": "Updated content"}
+ ],
"files": [file_item],
},
mock_context,
@@ -471,7 +500,9 @@ async def test_find_and_replace_basic(self, mock_context):
"find_and_replace",
{
"document_id": doc_id,
- "replacements": [{"find": "{{NAME}}", "replace": "Alice", "replace_all": True}],
+ "replacements": [
+ {"find": "{{NAME}}", "replace": "Alice", "replace_all": True}
+ ],
"files": [file_item],
},
mock_context,
@@ -506,7 +537,9 @@ async def test_find_no_match_returns_warning(self, mock_context):
"find_and_replace",
{
"document_id": doc_id,
- "replacements": [{"find": "NONEXISTENT_TEXT_XYZ", "replace": "replacement"}],
+ "replacements": [
+ {"find": "NONEXISTENT_TEXT_XYZ", "replace": "replacement"}
+ ],
"files": [file_item],
},
mock_context,
@@ -519,7 +552,9 @@ async def test_find_no_match_returns_warning(self, mock_context):
async def test_find_and_replace_invalid_type_replacements(self, mock_context):
# The SDK validates input schema: replacements must be array, so passing a
# non-array non-string type triggers VALIDATION_ERROR before the handler runs.
- create_result = await doc_maker.execute_action("create_document", {}, mock_context)
+ create_result = await doc_maker.execute_action(
+ "create_document", {}, mock_context
+ )
doc_id = create_result.result.data["document_id"]
docx_bytes = base64.b64decode(create_result.result.data["file"]["content"])
file_item = _make_file_item(f"{doc_id}.docx", docx_bytes)
@@ -583,3 +618,1070 @@ def test_missing_document_returns_error_dict(self):
assert result["saved"] is False
assert "nonexistent-id" in result["error"]
assert result["file"]["content"] == ""
+
+
+class TestParenthesizedListNumbering:
+ """Verify that (1), (a), (i) style lists produce correct Word numbering."""
+
+ MARKDOWN = (
+ "1. Elephant\n"
+ " (a) Elephants are the largest land animals on Earth, "
+ "with African elephants weighing up to 14,000 lbs.\n"
+ " (b) They have an exceptional memory and can recognize "
+ "themselves in mirrors, indicating self-awareness.\n"
+ "2. Axolotl\n"
+ " (a) Axolotls can regenerate entire limbs, including "
+ "parts of their heart and brain.\n"
+ " (b) Unlike most amphibians, axolotls retain their larval "
+ "features throughout their entire lives, a trait called neoteny."
+ )
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ """Return (numId, ilvl) from a paragraph's w:numPr, or None."""
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_abstract_num_for(doc, num_id):
+ """Return the abstractNum element referenced by a given numId."""
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) == num_id:
+ abstract_ref = num_el.find(qn("w:abstractNumId"))
+ abstract_id = int(abstract_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abstract_id:
+ return an
+ return None
+
+ def test_produces_six_numbered_paragraphs(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ assert len(numbered) == 6, (
+ f"Expected 6 numbered paragraphs, got {len(numbered)}: {numbered}"
+ )
+
+ def test_top_level_items_are_at_ilvl_zero(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ top_items = [
+ (text, numpr)
+ for text, numpr in numbered
+ if "Elephant" == text or "Axolotl" == text
+ ]
+ assert len(top_items) == 2, f"Expected 2 top-level items, got {top_items}"
+ for text, (num_id, ilvl) in top_items:
+ assert ilvl == 0, f"'{text}' should be at ilvl 0, got {ilvl}"
+
+ def test_sub_items_are_indented(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ sub_items = [
+ (text, numpr)
+ for text, numpr in numbered
+ if text not in ("Elephant", "Axolotl")
+ ]
+ assert len(sub_items) == 4, f"Expected 4 sub-items, got {len(sub_items)}"
+ for text, (num_id, ilvl) in sub_items:
+ assert ilvl >= 1, (
+ f"Sub-item should be indented (ilvl >= 1), got {ilvl}: {text}"
+ )
+
+ def test_top_level_uses_decimal_numbering(self):
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ elephant = next((text, numpr) for text, numpr in numbered if text == "Elephant")
+ num_id = elephant[1][0]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ assert abstract is not None
+ lvl0 = abstract.find(qn("w:lvl"))
+ fmt = lvl0.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "decimal", f"Top-level should be decimal, got {fmt}"
+
+ def test_sub_items_use_lower_letter_parenthesized(self):
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ first_sub = next(
+ (text, numpr) for text, numpr in numbered if "Elephants are" in text
+ )
+ num_id, ilvl = first_sub[1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ assert abstract is not None
+
+ # Find the lvl element matching the ilvl used
+ target_lvl = None
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ target_lvl = lvl
+ break
+ assert target_lvl is not None
+
+ fmt = target_lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "lowerLetter", f"Sub-items should be lowerLetter, got {fmt}"
+ lvl_text = target_lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "(" in lvl_text, (
+ f"Sub-items should have parenthesized format, got '{lvl_text}'"
+ )
+
+ def test_elephant_text_on_same_line_as_number(self):
+ """The parent item text must appear in the same paragraph as the numbering."""
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ elephant_paras = [(t, n) for t, n in numbered if "Elephant" in t and n[1] == 0]
+ assert len(elephant_paras) >= 1
+ assert elephant_paras[0][0] == "Elephant", (
+ f"Top-level text should be exactly 'Elephant', got '{elephant_paras[0][0]}'"
+ )
+
+ def test_parent_and_children_share_same_numid(self):
+ """Word requires nested lists to share the same numId to render correctly."""
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ num_ids = set(numpr[0] for _, numpr in numbered)
+ assert len(num_ids) == 1, (
+ f"All items should share one numId for coherent multilevel numbering, got {num_ids}"
+ )
+
+
+class TestMultipleParenListsAfterHeadings:
+ """Verify that multiple (1)-style lists separated by headings all display numbering
+ and are left-aligned when the markdown has no leading spaces."""
+
+ MARKDOWN = "# Animals\n(1) Elephant\n(2) Tiger\n# Fish\n(1) squid\n(2) Whale"
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_abstract_num_for(doc, num_id):
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) == num_id:
+ abstract_ref = num_el.find(qn("w:abstractNumId"))
+ abstract_id = int(abstract_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abstract_id:
+ return an
+ return None
+
+ def test_both_lists_produce_numbered_paragraphs(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ assert len(numbered) == 4, (
+ f"Expected 4 numbered paragraphs, got {len(numbered)}: {numbered}"
+ )
+
+ def test_each_list_has_its_own_abstract_num(self):
+ """Each independent list must get its own abstractNum to avoid Word dropping numbers."""
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ animals_num_id = numbered[0][1][0]
+ fish_num_id = numbered[2][1][0]
+
+ animals_abstract = self._get_abstract_num_for(doc, animals_num_id)
+ fish_abstract = self._get_abstract_num_for(doc, fish_num_id)
+
+ assert animals_abstract is not None
+ assert fish_abstract is not None
+
+ animals_abstract_id = int(animals_abstract.get(qn("w:abstractNumId")))
+ fish_abstract_id = int(fish_abstract.get(qn("w:abstractNumId")))
+ assert animals_abstract_id != fish_abstract_id, (
+ "Each list should reference a different abstractNum to prevent Word from dropping numbers"
+ )
+
+ def test_all_items_at_ilvl_zero(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ for text, (num_id, ilvl) in numbered:
+ assert ilvl == 0, f"'{text}' should be at ilvl 0, got {ilvl}"
+
+ def test_level_zero_is_left_aligned(self):
+ """Level-0 paren lists with no leading spaces should be left-aligned (left=hanging, hanging=504)."""
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ first_num_id = numbered[0][1][0]
+ abstract = self._get_abstract_num_for(doc, first_num_id)
+ assert abstract is not None
+
+ lvl0 = None
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == 0:
+ lvl0 = lvl
+ break
+ assert lvl0 is not None
+
+ pPr = lvl0.find(qn("w:pPr"))
+ assert pPr is not None
+ ind = pPr.find(qn("w:ind"))
+ assert ind is not None
+ left = ind.get(qn("w:left"))
+ hanging = ind.get(qn("w:hanging"))
+ assert left == hanging, (
+ f"Level 0 left indent should equal hanging (left-aligned), got left={left}, hanging={hanging}"
+ )
+
+ def test_all_use_decimal_parenthesized_format(self):
+ from docx import Document
+ from docx.oxml.ns import qn
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+
+ numbered = [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+ for text, (num_id, ilvl) in numbered:
+ abstract = self._get_abstract_num_for(doc, num_id)
+ assert abstract is not None
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "decimal", f"'{text}' should use decimal, got {fmt}"
+ lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "(" in lvl_text, (
+ f"'{text}' should have paren format, got '{lvl_text}'"
+ )
+
+
+class TestMixedNumberedListFormats:
+ """Verify that five different numbered-list styles all render correctly:
+ 1. standard ``1. 2. 3.`` decimal
+ 2. parenthesized decimal ``(1) (2) (3)``
+ 3. parenthesized lower letter ``(a) (b) (c)``
+ 4. parenthesized upper letter ``(A) (B) (C)``
+ 5. parenthesized lower roman ``(i) (ii) (iii)``
+
+ Each list must:
+ - be a real numbered (not bullet) list in the OOXML
+ - use the correct numFmt
+ - use left-aligned justification
+ - display the correct item text ("one", "two", "three")
+ - have a consistent hanging indent so text is aligned across items
+ whose numbering labels differ in width (e.g. ``(i)`` vs ``(iii)``).
+ """
+
+ MARKDOWN = (
+ "# numbers\n"
+ "1. one\n"
+ "2. two\n"
+ "3. three\n"
+ "# numbers in brackets\n"
+ "(1) one\n"
+ "(2) two\n"
+ "(3) three\n"
+ "# letters in brackets\n"
+ "(a) one\n"
+ "(b) two\n"
+ "(c) three\n"
+ "# capital letters in brackets\n"
+ "(A) one\n"
+ "(B) two\n"
+ "(C) three\n"
+ "# roman numerals\n"
+ "(i) one\n"
+ "(ii) two\n"
+ "(iii) three"
+ )
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_abstract_num_for(doc, num_id):
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) == num_id:
+ abstract_ref = num_el.find(qn("w:abstractNumId"))
+ abstract_id = int(abstract_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abstract_id:
+ return an
+ return None
+
+ @staticmethod
+ def _get_lvl(abstract, ilvl):
+ from docx.oxml.ns import qn
+
+ for lvl in abstract.findall(qn("w:lvl")):
+ if int(lvl.get(qn("w:ilvl"))) == ilvl:
+ return lvl
+ return None
+
+ def _build_doc(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+ return doc
+
+ def _numbered_paragraphs(self, doc):
+ return [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+
+ # ---- 1. All 15 items are numbered paragraphs ----
+
+ def test_produces_fifteen_numbered_paragraphs(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ assert len(numbered) == 15, (
+ f"Expected 15 numbered paragraphs (5 lists × 3 items), got {len(numbered)}: "
+ f"{[t for t, _ in numbered]}"
+ )
+
+ # ---- 2. Item text is correct ----
+
+ def test_item_text_is_correct(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ texts = [t for t, _ in numbered]
+ for i in range(5):
+ group = texts[i * 3 : i * 3 + 3]
+ assert group == ["one", "two", "three"], (
+ f"List group {i} text should be ['one', 'two', 'three'], got {group}"
+ )
+
+ # ---- 3. Each list uses the correct numFmt ----
+
+ def test_standard_decimal_uses_decimal_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[0][1][0]
+ ilvl = numbered[0][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "decimal", f"Standard numbered list should be decimal, got {fmt}"
+
+ def test_paren_decimal_uses_decimal_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[3][1][0]
+ ilvl = numbered[3][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "decimal", f"Paren decimal list should be decimal, got {fmt}"
+
+ def test_paren_lower_letter_uses_lower_letter_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[6][1][0]
+ ilvl = numbered[6][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "lowerLetter", (
+ f"Lower letter list should be lowerLetter, got {fmt}"
+ )
+
+ def test_paren_upper_letter_uses_upper_letter_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[9][1][0]
+ ilvl = numbered[9][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "upperLetter", (
+ f"Upper letter list should be upperLetter, got {fmt}"
+ )
+
+ def test_paren_roman_uses_lower_roman_format(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[12][1][0]
+ ilvl = numbered[12][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ fmt = lvl.find(qn("w:numFmt")).get(qn("w:val"))
+ assert fmt == "lowerRoman", (
+ f"Roman numeral list should be lowerRoman, got {fmt}"
+ )
+
+ # ---- 4. Parenthesized lvlText for bracket lists ----
+
+ def test_paren_lists_use_parenthesized_lvl_text(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ # Lists at indices 3, 6, 9, 12 are the paren lists
+ for start_idx in (3, 6, 9, 12):
+ num_id = numbered[start_idx][1][0]
+ ilvl = numbered[start_idx][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "(" in lvl_text and ")" in lvl_text, (
+ f"Item '{numbered[start_idx][0]}' (idx {start_idx}) should have "
+ f"parenthesized lvlText, got '{lvl_text}'"
+ )
+
+ def test_standard_decimal_uses_dot_lvl_text(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id = numbered[0][1][0]
+ ilvl = numbered[0][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ lvl_text = lvl.find(qn("w:lvlText")).get(qn("w:val"))
+ assert "." in lvl_text, (
+ f"Standard decimal should use dot format, got '{lvl_text}'"
+ )
+
+ # ---- 5. Left-aligned justification ----
+
+ def test_all_lists_are_left_aligned(self):
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ checked = set()
+ for text, (num_id, ilvl) in numbered:
+ key = (num_id, ilvl)
+ if key in checked:
+ continue
+ checked.add(key)
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ jc = lvl.find(qn("w:lvlJc"))
+ assert jc is not None, f"'{text}' level should have lvlJc element"
+ assert jc.get(qn("w:val")) == "left", (
+ f"'{text}' should be left-aligned, got '{jc.get(qn('w:val'))}'"
+ )
+
+ # ---- 6. Text alignment consistency (hanging indent) ----
+
+ def test_items_within_each_list_share_same_hanging_indent(self):
+ """All items in a single list must use the same hanging indent so that
+ the text column is aligned even when numbering labels vary in width
+ (e.g. ``(i)`` vs ``(iii)``)."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ for start_idx in range(0, 15, 3):
+ group = numbered[start_idx : start_idx + 3]
+ # All items in a group share the same numId + ilvl, so they share
+ # the same abstractNum level definition → same indent.
+ num_id = group[0][1][0]
+ ilvl = group[0][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ pPr = lvl.find(qn("w:pPr"))
+ assert pPr is not None, f"Level {ilvl} should have pPr"
+ ind = pPr.find(qn("w:ind"))
+ assert ind is not None, f"Level {ilvl} should have indent"
+ hanging = ind.get(qn("w:hanging"))
+ left = ind.get(qn("w:left"))
+ assert hanging is not None, (
+ f"Hanging indent should be set for list starting at idx {start_idx}"
+ )
+ assert left is not None, (
+ f"Left indent should be set for list starting at idx {start_idx}"
+ )
+
+ def test_all_lists_share_same_hanging_indent(self):
+ """All lists at the same indentation level must use the same left and
+ hanging indent so that item text aligns at the same column regardless
+ of whether the list uses ``1.``, ``(a)``, ``(A)``, or ``(iii)`` labels."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ indent_values = []
+ for start_idx in range(0, 15, 3):
+ num_id = numbered[start_idx][1][0]
+ ilvl = numbered[start_idx][1][1]
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ ind = lvl.find(qn("w:pPr")).find(qn("w:ind"))
+ hanging = ind.get(qn("w:hanging"))
+ left = ind.get(qn("w:left"))
+ indent_values.append((left, hanging))
+
+ first = indent_values[0]
+ for i, val in enumerate(indent_values):
+ assert val == first, (
+ f"List group {i} indent {val} differs from group 0 indent {first}; "
+ f"all lists must share the same indent for consistent text alignment"
+ )
+
+ def test_all_levels_use_tab_suffix(self):
+ """Each numbering level must use ``<w:suff w:val="tab"/>`` so Word
+ inserts a tab (not a space) after the label. This ensures text aligns
+ at the left-indent position regardless of label width."""
+ from docx.oxml.ns import qn
+
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+
+ checked = set()
+ for text, (num_id, ilvl) in numbered:
+ key = (num_id, ilvl)
+ if key in checked:
+ continue
+ checked.add(key)
+ abstract = self._get_abstract_num_for(doc, num_id)
+ lvl = self._get_lvl(abstract, ilvl)
+ suff = lvl.find(qn("w:suff"))
+ assert suff is not None, f"'{text}' level should have a element"
+ assert suff.get(qn("w:val")) == "tab", (
+ f"'{text}' suffix should be 'tab', got '{suff.get(qn('w:val'))}'"
+ )
+
+
+class TestOrderedListStartOverride:
+ """Verify that ordered lists respect the start number from the markdown.
+
+ Markdown input:
+ # numbers
+ 1. one
+ 2. two
+ 3. three
+ # continuation of numbers
+ 4. four
+ 5. five
+ 6. six
+
+ Expected: two separate lists, each with 3 items.
+ The first list numbers 1, 2, 3; the second list numbers 4, 5, 6.
+ """
+
+ MARKDOWN = (
+ "# numbers\n"
+ "1. one\n"
+ "2. two\n"
+ "3. three\n"
+ "# continuation of numbers\n"
+ "4. four\n"
+ "5. five\n"
+ "6. six\n"
+ )
+
+ EXPECTED_ITEMS = [
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("four", 4),
+ ("five", 5),
+ ("six", 6),
+ ]
+
+ @staticmethod
+ def _get_numpr(paragraph):
+ from docx.oxml.ns import qn
+
+ pPr = paragraph._p.find(qn("w:pPr"))
+ if pPr is None:
+ return None
+ numPr = pPr.find(qn("w:numPr"))
+ if numPr is None:
+ return None
+ numId_el = numPr.find(qn("w:numId"))
+ ilvl_el = numPr.find(qn("w:ilvl"))
+ if numId_el is None or ilvl_el is None:
+ return None
+ return int(numId_el.get(qn("w:val"))), int(ilvl_el.get(qn("w:val")))
+
+ @staticmethod
+ def _get_start_val(doc, num_id, ilvl):
+ """Return the effective start value for a numbering instance.
+
+ Checks <w:lvlOverride>/<w:startOverride> first, then falls back to
+ the <w:start> value in the abstract numbering level definition.
+ """
+ from docx.oxml.ns import qn
+
+ numbering = doc.part.numbering_part._element
+ for num_el in numbering.findall(qn("w:num")):
+ if int(num_el.get(qn("w:numId"))) != num_id:
+ continue
+ # Check for startOverride
+ for ovr in num_el.findall(qn("w:lvlOverride")):
+ if int(ovr.get(qn("w:ilvl"))) == ilvl:
+ start_ovr = ovr.find(qn("w:startOverride"))
+ if start_ovr is not None:
+ return int(start_ovr.get(qn("w:val")))
+ # Fall back to abstract num
+ abs_ref = num_el.find(qn("w:abstractNumId"))
+ abs_id = int(abs_ref.get(qn("w:val")))
+ for an in numbering.findall(qn("w:abstractNum")):
+ if int(an.get(qn("w:abstractNumId"))) == abs_id:
+ for lvl_el in an.findall(qn("w:lvl")):
+ if int(lvl_el.get(qn("w:ilvl"))) == ilvl:
+ start_el = lvl_el.find(qn("w:start"))
+ if start_el is not None:
+ return int(start_el.get(qn("w:val")))
+ return None
+
+ def _build_doc(self):
+ from docx import Document
+
+ doc = Document()
+ parse_markdown_to_docx(doc, self.MARKDOWN)
+ return doc
+
+ def _numbered_paragraphs(self, doc):
+ return [
+ (p.text.strip(), self._get_numpr(p))
+ for p in doc.paragraphs
+ if self._get_numpr(p)
+ ]
+
+ def test_produces_six_numbered_paragraphs(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ assert len(numbered) == 6, (
+ f"Expected 6 numbered paragraphs, got {len(numbered)}: "
+ f"{[t for t, _ in numbered]}"
+ )
+
+ def test_two_distinct_lists(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_ids = [numpr[0] for _, numpr in numbered]
+ distinct = list(dict.fromkeys(num_ids))
+ assert len(distinct) == 2, (
+ f"Expected 2 distinct numIds (two lists), got {len(distinct)}: {distinct}"
+ )
+
+ def test_each_list_has_three_items(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_ids = [numpr[0] for _, numpr in numbered]
+ distinct = list(dict.fromkeys(num_ids))
+ first_count = sum(1 for n in num_ids if n == distinct[0])
+ second_count = sum(1 for n in num_ids if n == distinct[1])
+ assert first_count == 3, f"First list should have 3 items, got {first_count}"
+ assert second_count == 3, f"Second list should have 3 items, got {second_count}"
+
+ def test_item_text_matches(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ for idx, (text, _) in enumerate(numbered):
+ expected_text = self.EXPECTED_ITEMS[idx][0]
+ assert text == expected_text, (
+ f"Item {idx}: expected text {expected_text!r}, got {text!r}"
+ )
+
+ def test_first_list_starts_at_one(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id, ilvl = numbered[0][1]
+ start = self._get_start_val(doc, num_id, ilvl)
+ assert start == 1, f"First list should start at 1, got {start}"
+
+ def test_second_list_starts_at_four(self):
+ doc = self._build_doc()
+ numbered = self._numbered_paragraphs(doc)
+ num_id, ilvl = numbered[3][1]
+ start = self._get_start_val(doc, num_id, ilvl)
+ assert start == 4, f"Second list should start at 4, got {start}"
+
def test_effective_numbers_are_correct(self):
    """Check each item's effective number: list start plus offset in its list.

    The offset is the count of earlier numbered paragraphs that share the
    item's numId; the expected number is the second field of EXPECTED_ITEMS.
    """
    document = self._build_doc()
    numbered = self._numbered_paragraphs(document)

    # numId -> number of items of that list already visited.
    seen_per_list = {}
    for idx, (text, (num_id, ilvl)) in enumerate(numbered):
        list_start = self._get_start_val(document, num_id, ilvl)
        position_in_list = seen_per_list.get(num_id, 0)
        seen_per_list[num_id] = position_in_list + 1
        effective_number = list_start + position_in_list
        expected_number = self.EXPECTED_ITEMS[idx][1]
        assert effective_number == expected_number, (
            f"Item {idx} ({text!r}): expected number {expected_number}, "
            f"got {effective_number} (list_start={list_start}, pos={position_in_list})"
        )
+
def test_abstract_num_start_matches_override(self):
    """The abstract numbering start of the second list must be 4.

    Intent (from the original test): the abstract ``w:start`` should carry
    the same value as the list's startOverride, so that renderers which
    ignore lvlOverride still produce correct numbering. Only the abstract
    start value is inspected here.

    The test fails when no ``w:lvl`` matching the paragraph's numId/ilvl can
    be located, so it cannot pass without checking at least one start value.
    """
    from docx.oxml.ns import qn

    doc = self._build_doc()
    numbered = self._numbered_paragraphs(doc)
    numbering = doc.part.numbering_part._element

    # Second list (items 3-5) should have abstract start = 4
    num_id, ilvl = numbered[3][1]
    levels_checked = 0
    for num_el in numbering.findall(qn("w:num")):
        if int(num_el.get(qn("w:numId"))) != num_id:
            continue
        abs_ref = num_el.find(qn("w:abstractNumId"))
        assert abs_ref is not None, f"w:num {num_id} has no w:abstractNumId"
        abs_id = int(abs_ref.get(qn("w:val")))
        for an in numbering.findall(qn("w:abstractNum")):
            if int(an.get(qn("w:abstractNumId"))) != abs_id:
                continue
            for lvl_el in an.findall(qn("w:lvl")):
                if int(lvl_el.get(qn("w:ilvl"))) != ilvl:
                    continue
                start_el = lvl_el.find(qn("w:start"))
                assert start_el is not None, (
                    f"w:lvl ilvl={ilvl} of abstractNum {abs_id} has no w:start"
                )
                assert int(start_el.get(qn("w:val"))) == 4, (
                    f"Abstract numbering start should be 4, "
                    f"got {start_el.get(qn('w:val'))}"
                )
                levels_checked += 1

    # Without this guard a failed lookup would skip every assertion above
    # and the test would pass without verifying anything.
    assert levels_checked, (
        f"No w:lvl with ilvl={ilvl} found for numId={num_id}; "
        "the abstract start value was never checked"
    )
+
+
class TestNestedNumberedListIndentation:
    """Check nesting level, number format and indents of a five-level list.

    The markdown fixture nests parenthesised markers below ``1.`` / ``2.``.
    Logical outline of its items (the indentation here shows nesting only,
    not the literal whitespace used in the fixture)::

        1. one
            (1) one
                (a) one
                    (A) one
                        (i) one
                        (ii) two
                    (B) two
                        (i) one
                        (ii) two
                (b) two
                    (A) one
                    (B) two
            (2) two
                (a) one
                (b) two
        2. two
            (1) one
            (2) two

    Expected nesting levels (ilvl):
        0 -> 1. / 2.
        1 -> (1) / (2)
        2 -> (a) / (b)
        3 -> (A) / (B)
        4 -> (i) / (ii)

    Each ilvl must have left indent = hanging * (ilvl + 1) where
    hanging = 504 twips.
    """

    # The fixture mixes tab and space indentation.
    MARKDOWN = (
        "# nested numbered lists\n"
        "1. one\n"
        " (1) one\n"
        "\t (a) one\n"
        "\t\t (A) one\n"
        "\t\t\t (i) one\n"
        "\t\t\t\t(ii) two\n"
        "\t\t\t(B) two\n"
        "\t\t\t (i) one\n"
        "\t\t\t\t(ii) two\n"
        "\t\t(b) two\n"
        "\t\t (A) one\n"
        "\t\t\t(B) two\n"
        "\t(2) two\n"
        "\t (a) one\n"
        "\t\t(b) two\n"
        "2. two\n"
        " (1) one\n"
        "\t(2) two\n"
    )

    # One row per numbered paragraph, in document order:
    # (expected_text, expected_ilvl, expected_num_fmt)
    EXPECTED_ITEMS = [
        ("one", 0, "decimal"),  # 1.
        ("one", 1, "decimal"),  # (1)
        ("one", 2, "lowerLetter"),  # (a)
        ("one", 3, "upperLetter"),  # (A)
        ("one", 4, "lowerRoman"),  # (i)
        ("two", 4, "lowerRoman"),  # (ii)
        ("two", 3, "upperLetter"),  # (B)
        ("one", 4, "lowerRoman"),  # (i)
        ("two", 4, "lowerRoman"),  # (ii)
        ("two", 2, "lowerLetter"),  # (b)
        ("one", 3, "upperLetter"),  # (A)
        ("two", 3, "upperLetter"),  # (B)
        ("two", 1, "decimal"),  # (2)
        ("one", 2, "lowerLetter"),  # (a)
        ("two", 2, "lowerLetter"),  # (b)
        ("two", 0, "decimal"),  # 2.
        ("one", 1, "decimal"),  # (1)
        ("two", 1, "decimal"),  # (2)
    ]

    HANGING = 504  # _LIST_HANGING_INDENT

    @staticmethod
    def _get_numpr(paragraph):
        """Return ``(numId, ilvl)`` read from the paragraph's ``w:numPr``.

        Returns ``None`` when the paragraph has no ``w:pPr``, no ``w:numPr``,
        or when either ``w:numId`` or ``w:ilvl`` is missing.
        """
        from docx.oxml.ns import qn

        para_props = paragraph._p.find(qn("w:pPr"))
        num_props = None if para_props is None else para_props.find(qn("w:numPr"))
        if num_props is None:
            return None
        id_el = num_props.find(qn("w:numId"))
        level_el = num_props.find(qn("w:ilvl"))
        if id_el is not None and level_el is not None:
            return int(id_el.get(qn("w:val"))), int(level_el.get(qn("w:val")))
        return None

    @staticmethod
    def _get_abstract_num_for(doc, num_id):
        """Return the ``w:abstractNum`` element referenced by ``w:num`` *num_id*.

        Returns ``None`` when no ``w:num`` with that numId resolves to an
        existing ``w:abstractNum``.
        """
        from docx.oxml.ns import qn

        root = doc.part.numbering_part._element
        for num_el in root.findall(qn("w:num")):
            if int(num_el.get(qn("w:numId"))) != num_id:
                continue
            wanted = int(num_el.find(qn("w:abstractNumId")).get(qn("w:val")))
            for candidate in root.findall(qn("w:abstractNum")):
                if int(candidate.get(qn("w:abstractNumId"))) == wanted:
                    return candidate
        return None

    @staticmethod
    def _get_lvl(abstract, ilvl):
        """Return the first ``w:lvl`` child of *abstract* at *ilvl*, else ``None``."""
        from docx.oxml.ns import qn

        matching = (
            level
            for level in abstract.findall(qn("w:lvl"))
            if int(level.get(qn("w:ilvl"))) == ilvl
        )
        return next(matching, None)

    def _lvl_for(self, doc, num_id, ilvl):
        """Return the ``w:lvl`` element describing *ilvl* of list *num_id*."""
        return self._get_lvl(self._get_abstract_num_for(doc, num_id), ilvl)

    def _build_doc(self):
        """Create an empty document and render MARKDOWN into it."""
        from docx import Document

        document = Document()
        parse_markdown_to_docx(document, self.MARKDOWN)
        return document

    def _numbered_paragraphs(self, doc):
        """Return ``(stripped_text, (numId, ilvl))`` for each numbered paragraph."""
        found = []
        for paragraph in doc.paragraphs:
            num_pr = self._get_numpr(paragraph)
            if num_pr:
                found.append((paragraph.text.strip(), num_pr))
        return found

    def test_produces_eighteen_numbered_paragraphs(self):
        """The fixture must yield exactly 18 numbered paragraphs."""
        numbered = self._numbered_paragraphs(self._build_doc())
        count = len(numbered)
        assert count == 18, (
            f"Expected 18 numbered paragraphs, got {count}: "
            f"{[t for t, _ in numbered]}"
        )

    def test_item_text_matches_expected(self):
        """Paragraph texts must equal the first field of EXPECTED_ITEMS, in order."""
        texts = [t for t, _num_pr in self._numbered_paragraphs(self._build_doc())]
        for idx, text in enumerate(texts):
            expected_text = self.EXPECTED_ITEMS[idx][0]
            assert text == expected_text, (
                f"Item {idx}: expected text {expected_text!r}, got {text!r}"
            )

    def test_ilvl_matches_expected(self):
        """Each paragraph's ilvl must equal the second field of EXPECTED_ITEMS."""
        numbered = self._numbered_paragraphs(self._build_doc())
        for idx, (text, (_num_id, ilvl)) in enumerate(numbered):
            expected_ilvl = self.EXPECTED_ITEMS[idx][1]
            assert ilvl == expected_ilvl, (
                f"Item {idx} ({text!r}): expected ilvl={expected_ilvl}, got ilvl={ilvl}"
            )

    def test_num_fmt_matches_expected(self):
        """Each level's ``w:numFmt`` must equal the third field of EXPECTED_ITEMS."""
        from docx.oxml.ns import qn

        document = self._build_doc()
        for idx, (text, (num_id, ilvl)) in enumerate(
            self._numbered_paragraphs(document)
        ):
            expected_fmt = self.EXPECTED_ITEMS[idx][2]
            fmt_el = self._lvl_for(document, num_id, ilvl).find(qn("w:numFmt"))
            fmt = fmt_el.get(qn("w:val"))
            assert fmt == expected_fmt, (
                f"Item {idx} ({text!r}): expected numFmt={expected_fmt!r}, got {fmt!r}"
            )

    def test_left_indent_matches_ilvl(self):
        """Each level's ``w:ind`` left value must be HANGING * (ilvl + 1)."""
        from docx.oxml.ns import qn

        document = self._build_doc()
        for idx, (text, (num_id, ilvl)) in enumerate(
            self._numbered_paragraphs(document)
        ):
            expected_left = self.HANGING * (ilvl + 1)
            level = self._lvl_for(document, num_id, ilvl)
            level_props = level.find(qn("w:pPr"))
            assert level_props is not None, f"Item {idx} ({text!r}): missing pPr"
            indent = level_props.find(qn("w:ind"))
            assert indent is not None, f"Item {idx} ({text!r}): missing ind"
            left = int(indent.get(qn("w:left")))
            assert left == expected_left, (
                f"Item {idx} ({text!r}, ilvl={ilvl}): "
                f"expected left indent={expected_left}, got {left}"
            )

    def test_hanging_indent_is_consistent(self):
        """Each level's ``w:ind`` hanging value must equal HANGING."""
        from docx.oxml.ns import qn

        document = self._build_doc()
        for idx, (text, (num_id, ilvl)) in enumerate(
            self._numbered_paragraphs(document)
        ):
            level_props = self._lvl_for(document, num_id, ilvl).find(qn("w:pPr"))
            indent = level_props.find(qn("w:ind"))
            hanging = int(indent.get(qn("w:hanging")))
            assert hanging == self.HANGING, (
                f"Item {idx} ({text!r}): expected hanging={self.HANGING}, got {hanging}"
            )