From b39c3fb21026ef5de1195939bb1f817bc1a2972e Mon Sep 17 00:00:00 2001
From: Bruce-anle <840596168@qq.com>
Date: Sat, 9 May 2026 00:52:11 +0800
Subject: [PATCH 1/2] fix: convert markdown headers and footers
Background: convert_to_markdown read word/header*.xml and word/footer*.xml, but passed their w:hdr/w:ftr roots to parse_body_to_markdown. That parser only looked for a w:body child, so header/footer content was skipped.

Changes: allow parse_body_to_markdown to traverse w:hdr and w:ftr roots directly while preserving the normal w:document/w:body behavior.

Verification: `/home/brucean/doc4agent/.venv/bin/python -m pytest tests -q -p no:cacheprovider` passed.
---
.../converters/markdown_converter.py | 3 ++
tests/test_markdown_header_footer.py | 48 +++++++++++++++++++
2 files changed, 51 insertions(+)
create mode 100644 tests/test_markdown_header_footer.py
diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py
index 0ff408f..e7c0842 100644
--- a/docx2everything/converters/markdown_converter.py
+++ b/docx2everything/converters/markdown_converter.py
@@ -511,6 +511,9 @@ def parse_body_to_markdown(root, numbering_info=None, hyperlinks=None, images=No
markdown_parts = []
body = root.find(qn('w:body'))
+ if body is None and root.tag in (qn('w:hdr'), qn('w:ftr')):
+ body = root
+
if body is None:
return ''
diff --git a/tests/test_markdown_header_footer.py b/tests/test_markdown_header_footer.py
new file mode 100644
index 0000000..43d016d
--- /dev/null
+++ b/tests/test_markdown_header_footer.py
@@ -0,0 +1,48 @@
+import xml.etree.ElementTree as ET
+
+from docx2everything.converters.markdown_converter import parse_body_to_markdown
+
+
+W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+
+
+def xml_root(xml):
+ return ET.fromstring(xml)
+
+
+def test_header_root_is_converted_to_markdown():
+ root = xml_root(f"""
+ <w:hdr xmlns:w="{W_NS}">
+ <w:p><w:r><w:t>Header text</w:t></w:r></w:p>
+ </w:hdr>
+ """)
+
+ markdown = parse_body_to_markdown(root)
+
+ assert markdown == "Header text"
+
+
+def test_footer_root_is_converted_to_markdown():
+ root = xml_root(f"""
+ <w:ftr xmlns:w="{W_NS}">
+ <w:p><w:r><w:t>Footer text</w:t></w:r></w:p>
+ </w:ftr>
+ """)
+
+ markdown = parse_body_to_markdown(root)
+
+ assert markdown == "Footer text"
+
+
+def test_document_body_root_still_converts_to_markdown():
+ root = xml_root(f"""
+ <w:document xmlns:w="{W_NS}">
+ <w:body>
+ <w:p><w:r><w:t>Body text</w:t></w:r></w:p>
+ </w:body>
+ </w:document>
+ """)
+
+ markdown = parse_body_to_markdown(root)
+
+ assert markdown == "Body text"
From f22671e447b08baadfc9b384ec0e29b49d761356 Mon Sep 17 00:00:00 2001
From: Bruce-anle <840596168@qq.com>
Date: Sat, 9 May 2026 01:10:55 +0800
Subject: [PATCH 2/2] fix: use header and footer relationships
Background: header and footer parts have their own relationship files. The markdown converter parsed header/footer XML but still used the document.xml relationships, so header/footer hyperlinks and images could not be resolved.

Changes: load word/_rels/header*.xml.rels and word/_rels/footer*.xml.rels while parsing each header/footer part.

Verification: `/home/brucean/doc4agent/.venv/bin/python -m pytest tests -q -p no:cacheprovider` passed.

Note: this branch is stacked on the header/footer markdown parsing fix.
---
.../converters/markdown_converter.py | 8 +-
...st_markdown_header_footer_relationships.py | 80 +++++++++++++++++++
2 files changed, 86 insertions(+), 2 deletions(-)
create mode 100644 tests/test_markdown_header_footer_relationships.py
diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py
index e7c0842..559d52f 100644
--- a/docx2everything/converters/markdown_converter.py
+++ b/docx2everything/converters/markdown_converter.py
@@ -619,7 +619,9 @@ def convert_to_markdown(zipf, filelist, img_dir=None):
try:
header_xml = zipf.read(fname)
header_root = ET.fromstring(header_xml)
- header_md = parse_body_to_markdown(header_root, numbering_info, hyperlinks, images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
+ header_rels = 'word/_rels/' + os.path.basename(fname) + '.rels'
+ header_hyperlinks, header_images = parse_relationships(zipf, header_rels)
+ header_md = parse_body_to_markdown(header_root, numbering_info, header_hyperlinks, header_images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
if header_md:
markdown_parts.append(header_md)
except Exception:
@@ -644,7 +646,9 @@ def convert_to_markdown(zipf, filelist, img_dir=None):
try:
footer_xml = zipf.read(fname)
footer_root = ET.fromstring(footer_xml)
- footer_md = parse_body_to_markdown(footer_root, numbering_info, hyperlinks, images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
+ footer_rels = 'word/_rels/' + os.path.basename(fname) + '.rels'
+ footer_hyperlinks, footer_images = parse_relationships(zipf, footer_rels)
+ footer_md = parse_body_to_markdown(footer_root, numbering_info, footer_hyperlinks, footer_images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
if footer_md:
markdown_parts.append(footer_md)
except Exception:
diff --git a/tests/test_markdown_header_footer_relationships.py b/tests/test_markdown_header_footer_relationships.py
new file mode 100644
index 0000000..08a9caa
--- /dev/null
+++ b/tests/test_markdown_header_footer_relationships.py
@@ -0,0 +1,80 @@
+import io
+import zipfile
+
+from docx2everything.converters.markdown_converter import convert_to_markdown
+
+
+def make_docx(parts):
+ buffer = io.BytesIO()
+ with zipfile.ZipFile(buffer, "w") as zipf:
+ for name, content in parts.items():
+ zipf.writestr(name, content)
+ buffer.seek(0)
+ return zipfile.ZipFile(buffer)
+
+
+def test_header_hyperlink_uses_header_relationships():
+ docx = make_docx({
+ "word/document.xml": """
+
+
+
+ """,
+ "word/header1.xml": """
+
+
+
+ Header link
+
+
+
+ """,
+ "word/_rels/header1.xml.rels": """
+
+
+
+ """,
+ })
+
+ markdown = convert_to_markdown(docx, docx.namelist())
+
+ assert markdown == "[Header link](https://example.com/header)"
+
+
+def test_footer_image_uses_footer_relationships():
+ docx = make_docx({
+ "word/document.xml": """
+
+
+
+ """,
+ "word/footer1.xml": """
+
+
+
+
+
+
+
+
+
+ """,
+ "word/_rels/footer1.xml.rels": """
+
+
+
+ """,
+ })
+
+ markdown = convert_to_markdown(docx, docx.namelist())
+
+ assert markdown == ""