diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py index 0ff408f..559d52f 100644 --- a/docx2everything/converters/markdown_converter.py +++ b/docx2everything/converters/markdown_converter.py @@ -511,6 +511,9 @@ def parse_body_to_markdown(root, numbering_info=None, hyperlinks=None, images=No markdown_parts = [] body = root.find(qn('w:body')) + if body is None and root.tag in (qn('w:hdr'), qn('w:ftr')): + body = root + if body is None: return '' @@ -616,7 +619,9 @@ def convert_to_markdown(zipf, filelist, img_dir=None): try: header_xml = zipf.read(fname) header_root = ET.fromstring(header_xml) - header_md = parse_body_to_markdown(header_root, numbering_info, hyperlinks, images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts) + header_rels = 'word/_rels/' + os.path.basename(fname) + '.rels' + header_hyperlinks, header_images = parse_relationships(zipf, header_rels) + header_md = parse_body_to_markdown(header_root, numbering_info, header_hyperlinks, header_images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts) if header_md: markdown_parts.append(header_md) except Exception: @@ -641,7 +646,9 @@ def convert_to_markdown(zipf, filelist, img_dir=None): try: footer_xml = zipf.read(fname) footer_root = ET.fromstring(footer_xml) - footer_md = parse_body_to_markdown(footer_root, numbering_info, hyperlinks, images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts) + footer_rels = 'word/_rels/' + os.path.basename(fname) + '.rels' + footer_hyperlinks, footer_images = parse_relationships(zipf, footer_rels) + footer_md = parse_body_to_markdown(footer_root, numbering_info, footer_hyperlinks, footer_images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts) if footer_md: markdown_parts.append(footer_md) except Exception: diff --git a/tests/test_markdown_header_footer.py b/tests/test_markdown_header_footer.py new file mode 100644 index 0000000..43d016d --- /dev/null +++ b/tests/test_markdown_header_footer.py @@ -0,0 +1,48 @@ +import xml.etree.ElementTree as ET + +from docx2everything.converters.markdown_converter import parse_body_to_markdown + + +W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + + +def xml_root(xml): + return ET.fromstring(xml) + + +def test_header_root_is_converted_to_markdown(): + root = xml_root(f""" + + Header text + + """) + + markdown = parse_body_to_markdown(root) + + assert markdown == "Header text" + + +def test_footer_root_is_converted_to_markdown(): + root = xml_root(f""" + + Footer text + + """) + + markdown = parse_body_to_markdown(root) + + assert markdown == "Footer text" + + +def test_document_body_root_still_converts_to_markdown(): + root = xml_root(f""" + + + Body text + + + """) + + markdown = parse_body_to_markdown(root) + + assert markdown == "Body text" diff --git a/tests/test_markdown_header_footer_relationships.py b/tests/test_markdown_header_footer_relationships.py new file mode 100644 index 0000000..08a9caa --- /dev/null +++ b/tests/test_markdown_header_footer_relationships.py @@ -0,0 +1,80 @@ +import io +import zipfile + +from docx2everything.converters.markdown_converter import convert_to_markdown + + +def make_docx(parts): + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, "w") as zipf: + for name, content in parts.items(): + zipf.writestr(name, content) + buffer.seek(0) + return zipfile.ZipFile(buffer) + + +def test_header_hyperlink_uses_header_relationships(): + docx = make_docx({ + "word/document.xml": """ + + + + """, + "word/header1.xml": """ + + + + Header link + + + + """, + "word/_rels/header1.xml.rels": """ + + + + """, + }) + + markdown = convert_to_markdown(docx, docx.namelist()) + + assert markdown == "[Header link](https://example.com/header)" + + +def test_footer_image_uses_footer_relationships(): + docx = make_docx({ + "word/document.xml": """ + + + + """, + "word/footer1.xml": """ + + + + + + + + + + """, + "word/_rels/footer1.xml.rels": """ + + + + """, + }) + + markdown = convert_to_markdown(docx, docx.namelist()) + + assert markdown == "![footer-image.png](media/footer-image.png)"