re-add html to markdown function, fix tests

jnptk · jnptk · commit a79333d4b856 · 2026-05-21T12:15:41.000+02:00
diff --git a/src/plone/restapi/renderer/markdown.py b/src/plone/restapi/renderer/markdown.py
@@ -8,9 +8,11 @@
 from zope.interface import Interface
 
 import json
+import re
 import yaml
 
 EXCLUDED_FROM_FRONTMATTER = {"blocks", "blocks_layout"}
+BODY_FIELDS = {"text", "description"}
 
 
 @implementer(IRenderer)
@@ -39,6 +41,13 @@ def __call__(self, data):
                     body_parts.extend(
                         self._render_blocks(value, data.get("blocks_layout", []))
                     )
+            elif key in BODY_FIELDS:
+                # These go into the body, not frontmatter
+                if key == "text" and isinstance(value, dict):
+                    # RichText field structure
+                    body_parts.append(self._render_richtext(value))
+                elif value:
+                    body_parts.append(str(value))
             else:
                 frontmatter[key] = value
 
@@ -54,11 +63,147 @@ def __call__(self, data):
             parts.append("---")
             parts.append("")
 
+        # Add title as H1 if present
+        # TODO: content with the blocks behavior ends up with two titles,
+        # because the title block converter also renders the title
+        if "title" in data and data["title"]:
+            parts.append(f"# {data['title']}")
+            parts.append("")
+
         if body_parts:
             parts.extend(body_parts)
 
         return "\n".join(parts)
 
+    def _html_to_markdown(self, html):
+        """Convert an HTML fragment to GitHub Flavored Markdown.
+
+        This is a basic, regex-based implementation covering headings,
+        emphasis, links, images, lists, paragraphs, line breaks and code.
+        Nested lists and markup inside code are not handled specially.
+        For production use, consider using a library like 'markdownify'
+        or 'html2text'.
+
+        Tag names are matched with a trailing word boundary so that, for
+        example, the ``<b>`` rule cannot start at ``<br>`` and the ``<i>``
+        rule cannot start at ``<img>``. Block-level elements (headings,
+        list items, paragraphs, code fences) are emitted on their own
+        lines so adjacent elements do not run together.
+
+        :param html: HTML markup to convert; may be empty or ``None``.
+        :returns: Markdown text without leading/trailing whitespace, or
+            ``""`` for empty input.
+        """
+        if not html:
+            return ""
+
+        flags = re.IGNORECASE | re.DOTALL
+        li_pattern = r"<li\b[^>]*>(.*?)</li>\s*"
+        # Quoted attribute values; the leading \s stops "data-src" matching
+        src = r'\ssrc=["\']([^"\']+)["\']'
+        alt = r'\salt=["\']([^"\']*)["\']'
+
+        def sub(pattern, repl, text):
+            """Apply one case-insensitive, dot-matches-newline rule."""
+            return re.sub(pattern, repl, text, flags=flags)
+
+        def heading(match):
+            """Render <hN> as N hashes followed by the heading text."""
+            hashes = "#" * int(match.group(1))
+            return f"\n\n{hashes} {match.group(2).strip()}\n\n"
+
+        def ordered_list(match):
+            """Number the <li> items of one <ol>, starting at 1."""
+            items = re.findall(li_pattern, match.group(1), flags=flags)
+            lines = [f"{n}. {item.strip()}" for n, item in enumerate(items, 1)]
+            return "\n\n" + "\n".join(lines) + "\n\n"
+
+        def bullet(match):
+            """Render one remaining <li> as an unordered list item."""
+            return f"- {match.group(1).strip()}\n"
+
+        text = html
+
+        # Convert preformatted blocks first, unwrapping an inner <code> so
+        # the inline code rule below does not add backticks inside the fence
+        text = sub(
+            r"<pre\b[^>]*>(?:\s*<code\b[^>]*>)?(.*?)(?:</code>\s*)?</pre>",
+            r"\n\n```\n\1\n```\n\n",
+            text,
+        )
+
+        # Convert headings (h1-h6)
+        text = sub(r"<h([1-6])\b[^>]*>(.*?)</h\1>", heading, text)
+
+        # Convert bold and italic
+        text = sub(r"<(strong|b)\b[^>]*>(.*?)</\1>", r"**\2**", text)
+        text = sub(r"<(em|i)\b[^>]*>(.*?)</\1>", r"*\2*", text)
+
+        # Convert links
+        text = sub(
+            r'<a\b[^>]*\shref=["\']([^"\']+)["\'][^>]*>(.*?)</a>',
+            r"[\2](\1)",
+            text,
+        )
+
+        # Convert images: src before alt, alt before src, then src only
+        text = sub(
+            rf"<img\b[^>]*{src}[^>]*{alt}[^>]*>",
+            r"![\2](\1)",
+            text,
+        )
+        text = sub(
+            rf"<img\b[^>]*{alt}[^>]*{src}[^>]*>",
+            r"![\1](\2)",
+            text,
+        )
+        text = sub(
+            rf"<img\b[^>]*{src}[^>]*>",
+            r"![](\1)",
+            text,
+        )
+
+        # Convert lists: numbered items for <ol>, bullets for the rest.
+        # Items are converted before the <ul> tags are replaced so the
+        # blank line that closes a list is not swallowed by the item rule.
+        text = sub(r"<ol\b[^>]*>(.*?)</ol>", ordered_list, text)
+        text = sub(li_pattern, bullet, text)
+        text = sub(r"</?ul\b[^>]*>", "\n\n", text)
+
+        # Convert paragraphs
+        text = sub(r"<p\b[^>]*>(.*?)</p>", r"\n\n\1\n\n", text)
+
+        # Convert line breaks
+        text = sub(r"<br\b[^>]*>", "\n", text)
+
+        # Convert inline code
+        text = sub(r"<code\b[^>]*>(.*?)</code>", r"`\1`", text)
+
+        # Remove remaining HTML tags
+        text = re.sub(r"<[^>]+>", "", text)
+
+        # Clean up whitespace: empty whitespace-only lines, then allow at
+        # most one blank line in a row
+        text = re.sub(r"\n[ \t]+(?=\n)", "\n", text)
+        text = re.sub(r"\n{3,}", "\n\n", text)
+        return text.strip()
+
+    def _render_richtext(self, richtext_data):
+        """Convert a serialized RichText field value to Markdown.
+
+        :param richtext_data: dict with ``data`` and ``content-type`` keys;
+            any other value is returned as ``str(richtext_data)``.
+        :returns: Markdown for ``text/html`` data, the raw data otherwise;
+            ``""`` when ``data`` is missing or ``None``.
+        """
+        if not isinstance(richtext_data, dict):
+            return str(richtext_data)
+        # A None "data" value would break the caller's join(); fall back to ""
+        content = richtext_data.get("data") or ""
+        if richtext_data.get("content-type", "text/plain") == "text/html":
+            return self._html_to_markdown(content)
+        return content
+
     def _render_blocks(self, blocks: dict, blocks_layout: list) -> list[str]:
         """Convert Volto blocks to Markdown.
 
diff --git a/src/plone/restapi/tests/test_renderer_markdown.py b/src/plone/restapi/tests/test_renderer_markdown.py
@@ -1,5 +1,7 @@
 """Tests for Markdown renderer."""
 
+from simplejson.compat import b
+
 from plone.app.testing import login
 from plone.app.testing import setRoles
 from plone.app.testing import SITE_OWNER_NAME
@@ -67,8 +69,8 @@ def test_markdown_basic_rendering(self):
 
         # Check for YAML frontmatter
         self.assertIn("---", content)
-        self.assertIn("@id:", content)
-        self.assertIn("@type: Document", content)
+        self.assertIn("'@id':", content)
+        self.assertIn("'@type': Document", content)
         self.assertIn("title: Test Document", content)
 
         # Check for title as H1
@@ -100,6 +102,7 @@ def test_default_to_json(self):
         )
         response = requests.get(
             url,
+            headers={"Accept": "*/*"},
             auth=(SITE_OWNER_NAME, SITE_OWNER_PASSWORD),
         )