wip

avvertix · avvertix · commit b7c374287bc9 · 2026-02-18T11:21:28.000+01:00
diff --git a/src/parxy_core/models/models.py b/src/parxy_core/models/models.py
@@ -165,17 +165,16 @@ def contentmd(
     ) -> str:
         """Get the document content formatted as content-md.
 
-        Generates a content-md string: YAML frontmatter followed by Markdown.
-        Per the spec, all heading levels are shifted up by one so the document
-        title occupies the sole h1, and images use ``<figure>`` blocks.
+        Delegates to :class:`~parxy_core.services.ContentMdService`.
 
         Parameters
         ----------
         title : str, optional
-            Document title. Falls back to metadata.title, then filename.
+            Document title. Falls back to metadata.title, a heading inferred
+            from the first page, filename, then 'Untitled'.
         description : str, optional
-            Short summary (~200 characters). Required by the spec; omitted from
-            frontmatter when not provided.
+            Short summary (~200 characters). Falls back to a doc-abstract block,
+            then the longest TextBlock across the first two pages.
         date : str, optional
             Creation/publication date in ISO 8601. Falls back to metadata dates.
         license : str, optional
@@ -188,133 +187,16 @@ def contentmd(
         str
             The document content formatted as content-md.
         """
-        def _guess_title_from_first_page() -> Optional[str]:
-            if not self.pages:
-                return None
-            first_page = self.pages[0]
-            if not first_page.blocks:
-                return None
-            # Prefer an explicit doc-title block, then the highest-ranking heading
-            doc_title = next(
-                (
-                    b
-                    for b in first_page.blocks
-                    if isinstance(b, TextBlock)
-                    and b.role == 'doc-title'
-                    and b.text.strip()
-                ),
-                None,
-            )
-            if doc_title:
-                return _normalize(doc_title.text)
-            headings = [
-                b
-                for b in first_page.blocks
-                if isinstance(b, TextBlock)
-                and b.role == 'heading'
-                and b.text.strip()
-            ]
-            if not headings:
-                return None
-            return _normalize(min(headings, key=lambda b: b.level or 1).text)
-
-        resolved_title = (
-            title
-            or (self.metadata.title if self.metadata else None)
-            or _guess_title_from_first_page()
-            or self.filename
-            or 'Untitled'
+        from parxy_core.services.contentmd_service import ContentMdService
+
+        return ContentMdService.render(
+            self,
+            title=title,
+            description=description,
+            date=date,
+            license=license,
+            author=author,
         )
-        resolved_date = date or (
-            (self.metadata.created_at or self.metadata.updated_at)
-            if self.metadata
-            else None
-        )
-        resolved_author = author or (self.metadata.author if self.metadata else None)
-
-        def _infer_description() -> Optional[str]:
-            first_two_pages = self.pages[:2]
-            blocks = [
-                b
-                for page in first_two_pages
-                if page.blocks
-                for b in page.blocks
-                if isinstance(b, TextBlock) and b.text.strip()
-            ]
-            abstract = next((b for b in blocks if b.role == 'doc-abstract'), None)
-            if abstract:
-                return _normalize(abstract.text)
-            text_blocks = [b for b in blocks if b.role != 'doc-title']
-            if not text_blocks:
-                return None
-            return _normalize(max(text_blocks, key=lambda b: len(b.text)).text)
-
-        resolved_description = description or _infer_description()
-
-        def _normalize(text: str) -> str:
-            """Collapse runs of whitespace to a single space and strip."""
-            return ' '.join(text.split())
-
-        def _yaml_str(v: str) -> str:
-            return '"' + v.replace('\\', '\\\\').replace('"', '\\"') + '"'
-
-        fm = ['---', f'title: {_yaml_str(resolved_title)}']
-        if resolved_description:
-            fm.append(f'description: {_yaml_str(resolved_description)}')
-        if resolved_date:
-            fm.append(f'date: {_yaml_str(resolved_date)}')
-        if license:
-            fm.append(f'license: {_yaml_str(license)}')
-        if resolved_author:
-            fm.append(f'author: {_yaml_str(resolved_author)}')
-        fm.append('---')
-        frontmatter = '\n'.join(fm)
-
-        if not self.pages:
-            return f'{frontmatter}\n\n# {resolved_title}\n'
-
-        parts = [f'# {resolved_title}']
-
-        for page in self.pages:
-            if not page.blocks:
-                if page.text.strip():
-                    parts.append(_normalize(page.text))
-                continue
-
-            for block in page.blocks:
-                role = (block.role or 'generic').lower()
-
-                if isinstance(block, TextBlock):
-                    if role == 'doc-title':
-                        # Already rendered as the top-level # heading — skip
-                        pass
-                    elif role == 'heading':
-                        # Shift all heading levels by +1 so h1 content becomes h2
-                        shifted = min((block.level or 1) + 1, 6)
-                        parts.append(f'{"#" * shifted} {_normalize(block.text)}')
-                    elif role in ('list', 'listitem'):
-                        for line in block.text.splitlines():
-                            if line.strip():
-                                parts.append(f'- {_normalize(line)}')
-                    elif role == 'doc-abstract':
-                        lang_attr = f' lang="{self.language}"' if self.language else ''
-                        parts.append(
-                            f'<abstract{lang_attr}>\n{_normalize(block.text)}\n</abstract>'
-                        )
-                    else:
-                        normalized = _normalize(block.text)
-                        if normalized:
-                            parts.append(normalized)
-
-                elif isinstance(block, ImageBlock):
-                    alt = block.alt_text or ''
-                    parts.append(f'<figure>\n{alt}\n</figure>')
-
-                elif isinstance(block, TableBlock):
-                    if block.text.strip():
-                        parts.append(block.text.strip())
-
-        return f'{frontmatter}\n\n' + '\n\n'.join(parts) + '\n'
 
     def markdown(self) -> str:
         """Get the document content formatted as Markdown.
diff --git a/src/parxy_core/services/__init__.py b/src/parxy_core/services/__init__.py
@@ -1,5 +1,6 @@
 """Services module for parxy_core."""
 
+from parxy_core.services.contentmd_service import ContentMdService
 from parxy_core.services.pdf_service import PdfService
 
-__all__ = ['PdfService']
+__all__ = ['ContentMdService', 'PdfService']
diff --git a/src/parxy_core/services/contentmd_service.py b/src/parxy_core/services/contentmd_service.py