Skip to content

Commit 57476f3

Browse files
committed
wip
1 parent 5d7fd9c commit 57476f3

2 files changed

Lines changed: 53 additions & 16 deletions

File tree

src/parxy_core/services/contentmd_service.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,11 @@ def render(
207207
The document to render.
208208
title:
209209
Document title. Falls back to ``metadata.title``, a heading
210-
inferred from the first page, ``filename``, then ``'Untitled'``.
210+
inferred from the first page, then ``filename``. Raises
211+
``ValueError`` if no title can be resolved.
211212
description:
212213
Short summary (~200 characters). Falls back to a ``doc-abstract``
213-
block, then the longest :class:`TextBlock` in the first two pages.
214+
block, then the first five body blocks in the first two pages.
214215
date:
215216
Creation/publication date in ISO 8601. Falls back to
216217
``metadata.created_at`` / ``metadata.updated_at``.
@@ -229,8 +230,13 @@ def render(
229230
or (document.metadata.title if document.metadata else None)
230231
or ContentMdService._guess_title(document)
231232
or document.filename
232-
or 'Untitled'
233233
)
234+
if not resolved_title:
235+
raise ValueError(
236+
'Cannot render content-md: no title could be resolved. '
237+
'Provide a title via metadata, a doc-title/heading block, '
238+
'a filename, or pass title= explicitly.'
239+
)
234240
resolved_description = description or ContentMdService._infer_description(
235241
document
236242
)

tests/services/test_contentmd_service.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,10 @@ def test_title_from_filename_when_no_headings(self):
149149
result = ContentMdService.render(doc)
150150
assert 'title: "my-report.pdf"' in result
151151

152-
def test_title_fallback_to_untitled(self):
152+
def test_title_raises_when_unresolvable(self):
153153
doc = make_doc(pages=[make_page(text='body text')])
154-
result = ContentMdService.render(doc)
155-
assert 'title: "Untitled"' in result
154+
with pytest.raises(ValueError, match='no title could be resolved'):
155+
ContentMdService.render(doc)
156156

157157
def test_description_from_explicit_param(self, minimal_doc):
158158
result = ContentMdService.render(
@@ -168,13 +168,13 @@ def test_description_from_doc_abstract_block(self):
168168
),
169169
]
170170
doc = make_doc(pages=[make_page(text='', blocks=blocks)])
171-
result = ContentMdService.render(doc)
171+
result = ContentMdService.render(doc, title='T')
172172
assert 'description: "Abstract content here."' in result
173173

174174
def test_description_from_first_five_body_blocks(self):
175175
blocks = [make_text_block(f'Sentence {i}.', role='paragraph') for i in range(7)]
176176
doc = make_doc(pages=[make_page(text='', blocks=blocks)])
177-
result = ContentMdService.render(doc)
177+
result = ContentMdService.render(doc, title='T')
178178
# Only the first five contribute; the sixth and seventh are ignored
179179
assert 'Sentence 5' not in result.split('---\n')[1].split('\n')[0]
180180
assert 'Sentence 0' in result
@@ -194,21 +194,27 @@ def test_description_truncated_to_200_chars(self):
194194
long_text = 'word ' * 60 # well over 200 chars
195195
blocks = [make_text_block(long_text, role='paragraph')]
196196
doc = make_doc(pages=[make_page(text='', blocks=blocks)])
197-
result = ContentMdService.render(doc)
197+
result = ContentMdService.render(doc, title='T')
198198
fm_end = result.index('---\n', 4)
199199
frontmatter = result[:fm_end]
200-
desc_line = next(l for l in frontmatter.splitlines() if l.startswith('description:'))
200+
desc_line = next(
201+
l for l in frontmatter.splitlines() if l.startswith('description:')
202+
)
201203
# Strip the YAML quoting to measure the actual value length
202-
value = desc_line[len('description: "'):-1]
204+
value = desc_line[len('description: "') : -1]
203205
assert len(value) <= 200
204206

205207
def test_description_contains_no_newlines(self):
206-
blocks = [make_text_block('Line one.\nLine two.\nLine three.', role='paragraph')]
208+
blocks = [
209+
make_text_block('Line one.\nLine two.\nLine three.', role='paragraph')
210+
]
207211
doc = make_doc(pages=[make_page(text='', blocks=blocks)])
208-
result = ContentMdService.render(doc)
212+
result = ContentMdService.render(doc, title='T')
209213
fm_end = result.index('---\n', 4)
210214
frontmatter = result[:fm_end]
211-
desc_line = next(l for l in frontmatter.splitlines() if l.startswith('description:'))
215+
desc_line = next(
216+
l for l in frontmatter.splitlines() if l.startswith('description:')
217+
)
212218
assert '\n' not in desc_line
213219

214220
def test_description_searches_first_two_pages(self):
@@ -224,7 +230,7 @@ def test_description_searches_first_two_pages(self):
224230
blocks=[make_text_block('Page 3 has the longest block of all by far.')],
225231
)
226232
doc = make_doc(pages=[page1, page2, page3])
227-
result = ContentMdService.render(doc)
233+
result = ContentMdService.render(doc, title='T')
228234
# Page 3 is out of the two-page window
229235
assert 'Page 3' not in result.split('---')[1] # not in frontmatter
230236

@@ -235,7 +241,7 @@ def test_date_from_metadata_created_at(self, metadata_doc):
235241
def test_date_from_metadata_updated_at_when_no_created_at(self):
236242
meta = Metadata(updated_at='2025-06-01')
237243
doc = make_doc(pages=[make_page(text='')], metadata=meta)
238-
result = ContentMdService.render(doc)
244+
result = ContentMdService.render(doc, title='T')
239245
assert 'date: "2025-06-01"' in result
240246

241247
def test_explicit_date_overrides_metadata(self, metadata_doc):
@@ -472,3 +478,28 @@ def test_render_delegates_from_document_method(self, metadata_doc):
472478
via_service = ContentMdService.render(metadata_doc)
473479
via_method = metadata_doc.contentmd()
474480
assert via_service == via_method
481+
482+
def test_empty_document_without_args_raises(self):
483+
"""A document with no metadata, no blocks, no filename, and no user
484+
arguments cannot satisfy the required title constraint."""
485+
doc = Document(pages=[])
486+
with pytest.raises(ValueError, match='no title could be resolved'):
487+
ContentMdService.render(doc)
488+
489+
def test_empty_document_with_title_arg_returns_contentmd(self):
490+
"""Passing title= explicitly must succeed even when the document is
491+
completely empty."""
492+
doc = Document(pages=[])
493+
result = ContentMdService.render(doc, title='Provided Title')
494+
assert 'title: "Provided Title"' in result
495+
assert '# Provided Title' in result
496+
497+
def test_empty_document_with_title_and_description_returns_contentmd(self):
498+
"""Both title= and description= passed explicitly on an empty document."""
499+
doc = Document(pages=[])
500+
result = ContentMdService.render(
501+
doc, title='My Title', description='My description.'
502+
)
503+
assert 'title: "My Title"' in result
504+
assert 'description: "My description."' in result
505+
assert result.endswith('\n')

0 commit comments

Comments
 (0)