@@ -171,27 +171,45 @@ def test_description_from_doc_abstract_block(self):
171171 result = ContentMdService .render (doc )
172172 assert 'description: "Abstract content here."' in result
173173
174- def test_description_from_longest_textblock_when_no_abstract (self ):
175- blocks = [
176- make_text_block ('Short.' , role = 'paragraph' ),
177- make_text_block (
178- 'This is a considerably longer paragraph block.' , role = 'paragraph'
179- ),
180- ]
174+ def test_description_from_first_five_body_blocks (self ):
175+ blocks = [make_text_block (f'Sentence { i } .' , role = 'paragraph' ) for i in range (7 )]
181176 doc = make_doc (pages = [make_page (text = '' , blocks = blocks )])
182177 result = ContentMdService .render (doc )
183- assert 'description: "This is a considerably longer paragraph block."' in result
178+ # Only the first five contribute; the sixth and seventh are ignored
179+ assert 'Sentence 5' not in result .split ('---\n ' )[1 ].split ('\n ' )[0 ]
180+ assert 'Sentence 0' in result
184181
185- def test_description_excludes_doc_title_from_longest_candidate (self ):
182+ def test_description_excludes_structural_roles (self ):
186183 blocks = [
187- make_text_block (
188- 'This is a very long doc-title block text.' , role = 'doc-title'
189- ),
190- make_text_block ('Shorter paragraph .' , role = 'paragraph' ),
184+ make_text_block ('Table of contents text.' , role = 'doc-toc' ),
185+ make_text_block ( 'Page header text.' , role = 'doc-pageheader' ),
186+ make_text_block ( 'A heading block.' , role = 'heading' ),
187+ make_text_block ('Body content .' , role = 'paragraph' ),
191188 ]
192189 doc = make_doc (pages = [make_page (text = '' , blocks = blocks )])
193190 result = ContentMdService .render (doc )
194- assert 'description: "Shorter paragraph."' in result
191+ assert 'description: "Body content."' in result
192+
193+ def test_description_truncated_to_200_chars (self ):
194+ long_text = 'word ' * 60 # well over 200 chars
195+ blocks = [make_text_block (long_text , role = 'paragraph' )]
196+ doc = make_doc (pages = [make_page (text = '' , blocks = blocks )])
197+ result = ContentMdService .render (doc )
198+ fm_end = result .index ('---\n ' , 4 )
199+ frontmatter = result [:fm_end ]
200+ desc_line = next (l for l in frontmatter .splitlines () if l .startswith ('description:' ))
201+ # Strip the YAML quoting to measure the actual value length
202+ value = desc_line [len ('description: "' ):- 1 ]
203+ assert len (value ) <= 200
204+
205+ def test_description_contains_no_newlines (self ):
206+ blocks = [make_text_block ('Line one.\n Line two.\n Line three.' , role = 'paragraph' )]
207+ doc = make_doc (pages = [make_page (text = '' , blocks = blocks )])
208+ result = ContentMdService .render (doc )
209+ fm_end = result .index ('---\n ' , 4 )
210+ frontmatter = result [:fm_end ]
211+ desc_line = next (l for l in frontmatter .splitlines () if l .startswith ('description:' ))
212+ assert '\n ' not in desc_line
195213
196214 def test_description_searches_first_two_pages (self ):
197215 page1 = make_page (number = 1 , text = '' , blocks = [make_text_block ('Page 1 text.' )])
0 commit comments