Skip to content

Commit 340418b

Browse files
committed
Primitive max-word-count HTML excerpt.
1 parent bab77fa commit 340418b

1 file changed

Lines changed: 57 additions & 0 deletions

File tree

tests/phpunit/tests/html-api/wpHtmlProcessor-stringBuilder.php

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,61 @@ public function data_html_and_associated_html_content() {
111111
'Text with comment inside it.' => array( 'Ignore <!-- everything inside this --> comment.', 0, 'Ignore <!-- everything inside this --> comment.' ),
112112
);
113113
}
114+
115+
/**
 * Builds an HTML excerpt limited to a maximum number of words and compares
 * it against the expected excerpt.
 *
 * Walks every tag (openers and closers) in the document. At each tag the
 * text reported by the processor is appended to a running plaintext buffer,
 * the buffer is re-segmented into words for the given locale, and the walk
 * stops as soon as the word count exceeds the limit.
 *
 * @dataProvider data_html_with_locale_and_excerpt
 *
 * @param string $html           Input HTML document.
 * @param string $locale         ICU locale used to create the word break iterator.
 * @param int    $max_word_count Maximum number of words allowed in the excerpt.
 * @param string $html_excerpt   Expected HTML excerpt.
 * @return void
 */
public function test_excerpt_of_so_many_words( $html, $locale, $max_word_count, $html_excerpt ) {
	// Word segmentation comes from the intl extension; without it this test cannot run.
	if ( ! class_exists( 'IntlBreakIterator' ) ) {
		$this->markTestSkipped( 'The intl extension is required to segment text into words.' );
	}

	$processor = new WP_HTML_Tag_Processor( $html );

	$excerpt_text = '';
	$excerpt      = '';
	$words        = IntlBreakIterator::createWordInstance( $locale );
	$this->assertNotNull( $words, 'Could not create a word break iterator for the given locale.' );

	// Defined up front so the comparison after the loop is valid even when
	// the document contains no tags and the loop body never runs.
	$word_count = 0;

	while ( $processor->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
		// The iterator is re-run over the whole accumulated text below,
		// so the count restarts from zero on every pass.
		$word_count    = 0;
		$excerpt_text .= $processor->get_previous_text_chunk();
		$words->setText( $excerpt_text );

		// NOTE(review): presumably the first element is the markup of the
		// chunk and the second its text; confirm against the processor.
		list( $html_chunk, $text_chunk ) = $processor->get_previous_html_chunk();

		$excerpt .= $html_chunk;
		foreach ( $words as $_ ) {
			// Boundaries with a WORD_NONE status (spaces, punctuation) are not words.
			if ( IntlRuleBasedBreakIterator::WORD_NONE !== $words->getRuleStatus() ) {
				$word_count++;
			}

			// Over the limit: stop walking the document entirely, leaving
			// the current text chunk out of the excerpt.
			if ( $word_count > $max_word_count ) {
				break 2;
			}
		}
		$excerpt .= $text_chunk;
	}

	// The limit was never exceeded: append the first element of the chunk
	// the processor reports once the walk has finished.
	if ( $word_count <= $max_word_count ) {
		list( $html_chunk ) = $processor->get_previous_html_chunk();

		$excerpt .= $html_chunk;
	}

	$this->assertSame( $html_excerpt, $excerpt, 'Extracted wrong excerpt from document.' );
}
155+
156+
/**
 * Data provider.
 *
 * Each dataset holds, in order: the input HTML, the ICU locale used for word
 * segmentation, the maximum word count, and the expected HTML excerpt.
 *
 * @return array[]
 */
public function data_html_with_locale_and_excerpt() {
	return array(
		'English, limit of three words'       => array( '<div>This is a <img> with <em>great</em> ability to inspire.</div>', 'en_US', 3, '<div>This is a <img>' ),
		'English, limit of four words'        => array( '<div>This is a <img> with <em>great</em> ability to inspire.</div>', 'en_US', 4, '<div>This is a <img> with <em>' ),
		'English, tags inside words'          => array( '<em>W</em>hat a <i>T</i>hing', 'en_US', 2, '<em>W</em>hat a <i>' ),
		// Japanese uses the language code "ja"; "jp" is not a valid language subtag.
		'Japanese, limit of two words'        => array( '<span>彼</span>は<em>アメリカ人</em>です。', 'ja_JP', 2, '<span>彼</span>は<em>' ),
		'Japanese, limit of four words'       => array( '<span>彼</span>は<em>アメリカ人</em>です。', 'ja_JP', 4, '<span>彼</span>は<em>アメリカ人</em>' ),
		'Hebrew with niqqud, limit of two'    => array( '<div>שְׁמַע יִשְׂרָאֵל<br> יְהוָה אֱלֹהֵינוּ<br> יְהוָה אֶחָֽד</div>', 'he_IL', 2, '<div>שְׁמַע יִשְׂרָאֵל<br>' ),
	);
}
114171
}

0 commit comments

Comments
 (0)