@@ -111,4 +111,61 @@ public function data_html_and_associated_html_content() {
111111 'Text with comment inside it. ' => array ( 'Ignore <!-- everything inside this --> comment. ' , 0 , 'Ignore <!-- everything inside this --> comment. ' ),
112112 );
113113 }
114+
/**
 * Ensures an HTML excerpt is cut off once its text exceeds a maximum word count.
 *
 * Visits every tag (openers and closers) in the document. On each stop the
 * text reported by the processor is appended to a running plaintext buffer,
 * the words in that buffer are recounted with a locale-aware ICU word
 * iterator, and the HTML/text chunks reported by the processor are appended
 * to the excerpt until the word budget is exceeded.
 *
 * NOTE(review): the exact contents of the chunks come from
 * get_previous_text_chunk() and get_previous_html_chunk(), which are not
 * defined in this part of the file — confirm against their implementation.
 *
 * @dataProvider data_html_with_locale_and_excerpt
 *
 * @param string $html           HTML document to build the excerpt from.
 * @param string $locale         Locale identifier handed to IntlBreakIterator::createWordInstance().
 * @param int    $max_word_count Number of words allowed before the excerpt is cut off.
 * @param string $html_excerpt   Expected excerpt.
 * @return void
 */
public function test_excerpt_of_so_many_words( $html, $locale, $max_word_count, $html_excerpt ) {
	// Word segmentation relies on ICU; skip instead of raising a fatal error when intl is unavailable.
	if ( ! class_exists( 'IntlBreakIterator' ) ) {
		$this->markTestSkipped( 'The intl extension is required for locale-aware word counting.' );
	}

	$processor = new WP_HTML_Tag_Processor( $html );

	$words = IntlBreakIterator::createWordInstance( $locale );
	$this->assertNotNull( $words, 'Could not create a word break iterator for the given locale.' );

	$excerpt_text = '';
	$excerpt      = '';

	// Defined up front so the check after the loop is valid even when the document contains no tags.
	$word_count = 0;

	while ( $processor->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
		// The iterator re-scans all plaintext gathered so far, so the tally restarts on every pass.
		$word_count    = 0;
		$excerpt_text .= $processor->get_previous_text_chunk();
		$words->setText( $excerpt_text );

		// Separate local names keep the $html argument intact.
		list( $html_chunk, $text_chunk ) = $processor->get_previous_html_chunk();
		$excerpt                        .= $html_chunk;

		foreach ( $words as $boundary ) {
			// Only boundaries whose rule status is something other than WORD_NONE are counted as words.
			if ( IntlRuleBasedBreakIterator::WORD_NONE !== $words->getRuleStatus() ) {
				$word_count++;
			}

			// Budget exceeded: leave both loops without appending this stop's text chunk.
			if ( $word_count > $max_word_count ) {
				break 2;
			}
		}

		$excerpt .= $text_chunk;
	}

	// The whole document fit within the budget: append the HTML chunk reported after the last tag.
	if ( $word_count <= $max_word_count ) {
		list( $html_chunk ) = $processor->get_previous_html_chunk();
		$excerpt           .= $html_chunk;
	}

	$this->assertEquals( $html_excerpt, $excerpt, 'Extracted wrong excerpt from document. ' );
}
155+
/**
 * Data provider.
 *
 * Each data set lists, in order: the HTML input, the locale identifier used
 * for word segmentation, the maximum word count, and the expected excerpt.
 *
 * NOTE(review): every HTML input and expected excerpt below ends with a
 * single trailing space. They are reproduced unchanged here; confirm that
 * the whitespace is intentional and not a copy/paste artifact.
 *
 * @return array[]
 */
public function data_html_with_locale_and_excerpt() {
	return array(
		array( '<div>This is a <img> with <em>great</em> ability to inspire.</div> ', 'en_US', 3, '<div>This is a <img> ' ),
		array( '<div>This is a <img> with <em>great</em> ability to inspire.</div> ', 'en_US', 4, '<div>This is a <img> with <em> ' ),
		array( '<em>W</em>hat a <i>T</i>hing ', 'en_US', 2, '<em>W</em>hat a <i> ' ),
		// Japanese is "ja"; "jp" is not a language code, so ICU would not select Japanese for it.
		array( '<span>彼</span>は<em>アメリカ人</em>です。 ', 'ja_JP', 2, '<span>彼</span>は<em> ' ),
		array( '<span>彼</span>は<em>アメリカ人</em>です。 ', 'ja_JP', 4, '<span>彼</span>は<em>アメリカ人</em> ' ),
		array( '<div>שְׁמַע יִשְׂרָאֵל<br> יְהוָה אֱלֹהֵינוּ<br> יְהוָה אֶחָֽד</div> ', 'he_IL', 2, '<div>שְׁמַע יִשְׂרָאֵל<br> ' ),
	);
}
114171}
0 commit comments