File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 11# selectolax Changelog
22
3+ - Fix `.text()` and `.iter()` for HTML fragments when there are multiple nodes at the root level.
4+
35# Version 0.4.6
46
57
Original file line number Diff line number Diff line change @@ -168,7 +168,8 @@ cdef class LexborNode:
168168
169169 """
170170 cdef unsigned char * text
171- cdef lxb_dom_node_t * node = < lxb_dom_node_t * > self .node.first_child
171+ cdef LexborNode start_node = self ._get_node()
172+ cdef lxb_dom_node_t * node = < lxb_dom_node_t * > start_node.node.first_child
172173
173174 if not deep:
174175 container = TextContainer(separator, strip)
@@ -197,7 +198,7 @@ cdef class LexborNode:
197198 container.append(text.decode(_ENCODING))
198199
199200 lxb_dom_node_simple_walk(
200- < lxb_dom_node_t * > self .node,
201+ < lxb_dom_node_t * > start_node .node,
201202 < lxb_dom_node_simple_walker_f> text_callback,
202203 < void * > container
203204 )
@@ -468,7 +469,8 @@ cdef class LexborNode:
468469 to the provided options.
469470 """
470471
471- cdef lxb_dom_node_t * node = self .node.first_child
472+ cdef LexborNode start_node = self ._get_node()
473+ cdef lxb_dom_node_t * node = start_node.node.first_child
472474 cdef LexborNode next_node
473475
474476 while node != NULL :
Original file line number Diff line number Diff line change 11from inspect import cleandoc
22import pytest
3- from selectolax .lexbor import LexborHTMLParser , SelectolaxError
3+ from selectolax .lexbor import LexborHTMLParser
44
55
66def clean_doc (text : str ) -> str :
@@ -491,10 +491,14 @@ def test_fragment_create_node_with_attributes():
491491 assert 'class="link"' in html
492492
493493
494- def test_fragment_create_node_empty_tag_name ():
495- parser = LexborHTMLParser ("<div></div>" , is_fragment = True )
496- try :
497- parser .create_node ("" )
498- assert False , "Should have raised an exception"
499- except SelectolaxError :
500- pass
494+ def test_fragment_text_extraction_multiple_nodes ():
495+ html = "<p>1</p><p>2</p>"
496+ p = LexborHTMLParser (html , is_fragment = True )
497+ assert p .text (deep = False ) == ""
498+ assert p .text (deep = True , separator = " " , strip = True ) == "1 2"
499+
500+
501+ def test_fragment_iter_multiple_nodes ():
502+ html = "<p>1</p><p>2</p>"
503+ p = LexborHTMLParser (html , is_fragment = True )
504+ assert len (list (p .root .iter ())) == 2
You can’t perform that action at this time.
0 commit comments