Skip to content

Commit 06c0b96

Browse files
committed
Fixes to #159
1 parent 31a9cb3 commit 06c0b96

7 files changed

Lines changed: 39 additions & 12 deletions

File tree

CHANGES.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
selectolax Changelog
22
====================
3+
Version 0.3.29
4+
--------------
5+
6+
Released
7+
8+
- Feat: Add unwrap empty tags functionality. Fixes #159.
9+
310

411
Version 0.3.28
512
--------------

selectolax/lexbor.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class LexborNode:
101101
def id(self) -> str | None: ...
102102
def iter(self, include_text: bool = False) -> Iterator[LexborNode]: ...
103103
def unwrap(self) -> None: ...
104-
def unwrap_tags(self, tags: list[str]) -> None: ...
104+
def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
105105
def traverse(self, include_text: bool = False) -> Iterator[LexborNode]: ...
106106
def replace_with(self, value: bytes | str | LexborNode) -> None: ...
107107
def insert_before(self, value: bytes | str | LexborNode) -> None: ...
@@ -152,7 +152,7 @@ class LexborHTMLParser:
152152
def scripts_srcs_contain(self, queries: tuple[str]) -> bool: ...
153153
def css_matches(self, selector: str) -> bool: ...
154154
def clone(self) -> LexborHTMLParser: ...
155-
def unwrap_tags(self, tags: list[str]) -> None: ...
155+
def unwrap_tags(self, tags: list[str], delete_empty : bool = False) -> None: ...
156156

157157
def create_tag(tag: str) -> LexborNode:
158158
"""

selectolax/lexbor.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ cdef class LexborHTMLParser:
327327

328328
cls = LexborHTMLParser.from_document(cloned_document, self.raw_html)
329329
return cls
330-
def unwrap_tags(self, list tags):
330+
def unwrap_tags(self, list tags, delete_empty = False):
331331
"""Unwraps specified tags from the HTML tree.
332332
333333
Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -336,6 +336,8 @@ cdef class LexborHTMLParser:
336336
----------
337337
tags : list
338338
List of tags to remove.
339+
delete_empty : bool
340+
Whether to delete empty tags.
339341
340342
Examples
341343
--------
@@ -346,4 +348,4 @@ cdef class LexborHTMLParser:
346348
'<body><div>Hello world!</div></body>'
347349
"""
348350
if self.root is not None:
349-
self.root.unwrap_tags(tags)
351+
self.root.unwrap_tags(tags, delete_empty=delete_empty)

selectolax/lexbor/node.pxi

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,11 @@ cdef class LexborNode:
419419
def unwrap(self, delete_empty=False):
420420
"""Replace node with whatever is inside this node.
421421
422+
Parameters
423+
----------
424+
delete_empty : bool, default False
425+
If True, removes empty tags.
426+
422427
Examples
423428
--------
424429
@@ -448,7 +453,7 @@ cdef class LexborNode:
448453
lxb_dom_node_insert_before(self.node, self.node.first_child)
449454
lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
450455

451-
def unwrap_tags(self, list tags, delete_empty=False):
456+
def unwrap_tags(self, list tags, delete_empty = False):
452457
"""Unwraps specified tags from the HTML tree.
453458
454459
Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -457,6 +462,8 @@ cdef class LexborNode:
457462
----------
458463
tags : list
459464
List of tags to remove.
465+
delete_empty : bool, default False
466+
If True, removes empty tags.
460467
461468
Examples
462469
--------

selectolax/modest/node.pxi

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -515,9 +515,14 @@ cdef class Node:
515515
"""An alias for the decompose method."""
516516
self.decompose(recursive)
517517

518-
def unwrap(self, delete_empty=False):
518+
def unwrap(self, delete_empty = False):
519519
"""Replace node with whatever is inside this node.
520520
521+
Parameters
522+
----------
523+
delete_empty : bool, default False
524+
Whether to delete empty tags.
525+
521526
Examples
522527
--------
523528
@@ -571,7 +576,7 @@ cdef class Node:
571576
for element in self.css(tag):
572577
element.decompose(recursive=recursive)
573578

574-
def unwrap_tags(self, list tags, delete_empty=False):
579+
def unwrap_tags(self, list tags, delete_empty = False):
575580
"""Unwraps specified tags from the HTML tree.
576581
577582
Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -580,6 +585,8 @@ cdef class Node:
580585
----------
581586
tags : list
582587
List of tags to remove.
588+
delete_empty : bool, default False
589+
Whether to delete empty tags.
583590
584591
Examples
585592
--------
@@ -757,7 +764,7 @@ cdef class Node:
757764
else:
758765
raise TypeError("Expected a string or Node instance, but %s found" % type(value).__name__)
759766

760-
def unwrap_tags(self, list_tags, delete_empty=False):
767+
def unwrap_tags(self, list tags, delete_empty = False):
761768
"""Unwraps specified tags from the HTML tree.
762769
763770
Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -766,6 +773,8 @@ cdef class Node:
766773
----------
767774
tags : list
768775
List of tags to remove.
776+
delete_empty : bool, default False
777+
Whether to delete empty tags.
769778
770779
Examples
771780
--------

selectolax/parser.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ class Node:
166166
def strip_tags(self, tags: list[str], recursive: bool = False) -> None:
167167
"""Remove specified tags from the HTML tree."""
168168
...
169-
def unwrap_tags(self, tags: list[str]) -> None:
169+
def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
170170
"""Unwraps specified tags from the HTML tree.
171171
172172
Works the same as the unwrap method, but applied to a list of tags."""
@@ -267,7 +267,7 @@ class HTMLParser:
267267
"""Returns the text of the node including text of all its child nodes."""
268268
...
269269
def strip_tags(self, tags: list[str], recursive: bool = False) -> None: ...
270-
def unwrap_tags(self, tags: list[str]) -> None:
270+
def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
271271
"""Unwraps specified tags from the HTML tree.
272272
273273
Works the same as the unwrap method, but applied to a list of tags."""

selectolax/parser.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ cdef class HTMLParser:
269269
myhtml_collection_destroy(collection)
270270

271271

272-
def unwrap_tags(self, list tags):
272+
def unwrap_tags(self, list tags, delete_empty : bool = False):
273273
"""Unwraps specified tags from the HTML tree.
274274
275275
Works the same as the `unwrap` method, but applied to a list of tags.
@@ -278,6 +278,8 @@ cdef class HTMLParser:
278278
----------
279279
tags : list
280280
List of tags to remove.
281+
delete_empty : bool, default False
282+
If True, removes empty tags.
281283
282284
Examples
283285
--------
@@ -288,7 +290,7 @@ cdef class HTMLParser:
288290
'<body><div>Hello world!</div></body>'
289291
"""
290292
if self.root is not None:
291-
self.root.unwrap_tags(tags)
293+
self.root.unwrap_tags(tags, delete_empty=delete_empty)
292294

293295
@property
294296
def html(self):

0 commit comments

Comments
 (0)