@@ -123,6 +123,74 @@ cdef class LexborNode:
123123 return html
124124 return None
125125
cdef inline str _serialize_html(self, lxb_html_serialize_opt_t options, size_t indent, bint pretty):
    """Serialize this node to an HTML string.

    Fragment roots are serialized through ``serialize_fragment`` /
    ``serialize_fragment_pretty``; every other node is handed straight to
    Lexbor's tree serializers.

    Parameters
    ----------
    options : lxb_html_serialize_opt_t
        Serialization flags. Only forwarded when ``pretty`` is true.
    indent : size_t
        Indentation value. Only forwarded when ``pretty`` is true.
    pretty : bint
        Use the pretty-printing serializer instead of the plain one.

    Returns
    -------
    str or None
        The decoded markup, or ``None`` when the output buffer could not be
        allocated, the serializer reported a non-zero status, or no data
        was produced.
    """
    cdef lexbor_str_t *lxb_str
    cdef lxb_status_t status
    cdef str html = None

    lxb_str = lexbor_str_create()
    if lxb_str == NULL:
        # Allocation failed; there is nothing to serialize into or to free.
        return None

    try:
        if self._is_fragment_root:
            if pretty:
                status = serialize_fragment_pretty(self.node, lxb_str, options, indent)
            else:
                status = serialize_fragment(self.node, lxb_str)
        else:
            if pretty:
                status = lxb_html_serialize_pretty_tree_str(self.node, options, indent, lxb_str)
            else:
                status = lxb_html_serialize_tree_str(self.node, lxb_str)

        if status == 0 and lxb_str.data:
            html = lxb_str.data.decode(_ENCODING).replace(' <-undef>', ' ')
    finally:
        # Release the buffer on every path: success, serializer failure,
        # empty output, or an exception raised while decoding.
        lexbor_str_destroy(lxb_str, self.node.owner_document.text, True)

    return html
147+
def html_pretty(
    self,
    Py_ssize_t indent=0,
    bint skip_ws_nodes=False,
    bint skip_comment=False,
    bint raw=False,
    bint without_closing=False,
    bint tag_with_ns=False,
    bint without_text_indent=False,
    bint full_doctype=False,
):
    """Serialize the current node as pretty-printed HTML.

    Each boolean argument switches on one Lexbor serialization flag; the
    flags are combined and handed to the pretty serializer together with
    ``indent``.

    Parameters
    ----------
    indent : int, optional
        Starting indentation level given to Lexbor. Defaults to ``0``.
    skip_ws_nodes : bool, optional
        Leave out text nodes consisting solely of whitespace.
    skip_comment : bool, optional
        Leave HTML comment nodes out of the output.
    raw : bool, optional
        Emit text and attribute values without HTML escaping.
    without_closing : bool, optional
        Do not emit closing tags for non-void elements.
    tag_with_ns : bool, optional
        Emit namespace prefixes in tag names when available.
    without_text_indent : bool, optional
        Turn off the extra indentation around text and comment content.
    full_doctype : bool, optional
        Emit the complete document type declaration when a doctype node
        is present.

    Returns
    -------
    str or None
        The serialized markup, or ``None`` if serialization did not
        produce a result.

    Raises
    ------
    ValueError
        If ``indent`` is negative.
    """
    if indent < 0:
        raise ValueError(" indent must be greater than or equal to 0")

    # The cast to size_t is safe: negative values were rejected above.
    return self._serialize_html(
        _html_pretty_options(
            skip_ws_nodes,
            skip_comment,
            raw,
            without_closing,
            tag_with_ns,
            without_text_indent,
            full_doctype,
        ),
        <size_t> indent,
        True,
    )
193+
126194 def __hash__ (self ):
127195 return self .mem_id
128196
@@ -1129,6 +1197,51 @@ cdef lxb_status_t serialize_fragment(lxb_dom_node_t *node, lexbor_str_t *lxb_str
11291197
11301198 return LXB_STATUS_OK
11311199
1200+
cdef lxb_status_t serialize_fragment_pretty(
    lxb_dom_node_t *node,
    lexbor_str_t *lxb_str,
    lxb_html_serialize_opt_t options,
    size_t indent,
):
    """Pretty-serialize ``node`` and each sibling after it into ``lxb_str``.

    Walks the ``next`` chain starting at ``node`` and appends the pretty
    serialization of every node visited. The walk ends at the first
    serializer failure, whose status is returned; ``LXB_STATUS_OK`` is
    returned when the whole chain (possibly empty) was serialized.
    """
    cdef lxb_dom_node_t *cursor = node
    cdef lxb_status_t rc = LXB_STATUS_OK

    # Continue only while there is a node left and nothing has failed yet.
    while cursor != NULL and rc == LXB_STATUS_OK:
        rc = lxb_html_serialize_pretty_tree_str(cursor, options, indent, lxb_str)
        cursor = cursor.next

    return rc
1215+
1216+
cdef inline lxb_html_serialize_opt_t _html_pretty_options(
    bint skip_ws_nodes,
    bint skip_comment,
    bint raw,
    bint without_closing,
    bint tag_with_ns,
    bint without_text_indent,
    bint full_doctype,
):
    """Combine boolean switches into a Lexbor serialization option mask.

    Starts from ``LXB_HTML_SERIALIZE_OPT_UNDEF`` and ORs in the matching
    ``LXB_HTML_SERIALIZE_OPT_*`` constant for every argument that is true.
    """
    # Accumulate in a plain int so the flags can be OR-ed in place; the
    # result is converted back to the option type once, on return.
    cdef int mask = LXB_HTML_SERIALIZE_OPT_UNDEF

    if skip_ws_nodes:
        mask |= LXB_HTML_SERIALIZE_OPT_SKIP_WS_NODES
    if skip_comment:
        mask |= LXB_HTML_SERIALIZE_OPT_SKIP_COMMENT
    if raw:
        mask |= LXB_HTML_SERIALIZE_OPT_RAW
    if without_closing:
        mask |= LXB_HTML_SERIALIZE_OPT_WITHOUT_CLOSING
    if tag_with_ns:
        mask |= LXB_HTML_SERIALIZE_OPT_TAG_WITH_NS
    if without_text_indent:
        mask |= LXB_HTML_SERIALIZE_OPT_WITHOUT_TEXT_INDENT
    if full_doctype:
        mask |= LXB_HTML_SERIALIZE_OPT_FULL_DOCTYPE

    return <lxb_html_serialize_opt_t> mask
1244+
11321245cdef inline bint _is_node_type(lxb_dom_node_t * node, lxb_dom_node_type_t expected_type):
11331246 return node != NULL and node.type == expected_type
11341247
0 commit comments