Skip to content

Commit 80a87b1

Browse files
Fix HtmlNode.ToString: preserve whitespace in elements nested inside <pre>
Elements nested multiple levels deep inside a <pre> block could have newlines and indentation inserted between them during serialization. This corrupted output from syntax highlighters (e.g. shiki) that emit <pre><code><span class="line"><span>...</span></span></code></pre>.

Root cause: the serialize function did not propagate an 'insidePre' context to descendant elements. When a non-pre element with multiple element children (onlyText = false) appeared inside <pre>, the canAddNewLine flag on sibling elements caused newlines to be inserted.

Fix: add an 'insidePre' parameter to serialize. When true, all newline/indentation formatting is suppressed regardless of element type.

Closes #1509

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent d8eb38d commit 80a87b1

2 files changed

Lines changed: 19 additions & 6 deletions

File tree

src/FSharp.Data.Html.Core/HtmlNode.fs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ type HtmlNode =
114114

115115
fun name -> Set.contains name set
116116

117-
let rec serialize (sb: StringBuilder) indentation canAddNewLine html =
117+
let rec serialize (sb: StringBuilder) indentation canAddNewLine insidePre html =
118118
let append (str: string) = sb.Append str |> ignore
119119

120120
let appendEndTag name =
@@ -135,8 +135,9 @@ type HtmlNode =
135135
| _ -> false)
136136

137137
let isPreTag = name = "pre"
138+
let nowInsidePre = insidePre || isPreTag
138139

139-
if canAddNewLine && not (onlyText || isPreTag) then
140+
if canAddNewLine && not insidePre && not (onlyText || isPreTag) then
140141
newLine 0
141142

142143
append "<"
@@ -157,16 +158,16 @@ type HtmlNode =
157158
else
158159
append ">"
159160

160-
if not (onlyText || isPreTag) then
161+
if not insidePre && not (onlyText || isPreTag) then
161162
newLine 2
162163

163164
let mutable canAddNewLine = false
164165

165166
for element in elements do
166-
serialize sb (indentation + 2) canAddNewLine element
167+
serialize sb (indentation + 2) canAddNewLine nowInsidePre element
167168
canAddNewLine <- true
168169

169-
if not (onlyText || isPreTag) then
170+
if not insidePre && not (onlyText || isPreTag) then
170171
newLine 0
171172

172173
appendEndTag name
@@ -181,7 +182,7 @@ type HtmlNode =
181182
append "]]>"
182183

183184
let sb = StringBuilder()
184-
serialize sb 0 false x |> ignore
185+
serialize sb 0 false false x |> ignore
185186
sb.ToString()
186187

187188
/// <exclude />

tests/FSharp.Data.Core.Tests/HtmlParser.fs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,18 @@ let ``Maintain whitespace inside pre tag through round-trip``() =
869869
let expected = html
870870
result |> should equal expected
871871

872+
[<Test>]
873+
let ``Maintain whitespace in deeply nested elements inside pre through round-trip``() =
874+
// Regression test for https://github.com/fsprojects/FSharp.Data/issues/1509
875+
// Syntax highlighters (e.g. shiki) emit <pre><code><span class="line"><span>...</span></span></code></pre>
876+
// Without the insidePre fix, the nested multi-element spans would have newlines inserted between them.
877+
let html = """<pre><code><span class="line"><span style="color:red">let</span> <span style="color:blue">x</span> <span style="color:green">=</span> <span style="color:black">1</span></span></code></pre>"""
878+
879+
let result = HtmlDocument.Parse(html).ToString()
880+
881+
let expected = html
882+
result |> should equal expected
883+
872884
[<Test>]
873885
let ``Can parse national rail mobile site correctly``() =
874886
HtmlDocument.Load "UKDepartures.html"

0 commit comments

Comments
 (0)