Skip to content

Commit cbd975a

Browse files
github-actions[bot]Repo AssistCopilot
authored
[Repo Assist] Fix #964: Comprehensive tests for emoji in FSX → HTML conversion (#992)
* Add comprehensive tests for emoji in FSX comments -> HTML conversion (issue #964)
  - Add 13 emoji tests to FSharp.Markdown.Tests covering all Unicode categories: supplementary plane (surrogate pairs), BMP, variation selectors, ZWJ sequences in paragraphs, headings, bold/italic, lists, links, inline code, fenced code
  - Add 17 emoji tests to FSharp.Literate.Tests covering full FSX -> HTML pipeline: ParseScriptString, file on disk, ConvertScriptFile HTML output, Markdown output, multiple doc comment blocks, multi-line, HTML escaping compatibility
  - Add emoji.fsx test fixture file
  - No code changes required: the pipeline already handles emoji correctly
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
* Apply Fantomas formatting to test files
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---------
Co-authored-by: Repo Assist <repo-assist@github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 98bef08 commit cbd975a

4 files changed

Lines changed: 310 additions & 0 deletions

File tree

tests/FSharp.Literate.Tests/FSharp.Literate.Tests.fsproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
</PropertyGroup>
88
<ItemGroup>
99
<None Include="files/simple1.fsx" />
10+
<None Include="files/emoji.fsx" />
1011
<None Include="files/simple2.md" />
1112
<None Include="files/template.html" />
1213
<None Include="files/docpage.html" />

tests/FSharp.Literate.Tests/LiterateTests.fs

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1767,3 +1767,186 @@ let ``Script transforms to markdown`` () =
17671767
md |> shouldContainText "[substitute-in-href-text: simple1](http://google.com)"
17681768
md |> shouldContainText "Another [hyperlink](simple2.md)"
17691769
md |> shouldContainText "let hello ="
1770+
1771+
// --------------------------------------------------------------------------------------
1772+
// Emoji in FSX comments → HTML (Issue #964)
1773+
// These tests verify that emoji characters are preserved throughout the full
1774+
// FSX → Markdown → HTML conversion pipeline.
1775+
// --------------------------------------------------------------------------------------
1776+
1777+
// Supplementary plane emoji (code points above U+FFFF, each stored as a surrogate pair in UTF-16)
1778+
let emojiParty = "\U0001F389" // 🎉 PARTY POPPER
1779+
let emojiRocket = "\U0001F680" // 🚀 ROCKET
1780+
let emojiConstruction = "\U0001F6A7" // 🚧 CONSTRUCTION SIGN
1781+
// Basic multilingual plane emoji (single UTF-16 code unit)
1782+
let emojiStar = "\u2B50" // ⭐ WHITE MEDIUM STAR
1783+
let emojiCheck = "\u2705" // ✅ WHITE HEAVY CHECK MARK
1784+
// Emoji with variation selector (two code points)
1785+
let emojiWarning = "\u26A0\uFE0F" // ⚠️ WARNING SIGN + VS-16
1786+
// ZWJ sequence (multiple code points joined with zero-width joiner)
1787+
let emojiFamily = "\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466" // 👨‍👩‍👧‍👦
1788+
1789+
[<Test>]
1790+
let ``Supplementary plane emoji in FSX doc comment are preserved in HTML`` () =
1791+
let fsx = sprintf "(**\nLike this %s and %s\n*)\nlet x = 42" emojiParty emojiRocket
1792+
let doc = Literate.ParseScriptString(fsx)
1793+
let html = Literate.ToHtml(doc)
1794+
html |> shouldContainText emojiParty
1795+
html |> shouldContainText emojiRocket
1796+
1797+
[<Test>]
1798+
let ``BMP emoji in FSX doc comment are preserved in HTML`` () =
1799+
let fsx = sprintf "(**\nStars %s and checks %s\n*)\nlet x = 42" emojiStar emojiCheck
1800+
let doc = Literate.ParseScriptString(fsx)
1801+
let html = Literate.ToHtml(doc)
1802+
html |> shouldContainText emojiStar
1803+
html |> shouldContainText emojiCheck
1804+
1805+
[<Test>]
1806+
let ``Variation selector emoji in FSX doc comment are preserved in HTML`` () =
1807+
let fsx = sprintf "(**\nWarning %s sign\n*)\nlet x = 42" emojiWarning
1808+
let doc = Literate.ParseScriptString(fsx)
1809+
let html = Literate.ToHtml(doc)
1810+
html |> shouldContainText emojiWarning
1811+
1812+
[<Test>]
1813+
let ``ZWJ emoji sequence in FSX doc comment are preserved in HTML`` () =
1814+
let fsx = sprintf "(**\nFamily %s emoji\n*)\nlet x = 42" emojiFamily
1815+
let doc = Literate.ParseScriptString(fsx)
1816+
let html = Literate.ToHtml(doc)
1817+
html |> shouldContainText emojiFamily
1818+
1819+
[<Test>]
1820+
let ``Emoji in FSX heading (H1) are preserved in HTML`` () =
1821+
let fsx = sprintf "(**\n# Heading %s Title\n*)\nlet x = 42" emojiParty
1822+
let doc = Literate.ParseScriptString(fsx)
1823+
let html = Literate.ToHtml(doc)
1824+
html |> shouldContainText emojiParty
1825+
1826+
[<Test>]
1827+
let ``Emoji in FSX heading (H2) are preserved in HTML`` () =
1828+
let fsx = sprintf "(**\n## Subheading %s\n*)\nlet x = 42" emojiRocket
1829+
let doc = Literate.ParseScriptString(fsx)
1830+
let html = Literate.ToHtml(doc)
1831+
html |> shouldContainText emojiRocket
1832+
1833+
[<Test>]
1834+
let ``Emoji in bold spans in FSX doc comment are preserved in HTML`` () =
1835+
let fsx = sprintf "(**\n**Bold %s text**\n*)\nlet x = 42" emojiParty
1836+
let doc = Literate.ParseScriptString(fsx)
1837+
let html = Literate.ToHtml(doc)
1838+
html |> shouldContainText emojiParty
1839+
1840+
[<Test>]
1841+
let ``Emoji in italic spans in FSX doc comment are preserved in HTML`` () =
1842+
let fsx = sprintf "(**\n_Italic %s text_\n*)\nlet x = 42" emojiStar
1843+
let doc = Literate.ParseScriptString(fsx)
1844+
let html = Literate.ToHtml(doc)
1845+
html |> shouldContainText emojiStar
1846+
1847+
[<Test>]
1848+
let ``Emoji in list items in FSX doc comment are preserved in HTML`` () =
1849+
let fsx = sprintf "(**\n- Item %s\n- Item %s\n*)\nlet x = 42" emojiParty emojiCheck
1850+
let doc = Literate.ParseScriptString(fsx)
1851+
let html = Literate.ToHtml(doc)
1852+
html |> shouldContainText emojiParty
1853+
html |> shouldContainText emojiCheck
1854+
1855+
[<Test>]
1856+
let ``Emoji in inline code in FSX doc comment are preserved in HTML`` () =
1857+
let fsx = sprintf "(**\nCode `%s emoji` here\n*)\nlet x = 42" emojiParty
1858+
let doc = Literate.ParseScriptString(fsx)
1859+
let html = Literate.ToHtml(doc)
1860+
html |> shouldContainText emojiParty
1861+
1862+
[<Test>]
1863+
let ``All emoji types together in FSX doc comment are all preserved in HTML`` () =
1864+
let fsx =
1865+
sprintf
1866+
"(**\nAll: %s %s %s %s %s\n*)\nlet x = 42"
1867+
emojiParty
1868+
emojiConstruction
1869+
emojiStar
1870+
emojiWarning
1871+
emojiCheck
1872+
1873+
let doc = Literate.ParseScriptString(fsx)
1874+
let html = Literate.ToHtml(doc)
1875+
html |> shouldContainText emojiParty
1876+
html |> shouldContainText emojiConstruction
1877+
html |> shouldContainText emojiStar
1878+
html |> shouldContainText emojiWarning
1879+
html |> shouldContainText emojiCheck
1880+
1881+
[<Test>]
1882+
let ``Emoji across multiple FSX doc comment blocks are all preserved in HTML`` () =
1883+
let fsx = sprintf "(**\nFirst block %s\n*)\nlet x = 42\n(**\nSecond block %s\n*)\nlet y = 99" emojiParty emojiRocket
1884+
let doc = Literate.ParseScriptString(fsx)
1885+
let html = Literate.ToHtml(doc)
1886+
html |> shouldContainText emojiParty
1887+
html |> shouldContainText emojiRocket
1888+
1889+
[<Test>]
1890+
let ``Emoji in multi-line FSX doc comment are preserved in HTML`` () =
1891+
let fsx = sprintf "(**\nLine one %s\nLine two %s\nLine three %s\n*)\nlet x = 42" emojiParty emojiStar emojiCheck
1892+
let doc = Literate.ParseScriptString(fsx)
1893+
let html = Literate.ToHtml(doc)
1894+
html |> shouldContainText emojiParty
1895+
html |> shouldContainText emojiStar
1896+
html |> shouldContainText emojiCheck
1897+
1898+
[<Test>]
1899+
let ``Emoji do not break HTML escaping in FSX doc comments`` () =
1900+
let fsx = sprintf "(**\nA &amp; %s and &lt;tag&gt;\n*)\nlet x = 42" emojiParty
1901+
let doc = Literate.ParseScriptString(fsx)
1902+
let html = Literate.ToHtml(doc)
1903+
html |> shouldContainText emojiParty
1904+
html |> shouldContainText "&amp;"
1905+
1906+
[<Test>]
1907+
let ``Emoji in FSX file on disk are preserved in HTML output`` () =
1908+
let fsx = File.ReadAllText(__SOURCE_DIRECTORY__ </> "files" </> "emoji.fsx")
1909+
let doc = Literate.ParseScriptString(fsx, __SOURCE_DIRECTORY__ </> "files" </> "emoji.fsx")
1910+
let html = Literate.ToHtml(doc)
1911+
html |> shouldContainText emojiParty
1912+
html |> shouldContainText emojiRocket
1913+
html |> shouldContainText emojiStar
1914+
html |> shouldContainText emojiWarning
1915+
html |> shouldContainText emojiCheck
1916+
html |> shouldContainText emojiConstruction
1917+
html |> shouldContainText emojiFamily
1918+
1919+
[<Test>]
1920+
let ``Emoji in ConvertScriptFile HTML output file are preserved`` () =
1921+
let outputFile = __SOURCE_DIRECTORY__ </> "output" </> "emoji.html"
1922+
1923+
Literate.ConvertScriptFile(
1924+
__SOURCE_DIRECTORY__ </> "files" </> "emoji.fsx",
1925+
outputKind = OutputKind.Html,
1926+
output = outputFile
1927+
)
1928+
1929+
let html = File.ReadAllText outputFile
1930+
html |> shouldContainText emojiParty
1931+
html |> shouldContainText emojiRocket
1932+
html |> shouldContainText emojiStar
1933+
html |> shouldContainText emojiWarning
1934+
html |> shouldContainText emojiCheck
1935+
html |> shouldContainText emojiConstruction
1936+
1937+
[<Test>]
1938+
let ``Emoji in ConvertScriptFile Markdown output file are preserved`` () =
1939+
let outputFile = __SOURCE_DIRECTORY__ </> "output2" </> "emoji.md"
1940+
1941+
Literate.ConvertScriptFile(
1942+
__SOURCE_DIRECTORY__ </> "files" </> "emoji.fsx",
1943+
outputKind = OutputKind.Markdown,
1944+
output = outputFile
1945+
)
1946+
1947+
let md = File.ReadAllText outputFile
1948+
md |> shouldContainText emojiParty
1949+
md |> shouldContainText emojiRocket
1950+
md |> shouldContainText emojiStar
1951+
1952+
// End emoji tests
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
(**
2+
# Title with 🎉 Emoji
3+
4+
This paragraph contains **bold 🚀 text** and *italic ⭐ text*.
5+
6+
Here are some emoji categories:
7+
- Party 🎉 (supplementary plane, surrogate pair)
8+
- Rocket 🚀 (supplementary plane, surrogate pair)
9+
- Star ⭐ (basic multilingual plane)
10+
- Warning ⚠️ (BMP + variation selector)
11+
- Check ✅ (BMP)
12+
- Construction 🚧 (supplementary plane)
13+
14+
A family ZWJ sequence: 👨‍👩‍👧‍👦
15+
16+
Inline `code with 🎉 emoji` here.
17+
18+
*)
19+
let x = 42

tests/FSharp.Markdown.Tests/Markdown.fs

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,113 @@ let ``Inline HTML tag containing 'at' is not turned into hyperlink`` () =
5454
)
5555
) ]
5656

57+
// --------------------------------------------------------------------------------------
58+
// Emoji in Markdown → HTML (Issue #964)
59+
// These tests verify the Markdown → HTML path for emoji characters.
60+
// Emoji should be preserved as-is in HTML output (as literal characters, not HTML entities).
61+
// --------------------------------------------------------------------------------------
62+
63+
// Supplementary plane emoji (code points above U+FFFF, each stored as a surrogate pair in UTF-16)
64+
let emojiParty = "\U0001F389" // 🎉 PARTY POPPER
65+
let emojiRocket = "\U0001F680" // 🚀 ROCKET
66+
let emojiConstruction = "\U0001F6A7" // 🚧 CONSTRUCTION SIGN
67+
// Basic multilingual plane emoji (single UTF-16 code unit)
68+
let emojiStar = "\u2B50" // ⭐ WHITE MEDIUM STAR
69+
let emojiCheck = "\u2705" // ✅ WHITE HEAVY CHECK MARK
70+
// Emoji with variation selector (two code points)
71+
let emojiWarning = "\u26A0\uFE0F" // ⚠️ WARNING SIGN + VS-16
72+
// ZWJ sequence (multiple code points joined with zero-width joiner)
73+
let emojiFamily = "\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466" // 👨‍👩‍👧‍👦
74+
75+
[<Test>]
76+
let ``Supplementary plane emoji (surrogate pair) are preserved in paragraph`` () =
77+
let html = sprintf "Like this %s and %s" emojiParty emojiRocket |> Markdown.ToHtml
78+
html |> should contain emojiParty
79+
html |> should contain emojiRocket
80+
81+
[<Test>]
82+
let ``BMP emoji (single code unit) are preserved in paragraph`` () =
83+
let html = sprintf "Stars %s and checks %s" emojiStar emojiCheck |> Markdown.ToHtml
84+
html |> should contain emojiStar
85+
html |> should contain emojiCheck
86+
87+
[<Test>]
88+
let ``Emoji with variation selector are preserved`` () =
89+
let html = sprintf "Warning %s sign" emojiWarning |> Markdown.ToHtml
90+
html |> should contain emojiWarning
91+
92+
[<Test>]
93+
let ``ZWJ emoji sequences are preserved`` () =
94+
let html = sprintf "Family %s emoji" emojiFamily |> Markdown.ToHtml
95+
html |> should contain emojiFamily
96+
97+
[<Test>]
98+
let ``Emoji are preserved in headings`` () =
99+
let html =
100+
sprintf "# Heading %s\n\n## Subheading %s" emojiParty emojiRocket
101+
|> Markdown.ToHtml
102+
103+
html |> should contain emojiParty
104+
html |> should contain emojiRocket
105+
106+
[<Test>]
107+
let ``Emoji are preserved in bold and italic spans`` () =
108+
let html = sprintf "**Bold %s** and _italic %s_" emojiParty emojiStar |> Markdown.ToHtml
109+
html |> should contain emojiParty
110+
html |> should contain emojiStar
111+
112+
[<Test>]
113+
let ``Emoji are preserved in list items`` () =
114+
let html =
115+
sprintf "- Item %s\n- Item %s\n- Item %s" emojiParty emojiStar emojiCheck
116+
|> Markdown.ToHtml
117+
118+
html |> should contain emojiParty
119+
html |> should contain emojiStar
120+
html |> should contain emojiCheck
121+
122+
[<Test>]
123+
let ``Emoji are preserved in link text`` () =
124+
let html = sprintf "[Link %s](http://example.com)" emojiParty |> Markdown.ToHtml
125+
html |> should contain emojiParty
126+
127+
[<Test>]
128+
let ``Emoji are preserved in inline code`` () =
129+
let html = sprintf "Code `%s emoji`" emojiParty |> Markdown.ToHtml
130+
html |> should contain emojiParty
131+
132+
[<Test>]
133+
let ``Emoji do not break HTML escaping of & < > characters`` () =
134+
let html = sprintf "A &amp; %s and &lt;tag&gt;" emojiParty |> Markdown.ToHtml
135+
html |> should contain "&amp;"
136+
html |> should contain "&lt;"
137+
html |> should contain "&gt;"
138+
html |> should contain emojiParty
139+
140+
[<Test>]
141+
let ``Multiple emoji types together are all preserved`` () =
142+
let text = sprintf "%s%s%s%s%s" emojiParty emojiConstruction emojiStar emojiWarning emojiCheck
143+
let html = text |> Markdown.ToHtml
144+
html |> should contain emojiParty
145+
html |> should contain emojiConstruction
146+
html |> should contain emojiStar
147+
html |> should contain emojiWarning
148+
html |> should contain emojiCheck
149+
150+
[<Test>]
151+
let ``Emoji at start and end of paragraph are preserved`` () =
152+
let html = sprintf "%s Start and End %s" emojiParty emojiRocket |> Markdown.ToHtml
153+
html |> should contain emojiParty
154+
html |> should contain emojiRocket
155+
156+
[<Test>]
157+
let ``Emoji are preserved in fenced code block`` () =
158+
let md = sprintf "```\nlet emoji = \"%s\"\n```" emojiParty
159+
let html = md |> Markdown.ToHtml
160+
html |> should contain emojiParty
161+
162+
// End emoji tests
163+
57164
[<Test>]
58165
let ``Encode '<' and '>' characters as HTML entities`` () =
59166
let doc = "foo\n\n - a --> b" |> Markdown.ToHtml

0 commit comments

Comments
 (0)