-
Notifications
You must be signed in to change notification settings - Fork 284
Expand file tree
/
Copy pathHtmlOperations.fs
More file actions
198 lines (168 loc) · 7.83 KB
/
Copy pathHtmlOperations.fs
File metadata and controls
198 lines (168 loc) · 7.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
module FSharp.Data.Tests.HtmlOperations
open NUnit.Framework
open FsUnit
open System
open FSharp.Data
open FSharp.Data.HtmlNode
open FSharp.Data.HtmlAttribute
// Checks that HtmlAttribute.name returns the name the attribute was created with.
[<Test>]
let ``Can get the name of a HtmlAttribute``() =
    let idAttribute = HtmlAttribute.New("id", "table_1")
    idAttribute
    |> HtmlAttribute.name
    |> should equal "id"
// Checks that HtmlAttribute.value returns the value the attribute was created with.
[<Test>]
let ``Can get the value of a HtmlAttribute``() =
    let idAttribute = HtmlAttribute.New("id", "table_1")
    idAttribute
    |> HtmlAttribute.value
    |> should equal "table_1"
// Shared fixture: a "div" element carrying an id attribute, a two-token class
// attribute, and a single text child.
let htmlFragment =
    let attributes = [ "id", "my_div"; "class", "my_class highlighted" ]
    let children = [ HtmlNode.NewText "Hello World!" ]
    HtmlNode.NewElement("div", attributes, children)
// Checks that HtmlNode.name reports "div" for the shared fragment.
[<Test>]
let ``Can get the name of a HtmlElement``() =
    let actualName = htmlFragment |> HtmlNode.name
    actualName |> should equal "div"
// Checks that HtmlNode.name of a text node is the empty string.
[<Test>]
let ``Name of a content element is an Empty string``() =
    let textNode = HtmlNode.NewText "Hello"
    textNode
    |> HtmlNode.name
    |> should equal String.Empty
// Looks up the "class" attribute on the shared fragment and checks both the
// attribute returned by HtmlNode.attribute and the string returned by
// HtmlNode.attributeValue, so the test covers the value lookup its name refers to.
[<Test>]
let ``Getting the value of an attribute works``() =
    let expectedAttribute = HtmlAttribute.New("class", "my_class highlighted")
    HtmlNode.attribute "class" htmlFragment |> should equal expectedAttribute
    HtmlNode.attributeValue "class" htmlFragment |> should equal "my_class highlighted"
// Checks that HtmlNode.tryGetAttribute yields None for an attribute name
// that the shared fragment does not carry.
[<Test>]
let ``Getting a missing attribute returns None``() =
    let lookup = htmlFragment |> HtmlNode.tryGetAttribute "test"
    lookup |> should equal None
// Checks that HtmlNode.attributeValue yields "" for an attribute name
// that the shared fragment does not carry.
[<Test>]
let ``Getting the value of a missing attribute returns empty string``() =
    let value = htmlFragment |> HtmlNode.attributeValue "test"
    value |> should equal ""
// Checks that HtmlNode.hasId recognises the id of the shared fragment.
[<Test>]
let ``Checking for id works``() =
    htmlFragment
    |> HtmlNode.hasId "my_div"
    |> should equal true
// Checks HtmlNode.hasClass against the shared fragment for each single class
// and for both orderings of the two classes combined in one query string.
[<Test>]
let ``Checking for class works``() =
    let queries =
        [ "my_class"
          "highlighted"
          "my_class highlighted"
          "highlighted my_class" ]
    for query in queries do
        htmlFragment |> HtmlNode.hasClass query |> should equal true
// Checks that HtmlNode.elements of a text node is an empty HtmlNode list.
[<Test>]
let ``The children of a content node is an empty list``() =
    let children = HtmlNode.NewText "Hello" |> HtmlNode.elements
    children |> should equal (List.empty<HtmlNode>)
// Checks that HtmlNode.elements of the shared fragment is its single text child.
[<Test>]
let ``Can get the children of a node``() =
    let expected = [ HtmlNode.NewText "Hello World!" ]
    htmlFragment
    |> HtmlNode.elements
    |> should equal expected
// Shared fixture: parses a small HTML page and keeps the first top-level
// element of the parsed document (presumably the <html> element - the path
// assertions in the tests below end in "html").
let doc =
    let markup =
        """<html>
<head>
<script language="JavaScript" src="/bwx_generic.js"></script>
<link rel="stylesheet" type="text/css" href="/bwx_style.css">
</head>
<body>
<img src="myimg.jpg">
<table title="table">
<tr><th>Column 1</th><th>Column 2</th></tr>
<tr><td>1</td><td>yes</td></tr>
</table>
</body>
</html>"""
    markup
    |> HtmlDocument.Parse
    |> HtmlDocument.elements
    |> Seq.head
// Checks that HtmlNode.descendants with a name predicate finds exactly the
// <link> element of the shared document.
[<Test>]
let ``Can get descendants of a node that matches a predicate``() =
    let isLink node = HtmlNode.name node = "link"
    let result = HtmlNode.descendants false isLink doc
    let expected =
        HtmlNode.NewElement(
            "link",
            ["rel", "stylesheet"; "type", "text/css"; "href", "/bwx_style.css"])
    result |> should equal [expected]
// Checks that HtmlNode.descendantsNamed with the name "link" finds exactly
// the <link> element of the shared document.
[<Test>]
let ``Can get all of the descendants that match the given set of names``() =
    let result = HtmlNode.descendantsNamed false ["link"] doc
    let expected =
        HtmlNode.NewElement(
            "link",
            ["rel", "stylesheet"; "type", "text/css"; "href", "/bwx_style.css"])
    result |> should equal [expected]
// Checks that HtmlNode.descendantsWithPath returns the <link> element paired
// with a path whose node names are "head" then "html".
[<Test>]
let ``Can get descendants with path of a node that matches a predicate``() =
    let matches =
        doc |> HtmlNode.descendantsWithPath false (fun node -> HtmlNode.name node = "link")
    let expected =
        HtmlNode.NewElement(
            "link",
            ["rel", "stylesheet"; "type", "text/css"; "href", "/bwx_style.css"])

    // First component of each pair: the matched node.
    let nodes = matches |> Seq.toList |> List.map fst
    nodes |> should equal [expected]

    // Second component of each pair: the path, compared by node name.
    let pathNames = [ for (_, path) in matches -> path |> List.map HtmlNode.name ]
    pathNames |> should equal [["head"; "html"]]
// Checks that HtmlNode.descendantsNamedWithPath for the name "link" returns
// the <link> element paired with a path whose node names are "head" then "html".
[<Test>]
let ``Can get all of the descendants with path that match the given set of names``() =
    let matches = HtmlNode.descendantsNamedWithPath false ["link"] doc
    let expected =
        HtmlNode.NewElement(
            "link",
            ["rel", "stylesheet"; "type", "text/css"; "href", "/bwx_style.css"])

    // Matched nodes.
    let nodes = matches |> Seq.toList |> List.map fst
    nodes |> should equal [expected]

    // Paths, compared by node name.
    let pathNames =
        matches
        |> Seq.toList
        |> List.map (fun (_, path) -> path |> List.map HtmlNode.name)
    pathNames |> should equal [["head"; "html"]]
// Parses a <body> fragment and checks that HtmlNode.elementsNamed with
// "img" and "div" returns those two child elements and not the table.
[<Test>]
let ``Can get all elements of a node that matches a set of names``() =
    let body =
        """<body>
<img src="myimg.jpg">
<div>Hello World</div>
<table title="table">
<tr><th>Column 1</th><th>Column 2</th></tr>
<tr><td>1</td><td>yes</td></tr>
</table>
</body>"""
        |> HtmlNode.Parse
        |> List.head
    let result = body |> HtmlNode.elementsNamed ["img"; "div"]
    let expectedImg = HtmlNode.NewElement("img", ["src", "myimg.jpg"])
    let expectedDiv = HtmlNode.NewElement("div", [HtmlNode.NewText "Hello World"])
    result |> should equal [expectedImg; expectedDiv]
// Checks that HtmlNode.innerText of each <tr> in the shared document is the
// concatenated text of its cells.
[<Test>]
let ``Can extract the inner text from a node``() =
    let rowTexts = [ for row in doc.Descendants("tr") -> HtmlNode.innerText row ]
    let expected = [ "Column 1Column 2"; "1yes" ]
    rowTexts |> should equal expected
// Contrasts InnerText (includes the text of the nested <span>) with
// DirectInnerText (only the text directly inside the <div>).
[<Test>]
let ``Can get direct inner text``() =
    let node =
        "<div>21 minutes ago<span> LIVE</span> x</div>"
        |> HtmlNode.Parse
        |> Seq.exactlyOne
    let fullText = node.InnerText()
    fullText |> should equal "21 minutes ago LIVE x"
    let directText = node.DirectInnerText()
    directText |> should equal "21 minutes ago x"
// Checks that HtmlNode.innerText of a comment node is the empty string.
[<Test>]
let ``Inner text on a comment should be String.Empty``() =
    HtmlNode.NewComment "Hello World"
    |> HtmlNode.innerText
    |> should equal String.Empty
// --------------------------------------------------------------------------------------
// Tests for Utils module functions (tested indirectly through public API)
// Checks that HtmlNode.elementsNamed ["p"] matches both the <P> and the <p>
// child, in document order.
[<Test>]
let ``Case-insensitive element name matching works via getNameSet``() =
    let root =
        "<div><P>Para 1</P><span>Span</span><p>Para 2</p></div>"
        |> HtmlNode.Parse
        |> Seq.head
    let paragraphs = HtmlNode.elementsNamed ["p"] root
    paragraphs |> List.length |> should equal 2
    paragraphs
    |> List.map HtmlNode.innerText
    |> should equal ["Para 1"; "Para 2"]
// Checks that HtmlNode.descendantsNamed, given names in mixed case
// ("P" and "div"), yields two matches for the parsed fragment.
[<Test>]
let ``Case-insensitive descendant name matching works with mixed case input``() =
    let root =
        "<div><DIV><P>Test</P></DIV><p>Another</p></div>"
        |> HtmlNode.Parse
        |> Seq.head
    let matches =
        root
        |> HtmlNode.descendantsNamed false ["P"; "div"]
        |> List.ofSeq
    List.length matches |> should equal 2
// Checks that HtmlNode.hasAttribute matches attribute names and values
// regardless of the casing used in the markup or in the query.
[<Test>]
let ``Case-insensitive attribute matching works via toLower``() =
    let element =
        "<div ID='Test' Class='highlight'>Content</div>"
        |> HtmlNode.Parse
        |> Seq.head
    let queries = [ "id", "test"; "ID", "TEST"; "class", "HIGHLIGHT" ]
    for (name, value) in queries do
        element |> HtmlNode.hasAttribute name value |> should equal true
// Checks that HtmlNode.elementsNamed with no names returns no elements.
[<Test>]
let ``getNameSet handles empty name collections``() =
    let root =
        "<div><p>Test</p></div>"
        |> HtmlNode.Parse
        |> Seq.head
    let matches = HtmlNode.elementsNamed [] root
    List.length matches |> should equal 0
// Checks that repeating a name in different casings does not change the
// result of HtmlNode.elementsNamed: the two paragraph children are still
// returned once each.
[<Test>]
let ``getNameSet handles duplicate names (case variations)``() =
    let root =
        "<div><P>Para 1</P><span>Span</span><p>Para 2</p></div>"
        |> HtmlNode.Parse
        |> Seq.head
    // The same name is supplied three times, in two different casings.
    let matches = HtmlNode.elementsNamed ["p"; "P"; "p"] root
    List.length matches |> should equal 2
// Checks that HtmlNode.hasAttribute matches a value containing accented
// characters when the query is given in lower case.
[<Test>]
let ``toLower handles special characters in attribute values``() =
    let element =
        "<div title='Ñoño Café'>Content</div>"
        |> HtmlNode.Parse
        |> Seq.head
    let isMatch = HtmlNode.hasAttribute "title" "ñoño café" element
    isMatch |> should equal true
// Checks that HtmlNode.descendantsNamedWithPath finds the <Title> element
// when queried with the lower-case name "title".
[<Test>]
let ``Case-insensitive matching works in descendantsNamedWithPath``() =
    let root =
        "<html><head><Title>Test</Title></head></html>"
        |> HtmlNode.Parse
        |> Seq.head
    let titles = HtmlNode.descendantsNamedWithPath false ["title"] root
    Seq.length titles |> should equal 1
    let (firstTitle, _) = Seq.head titles
    HtmlNode.innerText firstTitle |> should equal "Test"