Skip to content

Commit c87020d

Browse files
[Repo Assist] Split HtmlDocument operations out of HtmlOperations.fs (#1683)
* Split HtmlDocument operations out of HtmlOperations.fs into HtmlDocumentOperations.fs HtmlOperations.fs (1169 lines) contained both HtmlNode/HtmlAttribute operations and HtmlDocument operations. Split HtmlDocument module, HtmlDocumentExtensions, and HtmlExtensions into a new HtmlDocumentOperations.fs file, reducing HtmlOperations.fs to 865 lines. Part of #1677 (splitting large files). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * ci: trigger CI checks --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 32c2ce2 commit c87020d

3 files changed

Lines changed: 316 additions & 305 deletions

File tree

src/FSharp.Data.Html.Core/FSharp.Data.Html.Core.fsproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
<Compile Include="HtmlNode.fs" />
1919
<Compile Include="HtmlParser.fs" />
2020
<Compile Include="HtmlOperations.fs" />
21+
<Compile Include="HtmlDocumentOperations.fs" />
2122
<Compile Include="HtmlCssSelectorExtensions.fs" />
2223
<Compile Include="HtmlActivePatterns.fs" />
2324
<Compile Include="HtmlInference.fs" />
Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
namespace FSharp.Data
2+
3+
open FSharp.Data
4+
open System.Runtime.CompilerServices
5+
6+
/// Internal helpers shared by the document-level operations in this file.
[<AutoOpen>]
module private DocumentUtils =

    /// Lower-cases a string using the invariant culture.
    let inline toLower (s: string) = s.ToLowerInvariant()

    /// Builds a set holding the invariant lower-cased form of every supplied name.
    let inline getNameSet names =
        Set.ofSeq (Seq.map toLower names)
11+
12+
// --------------------------------------------------------------------------------------
13+
14+
/// Module with operations on HTML documents
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module HtmlDocument =

    /// Returns the doctype of the document
    let docType doc =
        match doc with
        | HtmlDocument(docType = docType) -> docType

    /// Gets all of the root elements of the document
    let elements doc =
        match doc with
        | HtmlDocument(elements = elements) -> elements

    /// <summary>
    /// Returns all of the root elements of the document that match the set of names
    /// </summary>
    /// <param name="names">The set of names to match; each name is lower-cased (invariant culture) before comparison</param>
    /// <param name="doc">The given document</param>
    let inline elementsNamed names doc =
        let nameSet = getNameSet names

        doc |> elements |> List.filter (HtmlNode.name >> nameSet.Contains)

    /// <summary>
    /// Gets all of the descendants of this document that satisfy the given predicate.
    /// The search starts at each root element, and the root elements themselves are candidates.
    /// </summary>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    /// <param name="predicate">The predicate by which to match the nodes to return</param>
    /// <param name="doc">The given document</param>
    let inline descendants recurseOnMatch predicate doc =
        doc
        |> elements
        |> Seq.collect (HtmlNode.descendantsAndSelf recurseOnMatch predicate)

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given set of names
    /// </summary>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    /// <param name="names">The set of names to match; each name is lower-cased (invariant culture) before comparison</param>
    /// <param name="doc">The given document</param>
    let inline descendantsNamed recurseOnMatch names doc =
        let nameSet = getNameSet names

        doc |> descendants recurseOnMatch (HtmlNode.name >> nameSet.Contains)

    /// <summary>
    /// Gets all of the descendants of this document that satisfy the given predicate,
    /// collected with HtmlNode.descendantsAndSelfWithPath for each root element.
    /// NOTE(review): presumably each result carries the path to the matched node - confirm against HtmlNode.
    /// </summary>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    /// <param name="predicate">The predicate by which to match the nodes to return</param>
    /// <param name="doc">The given document</param>
    let inline descendantsWithPath recurseOnMatch predicate doc =
        doc
        |> elements
        |> Seq.collect (HtmlNode.descendantsAndSelfWithPath recurseOnMatch predicate)

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given set of names,
    /// using the same traversal as descendantsWithPath.
    /// </summary>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    /// <param name="names">The set of names to match; each name is lower-cased (invariant culture) before comparison</param>
    /// <param name="doc">The given document</param>
    let inline descendantsNamedWithPath recurseOnMatch names doc =
        let nameSet = getNameSet names

        doc |> descendantsWithPath recurseOnMatch (HtmlNode.name >> nameSet.Contains)

    /// <summary>
    /// Finds the body element of the given document,
    /// this throws an exception if no body element exists.
    /// </summary>
    /// <param name="x">The given document</param>
    let inline body (x: HtmlDocument) =
        // Only the first match is needed, so take the head of the sequence
        // instead of copying every match into a list first.
        match descendantsNamed false [ "body" ] x |> Seq.tryHead with
        | Some body -> body
        | None -> failwith "No element body found!"

    /// <summary>
    /// Tries to find the body element of the given document.
    /// Returns None when the document has no body element.
    /// </summary>
    /// <param name="x">The given document</param>
    let inline tryGetBody (x: HtmlDocument) =
        descendantsNamed false [ "body" ] x |> Seq.tryHead

    /// <summary>
    /// Finds the html element of the given document,
    /// this throws an exception if no html element exists.
    /// </summary>
    /// <param name="x">The given document</param>
    let inline html (x: HtmlDocument) =
        // Only the first match is needed, so take the head of the sequence
        // instead of copying every match into a list first.
        match descendantsNamed false [ "html" ] x |> Seq.tryHead with
        | Some html -> html
        | None -> failwith "No element html found!"

    /// <summary>
    /// Tries to find the html element of the given document.
    /// Returns None when the document has no html element.
    /// </summary>
    /// <param name="x">The given document</param>
    let inline tryGetHtml (x: HtmlDocument) =
        descendantsNamed false [ "html" ] x |> Seq.tryHead
119+
120+
121+
/// Extension methods with operations on HTML documents.
/// Every member forwards to the matching function in the HtmlDocument module.
[<Extension>]
type HtmlDocumentExtensions =

    /// <summary>
    /// Returns all of the root elements of the current document
    /// </summary>
    /// <param name="doc">The given document</param>
    [<Extension>]
    static member Elements(doc: HtmlDocument) = doc |> HtmlDocument.elements

    /// <summary>
    /// Returns the root elements of the current document whose name is in the given set of names
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="names">The set of names to match</param>
    [<Extension>]
    static member Elements(doc: HtmlDocument, names: seq<string>) =
        doc |> HtmlDocument.elementsNamed names

    /// <summary>
    /// Returns the root elements of the current document that have the given name
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="name">The name to match</param>
    [<Extension>]
    static member Elements(doc: HtmlDocument, name: string) =
        doc |> HtmlDocument.elementsNamed [ name ]

    /// <summary>
    /// Gets all of the descendants of this document that satisfy the given predicate
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="predicate">The predicate by which to match the nodes to return</param>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    [<Extension>]
    static member Descendants(doc: HtmlDocument, predicate, recurseOnMatch) =
        doc |> HtmlDocument.descendants recurseOnMatch predicate

    /// <summary>
    /// Gets all of the descendants of this document that satisfy the given predicate.
    /// Recurses on match.
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="predicate">The predicate by which to match the nodes to return</param>
    [<Extension>]
    static member Descendants(doc: HtmlDocument, predicate) =
        // `true` = recurseOnMatch
        doc |> HtmlDocument.descendants true predicate

    /// Gets all of the descendants of this document (every node is accepted).
    /// Recurses on match.
    [<Extension>]
    static member Descendants(doc: HtmlDocument) =
        // `true` = recurseOnMatch; the predicate accepts every node
        doc |> HtmlDocument.descendants true (fun _ -> true)

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given set of names
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="names">The set of names to match</param>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    [<Extension>]
    static member Descendants(doc: HtmlDocument, names: seq<string>, recurseOnMatch) =
        doc |> HtmlDocument.descendantsNamed recurseOnMatch names

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given set of names.
    /// Recurses on match.
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="names">The set of names to match</param>
    [<Extension>]
    static member Descendants(doc: HtmlDocument, names: seq<string>) =
        // `true` = recurseOnMatch
        doc |> HtmlDocument.descendantsNamed true names

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given name
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="name">The name to match</param>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    [<Extension>]
    static member Descendants(doc: HtmlDocument, name: string, recurseOnMatch) =
        doc |> HtmlDocument.descendantsNamed recurseOnMatch [ name ]

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given name.
    /// Recurses on match.
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="name">The name to match</param>
    [<Extension>]
    static member Descendants(doc: HtmlDocument, name: string) =
        // `true` = recurseOnMatch
        doc |> HtmlDocument.descendantsNamed true [ name ]

    /// <summary>
    /// Gets all of the descendants of this document that satisfy the given predicate,
    /// as produced by HtmlDocument.descendantsWithPath
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="predicate">The predicate by which to match the nodes to return</param>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument, predicate, recurseOnMatch) =
        doc |> HtmlDocument.descendantsWithPath recurseOnMatch predicate

    /// <summary>
    /// Gets all of the descendants of this document that satisfy the given predicate,
    /// as produced by HtmlDocument.descendantsWithPath.
    /// Recurses on match.
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="predicate">The predicate by which to match the nodes to return</param>
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument, predicate) =
        // `true` = recurseOnMatch
        doc |> HtmlDocument.descendantsWithPath true predicate

    /// Gets all of the descendants of this document (every node is accepted),
    /// as produced by HtmlDocument.descendantsWithPath.
    /// Recurses on match.
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument) =
        // `true` = recurseOnMatch; the predicate accepts every node
        doc |> HtmlDocument.descendantsWithPath true (fun _ -> true)

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given set of names,
    /// as produced by HtmlDocument.descendantsNamedWithPath
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="names">The set of names to match</param>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument, names: seq<string>, recurseOnMatch) =
        doc |> HtmlDocument.descendantsNamedWithPath recurseOnMatch names

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given set of names,
    /// as produced by HtmlDocument.descendantsNamedWithPath.
    /// Recurses on match.
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="names">The set of names to match</param>
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument, names: seq<string>) =
        // `true` = recurseOnMatch
        doc |> HtmlDocument.descendantsNamedWithPath true names

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given name,
    /// as produced by HtmlDocument.descendantsNamedWithPath
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="name">The name to match</param>
    /// <param name="recurseOnMatch">If a match is found continues down the tree matching child elements</param>
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument, name: string, recurseOnMatch) =
        doc |> HtmlDocument.descendantsNamedWithPath recurseOnMatch [ name ]

    /// <summary>
    /// Finds all of the descendant nodes of this document that match the given name,
    /// as produced by HtmlDocument.descendantsNamedWithPath.
    /// Recurses on match.
    /// </summary>
    /// <param name="doc">The given document</param>
    /// <param name="name">The name to match</param>
    [<Extension>]
    static member DescendantsWithPath(doc: HtmlDocument, name: string) =
        // `true` = recurseOnMatch
        doc |> HtmlDocument.descendantsNamedWithPath true [ name ]

    /// Finds the body element of the given document,
    /// this throws an exception if no body element exists.
    [<Extension>]
    static member Body(doc: HtmlDocument) = doc |> HtmlDocument.body

    /// Tries to find the body element of the given document.
    [<Extension>]
    static member TryGetBody(doc: HtmlDocument) = doc |> HtmlDocument.tryGetBody

    /// Finds the html element of the given document,
    /// this throws an exception if no html element exists.
    [<Extension>]
    static member Html(doc: HtmlDocument) = doc |> HtmlDocument.html

    /// Tries to find the html element of the given document.
    [<Extension>]
    static member TryGetHtml(doc: HtmlDocument) = doc |> HtmlDocument.tryGetHtml
307+
308+
// --------------------------------------------------------------------------------------
309+
310+
/// Provides the dynamic operator for getting attribute values from HTML elements
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module HtmlExtensions =

    /// Dynamic lookup operator: <c>node?name</c> forwards to
    /// HtmlNode.attributeValue with the given attribute name and node.
    let (?) (node: HtmlNode) name = node |> HtmlNode.attributeValue name

0 commit comments

Comments
 (0)