-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRegexAssetLinkDetector.cs
More file actions
81 lines (66 loc) · 2.61 KB
/
RegexAssetLinkDetector.cs
File metadata and controls
81 lines (66 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
using OwlCore.Storage;
namespace WindowsAppCommunity.Blog.Assets;
/// <summary>
/// Detects relative asset links in markdown and HTML text.
/// </summary>
/// <remarks>
/// Candidates are found with two regular expressions: markdown link/image syntax and
/// HTML <c>href</c>/<c>src</c> attributes. Fragment-only links, rooted paths, anything
/// containing <c>://</c>, and <c>mailto:</c>/<c>data:</c>/<c>javascript:</c>/<c>tel:</c>
/// targets are skipped. Each remaining path is yielded once, compared case-insensitively.
/// </remarks>
public sealed partial class RegexAssetLinkDetector : IAssetLinkDetector
{
    /// <summary>
    /// Regex pattern for markdown links and images. The link destination is captured in
    /// the <c>path</c> group; an optional title after whitespace is matched but not captured.
    /// </summary>
    // RegexOptions.Compiled is intentionally omitted: the regex source generator ignores it.
    [GeneratedRegex("""!?\[[^\]]*\]\((?<path>[^)\s]+)(?:\s+[^)]*)?\)""")]
    private static partial Regex MarkdownLinkPattern();

    /// <summary>
    /// Regex pattern for HTML href/src attributes. The quoted attribute value is captured
    /// in the <c>path</c> group. Matching is case-insensitive.
    /// </summary>
    [GeneratedRegex("""(?:href|src)\s*=\s*["'](?<path>[^"']+)["']""", RegexOptions.IgnoreCase)]
    private static partial Regex HtmlAttributePattern();

    /// <inheritdoc/>
    /// <remarks>
    /// Reads the whole text of <paramref name="source"/>, then scans it first for markdown
    /// links and afterwards for HTML attributes. The yielded value is the normalized path
    /// (surrounding whitespace and angle brackets removed), which is also the value used
    /// for de-duplication. If cancellation is observed between matches, the enumeration
    /// ends without throwing.
    /// </remarks>
    public async IAsyncEnumerable<string> DetectAsync(IFile source, [EnumeratorCancellation] CancellationToken ct = default)
    {
        var text = await source.ReadTextAsync(ct);

        // Shared across both passes so a path referenced from markdown and HTML is reported once.
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        // Order matters: markdown matches are reported before HTML attribute matches.
        var patterns = new[] { MarkdownLinkPattern(), HtmlAttributePattern() };

        foreach (var pattern in patterns)
        {
            foreach (Match match in pattern.Matches(text))
            {
                if (ct.IsCancellationRequested)
                {
                    yield break;
                }

                if (TryGetAssetPath(match.Groups["path"].Value, seen, out var assetPath))
                {
                    yield return assetPath;
                }
            }
        }
    }

    /// <summary>
    /// Normalizes a captured link target and decides whether it should be reported.
    /// </summary>
    /// <param name="rawPath">The text captured by the <c>path</c> group.</param>
    /// <param name="seen">Paths already reported; the normalized path is added when accepted.</param>
    /// <param name="path">
    /// The normalized path (trimmed of whitespace and of leading/trailing <c>&lt;</c>/<c>&gt;</c>)
    /// when the method returns <see langword="true"/>; otherwise not meaningful.
    /// </param>
    /// <returns>
    /// <see langword="true"/> if the path is a relative asset link that has not been reported
    /// before; otherwise <see langword="false"/>.
    /// </returns>
    private static bool TryGetAssetPath(string rawPath, HashSet<string> seen, out string path)
    {
        path = string.Empty;

        if (string.IsNullOrWhiteSpace(rawPath))
        {
            return false;
        }

        // Markdown allows destinations wrapped in angle brackets, e.g. [x](<img/a.png>).
        path = rawPath.Trim().Trim('<', '>');

        if (string.IsNullOrWhiteSpace(path))
        {
            return false;
        }

        // Fragment-only links and rooted paths are not relative assets.
        // The '/' check also rejects protocol-relative targets ("//host/...").
        if (path.StartsWith('#') || path.StartsWith('/') || path.StartsWith('\\'))
        {
            return false;
        }

        // Anything carrying a scheme separator is treated as an absolute URL.
        if (path.Contains("://", StringComparison.Ordinal))
        {
            return false;
        }

        // Schemes that do not use "://" and therefore slip past the check above.
        if (path.StartsWith("mailto:", StringComparison.OrdinalIgnoreCase) ||
            path.StartsWith("data:", StringComparison.OrdinalIgnoreCase) ||
            path.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase) ||
            path.StartsWith("tel:", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        // Add returns false when the path was already reported.
        return seen.Add(path);
    }
}