Skip to content

Commit e193540

Browse files
committed
Some files have an h1 that spans multiple lines.
1 parent 859662c commit e193540

1 file changed

Lines changed: 22 additions & 9 deletions

File tree

converter/generator/DocTransformer.cs

Lines changed: 22 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
using System.Net;
1+
using System.Buffers;
2+
using System.Net;
23
using System.Text.Json;
34
using System.Text.RegularExpressions;
45
using System.Xml.Linq;
@@ -147,22 +148,34 @@ protected void Transform(string sourceFile, string destinationFile, string langu
147148
document.ToHtml(sw, HtmlMarkupFormatter.Instance);
148149
}
149150

150-
protected static string GetPageTitle(string sourceFile)
151+
protected string GetPageTitle(string sourceFile)
151152
{
152-
using var reader = new StreamReader(sourceFile);
153+
string title = null!;
153154

154-
while (reader.ReadLine() is string line)
155+
using var reader = new StreamReader(sourceFile);
156+
var buffer = ArrayPool<char>.Shared.Rent(1024);
157+
var read = reader.ReadBlock(buffer);
158+
if (read > 0)
155159
{
156-
if (HeaderRegex.Match(line) is { Success: true } match)
160+
foreach (var match in HeaderRegex.EnumerateMatches(buffer.AsSpan(0, read)))
157161
{
158162
var parser = new HtmlParser();
159-
var doc = parser.ParseDocument(match.Value);
163+
var doc = parser.ParseDocument(buffer.AsMemory(match.Index, match.Length));
160164

161-
return doc.QuerySelector("h1")!.Text();
165+
title = doc.QuerySelector("h1")!.Text();
166+
break;
162167
}
163168
}
164169

165-
return "";
170+
if (title is null)
171+
{
172+
title = "";
173+
ReportProblem(sourceFile, "Missing h1");
174+
}
175+
176+
ArrayPool<char>.Shared.Return(buffer);
177+
178+
return title;
166179
}
167180

168181
void Transform(IHtmlDocument document, string sourceFile, string language, in Nav nav, INodeList? headerNodes, INodeList? bannerNodes, INodeList? footerNodes)
@@ -548,6 +561,6 @@ public void PrintProblems()
548561
}
549562
}
550563

551-
[GeneratedRegex(@"<h1[^>]*>.*?</h1>")]
564+
[GeneratedRegex(@"<h1[^>]*>.*?</h1>", RegexOptions.Singleline | RegexOptions.IgnoreCase)]
552565
private static partial Regex HeaderRegex { get; }
553566
}

0 commit comments

Comments
 (0)