11using System . Net ;
22using System . Text . Json ;
3+ using System . Text . RegularExpressions ;
34using System . Xml . Linq ;
45using AngleSharp . Common ;
56using AngleSharp . Dom ;
1112
1213namespace OriginLab . DocumentGeneration ;
1314
14- internal abstract class DocTransformer
15+ internal abstract partial class DocTransformer
1516{
1617 protected string SourceFolder { get ; }
1718 protected string SourceFolderEn { get ; }
@@ -148,14 +149,21 @@ protected void Transform(string sourceFile, string destinationFile, string langu
148149
/// <summary>
/// Returns the text of the first <c>h1</c> heading found in <paramref name="sourceFile"/>.
/// </summary>
/// <remarks>
/// The file is read line by line and each line is tested against <c>HeaderRegex</c>;
/// only the first matching fragment is handed to the HTML parser, so the rest of the
/// file is never parsed. Because matching is done per line, a heading is only found
/// when its opening and closing tags sit on the same line.
/// </remarks>
/// <param name="sourceFile">Path of the HTML file to scan.</param>
/// <returns>The heading text, or an empty string when no heading is found.</returns>
protected static string GetPageTitle(string sourceFile)
{
    using var reader = new StreamReader(sourceFile);

    while (reader.ReadLine() is string line)
    {
        var match = HeaderRegex.Match(line);
        if (!match.Success)
        {
            continue;
        }

        // Let the HTML parser extract the text of the matched markup instead of
        // stripping tags by hand.
        var parser = new HtmlParser();
        var fragment = parser.ParseDocument(match.Value);

        // Null-safe lookup, mirroring CleanUp: fall back to "" rather than throwing
        // if the parsed fragment unexpectedly exposes no h1 element.
        return fragment.QuerySelector("h1")?.Text() ?? "";
    }

    return "";
}
159167
160168 void Transform ( IHtmlDocument document , string sourceFile , string language , in Nav nav , INodeList ? headerNodes , INodeList ? bannerNodes , INodeList ? footerNodes )
161169 {
@@ -211,7 +219,7 @@ void Transform(IHtmlDocument document, string sourceFile, string language, in Na
211219
/// <summary>
/// Sets the document title from its first <c>h1</c> element and strips the
/// <c>span.mw-editsection</c> elements from the document.
/// </summary>
/// <param name="document">The document to modify in place.</param>
private static void CleanUp(IHtmlDocument document)
{
    // Use the first heading's text as the title; an absent heading yields an empty title.
    var firstHeading = document.QuerySelector("h1");
    var headingText = firstHeading?.Text();
    document.Title = headingText ?? "";

    // NOTE(review): "mw-editsection" looks like the MediaWiki edit-link wrapper — confirm.
    var editSectionSpans = document.QuerySelectorAll<IHtmlSpanElement>("span.mw-editsection");
    editSectionSpans.Remove();
}
@@ -517,4 +525,7 @@ public void PrintProblems()
517525 }
518526 }
519527 }
528+
/// <summary>
/// Matches a complete <c>h1</c> element: the opening tag with any attributes, its
/// content, and the first closing tag that follows.
/// </summary>
/// <remarks>
/// Tag names are matched case-insensitively because HTML element names are not
/// case-sensitive, so <c>&lt;H1&gt;</c> is recognized as well. The content part is
/// lazy and <c>.</c> does not match newlines, so a match never spans more than one line.
/// </remarks>
[GeneratedRegex(@"<h1[^>]*>.*?</h1>", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
private static partial Regex HeaderRegex { get; }
520531}
0 commit comments