Skip to content

Commit 8239a4f

Browse files
authored
Merge pull request #12069 from SubtitleEdit/perf/hot-path-fixes
Optimize waveform render loop and per-line string utility hot paths
2 parents eddb038 + 100b727 commit 8239a4f

7 files changed

Lines changed: 231 additions & 54 deletions

File tree

src/libse/Common/HtmlUtil.cs

Lines changed: 74 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -657,21 +657,71 @@ public static string FixUpperTags(string input)
657657
return input;
658658
}
659659

660-
var text = input;
661-
var idx = text.IndexOfAny(UppercaseTags, StringComparison.Ordinal);
662-
while (idx >= 0)
660+
// Single forward pass - the old rescan-from-zero loop did eight IndexOf passes plus
661+
// two full-string copies per fixed tag, and this runs per line from AutoBreakLine.
662+
char[] chars = null;
663+
var i = 0;
664+
while (i < input.Length)
663665
{
664-
var endIdx = text.IndexOf('>', idx + 2);
665-
if (endIdx < idx)
666+
if (input[i] == '<' && StartsWithUppercaseTag(input, i))
666667
{
667-
break;
668+
var endIdx = input.IndexOf('>', i + 2);
669+
if (endIdx < 0)
670+
{
671+
break;
672+
}
673+
674+
chars ??= input.ToCharArray();
675+
for (var k = i; k < endIdx; k++)
676+
{
677+
chars[k] = char.ToLowerInvariant(chars[k]);
678+
}
679+
680+
i = endIdx + 1;
681+
continue;
668682
}
669683

670-
var tag = text.Substring(idx, endIdx - idx).ToLowerInvariant();
671-
text = text.Remove(idx, endIdx - idx).Insert(idx, tag);
672-
idx = text.IndexOfAny(UppercaseTags, StringComparison.Ordinal);
684+
i++;
673685
}
674-
return text;
686+
687+
return chars == null ? input : new string(chars);
688+
}
689+
690+
private static bool StartsWithUppercaseTag(string input, int index)
691+
{
692+
// Matches UppercaseTags: <I> <U> <B> <FONT </I> </U> </B> </FONT>
693+
var remaining = input.Length - index;
694+
if (remaining < 3)
695+
{
696+
return false;
697+
}
698+
699+
var c1 = input[index + 1];
700+
if (c1 == 'I' || c1 == 'U' || c1 == 'B')
701+
{
702+
return input[index + 2] == '>';
703+
}
704+
705+
if (c1 == 'F')
706+
{
707+
return remaining >= 5 && input[index + 2] == 'O' && input[index + 3] == 'N' && input[index + 4] == 'T';
708+
}
709+
710+
if (c1 == '/' && remaining >= 4)
711+
{
712+
var c2 = input[index + 2];
713+
if (c2 == 'I' || c2 == 'U' || c2 == 'B')
714+
{
715+
return input[index + 3] == '>';
716+
}
717+
718+
if (c2 == 'F')
719+
{
720+
return remaining >= 7 && input[index + 3] == 'O' && input[index + 4] == 'N' && input[index + 5] == 'T' && input[index + 6] == '>';
721+
}
722+
}
723+
724+
return false;
675725
}
676726

677727
/// <summary>
@@ -1399,18 +1449,19 @@ public static SKColor GetColorFromString(string s)
13991449
/// </summary>
14001450
/// <param name="input">The string from which to remove color tags.</param>
14011451
/// <returns>A new string with color tags removed.</returns>
1452+
private static readonly Regex ColorAttributeRegex = new Regex("[ ]*(COLOR|color|Color)=[\"']*[#\\dA-Za-z]*[\"']*[ ]*", RegexOptions.Compiled);
1453+
14021454
public static string RemoveColorTags(string input)
14031455
{
1404-
var r = new Regex("[ ]*(COLOR|color|Color)=[\"']*[#\\dA-Za-z]*[\"']*[ ]*");
1456+
var r = ColorAttributeRegex;
14051457
var s = input;
14061458
var match = r.Match(s);
14071459
while (match.Success)
14081460
{
14091461
s = s.Remove(match.Index, match.Value.Length).Insert(match.Index, " ");
14101462
if (match.Index > 4)
14111463
{
1412-
var font = s.Substring(match.Index - 5);
1413-
if (font.StartsWith("<font >", StringComparison.OrdinalIgnoreCase))
1464+
if (string.Compare(s, match.Index - 5, "<font >", 0, 7, StringComparison.OrdinalIgnoreCase) == 0)
14141465
{
14151466
s = s.Remove(match.Index - 5, 7);
14161467
var endIndex = s.IndexOf("</font>", match.Index - 5, StringComparison.OrdinalIgnoreCase);
@@ -1447,6 +1498,11 @@ public static string RemoveColorTags(string input)
14471498
return s.Trim();
14481499
}
14491500

1501+
private static readonly Regex FontFaceAttributeRegex = new Regex("[ ]*(FACE|face|Face)=[\"']*[\\d\\p{L} ]*[\"']*[ ]*", RegexOptions.Compiled);
1502+
private static readonly Regex AssaFontNameOnlyTagRegex = new Regex("{\\\\fn[a-zA-Z \\d]+}", RegexOptions.Compiled);
1503+
private static readonly Regex AssaFontNameLastTagRegex = new Regex("\\\\fn[a-zA-Z \\d]+}", RegexOptions.Compiled);
1504+
private static readonly Regex AssaFontNameInnerTagRegex = new Regex("\\\\fn[a-zA-Z \\d]+\\\\", RegexOptions.Compiled);
1505+
14501506
/// <summary>
14511507
/// Remove font tag from HTML or ASSA.
14521508
/// </summary>
@@ -1457,24 +1513,23 @@ public static string RemoveFontName(string input)
14571513
var x = input;
14581514
if (x.Contains("\\fn"))
14591515
{
1460-
x = Regex.Replace(x, "{\\\\fn[a-zA-Z \\d]+}", string.Empty);
1461-
x = Regex.Replace(x, "\\\\fn[a-zA-Z \\d]+}", "}");
1462-
x = Regex.Replace(x, "\\\\fn[a-zA-Z \\d]+\\\\", "\\");
1516+
x = AssaFontNameOnlyTagRegex.Replace(x, string.Empty);
1517+
x = AssaFontNameLastTagRegex.Replace(x, "}");
1518+
x = AssaFontNameInnerTagRegex.Replace(x, "\\");
14631519
}
14641520

14651521
return x;
14661522
}
14671523

1468-
var r = new Regex("[ ]*(FACE|face|Face)=[\"']*[\\d\\p{L} ]*[\"']*[ ]*");
1524+
var r = FontFaceAttributeRegex;
14691525
var s = input;
14701526
var match = r.Match(s);
14711527
while (match.Success)
14721528
{
14731529
s = s.Remove(match.Index, match.Value.Length).Insert(match.Index, " ");
14741530
if (match.Index > 4)
14751531
{
1476-
var font = s.Substring(match.Index - 5);
1477-
if (font.StartsWith("<font >", StringComparison.OrdinalIgnoreCase))
1532+
if (string.Compare(s, match.Index - 5, "<font >", 0, 7, StringComparison.OrdinalIgnoreCase) == 0)
14781533
{
14791534
s = s.Remove(match.Index - 5, 7);
14801535
var endIndex = s.IndexOf("</font>", match.Index - 5, StringComparison.OrdinalIgnoreCase);

src/libse/Common/RegexUtils.cs

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -259,13 +259,31 @@ public static string ReplaceNewLineSafe(Regex regularExpression, string text, st
259259
{
260260
// Match/return with line-feed-normalized line breaks so a pattern's \n (see FixNewLine)
261261
// matches regardless of whether the in-memory text uses \n or \r\n (#11956).
262+
263+
// Fast path (runs per replace rule per subtitle line): text without \r or U+2028 round-trips
264+
// unchanged through the SplitToLines+Join normalization, so skip the four allocations.
265+
if (!ContainsNonLineFeedNewLine(text) && !ContainsNonLineFeedNewLine(replaceWith))
266+
{
267+
return regularExpression.Replace(text, replaceWith, count, startIndex);
268+
}
269+
262270
text = regularExpression.Replace(string.Join("\n", text.SplitToLines()), replaceWith, count, startIndex);
263271
return string.Join("\n", text.SplitToLines());
264272
}
265273

266274
public static int CountNewLineSafe(Regex regularExpression, string text)
267275
{
276+
if (!ContainsNonLineFeedNewLine(text))
277+
{
278+
return regularExpression.Matches(text).Count;
279+
}
280+
268281
return regularExpression.Matches(string.Join("\n", text.SplitToLines())).Count;
269282
}
283+
284+
private static bool ContainsNonLineFeedNewLine(string text)
285+
{
286+
return text.IndexOf('\r') >= 0 || text.IndexOf('\u2028') >= 0;
287+
}
270288
}
271289
}

src/libse/Common/StringExtensions.cs

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,26 @@ public static List<string> SplitToLines(this string s, int max)
177177

178178
public static int CountWords(this string source)
179179
{
180-
return HtmlUtil.RemoveHtmlTags(source, true).Split(new[] { ' ', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries).Length;
180+
// Called per line on grid repaints (words-per-minute) - count boundaries directly
181+
// instead of allocating a separator array plus one substring per word.
182+
var text = HtmlUtil.RemoveHtmlTags(source, true);
183+
var count = 0;
184+
var inWord = false;
185+
for (var i = 0; i < text.Length; i++)
186+
{
187+
var ch = text[i];
188+
if (ch == ' ' || ch == '\n' || ch == '\r')
189+
{
190+
inWord = false;
191+
}
192+
else if (!inWord)
193+
{
194+
inWord = true;
195+
count++;
196+
}
197+
}
198+
199+
return count;
181200
}
182201

183202
// http://www.codeproject.com/Articles/43726/Optimizing-string-operations-in-C
@@ -598,9 +617,8 @@ private static List<KeyValuePair<int, string>> RemoveAndSaveTags(string input, S
598617
var ch = input[index];
599618

600619
if (!tagOn && isAssa && ch == '\\'
601-
&& (input.Substring(index).StartsWith("\\N")
602-
|| input.Substring(index).StartsWith("\\n")
603-
|| input.Substring(index).StartsWith("\\h")))
620+
&& index + 1 < input.Length
621+
&& (input[index + 1] == 'N' || input[index + 1] == 'n' || input[index + 1] == 'h'))
604622
{
605623
tags.Add(new KeyValuePair<int, string>(index, input.Substring(index, 2)));
606624
skipNext = true;
@@ -618,7 +636,7 @@ private static List<KeyValuePair<int, string>> RemoveAndSaveTags(string input, S
618636

619637
if (!tagOn && ch == '<')
620638
{
621-
var s = input.Substring(index);
639+
var s = input.AsSpan(index);
622640
if (
623641
s.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) ||
624642
s.StartsWith("</i>", StringComparison.OrdinalIgnoreCase) ||
@@ -647,7 +665,7 @@ private static List<KeyValuePair<int, string>> RemoveAndSaveTags(string input, S
647665
}
648666
else if (!tagOn && ch == '{')
649667
{
650-
var s = input.Substring(index);
668+
var s = input.AsSpan(index);
651669
if (s.StartsWith("{\\", StringComparison.Ordinal))
652670
{
653671
tagOn = true;

src/libse/Common/TextLengthCalculator/CalcFactory.cs

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,18 @@ public static class CalcFactory
2222

2323
public static ICalcLength MakeCalculator(string strategy)
2424
{
25-
var c = Calculators.FirstOrDefault(calculator => calculator.GetType().Name == strategy);
26-
return c ?? new CalcAll();
25+
// Called per line on grid repaints (characters-per-second) - avoid the
26+
// LINQ closure and the fallback allocation; the list entries are shared anyway.
27+
var calculators = Calculators;
28+
for (var i = 0; i < calculators.Count; i++)
29+
{
30+
if (calculators[i].GetType().Name == strategy)
31+
{
32+
return calculators[i];
33+
}
34+
}
35+
36+
return calculators[0]; // CalcAll
2737
}
2838
}
2939
}

src/libse/Common/Utilities.cs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -224,8 +224,7 @@ internal static bool CanBreak(string s, int index, string language)
224224
string s2 = s.Substring(0, index);
225225
if (Configuration.Settings.Tools.UseNoLineBreakAfter)
226226
{
227-
var noBreakList = NoBreakAfterList(language).ToArray();
228-
foreach (NoBreakAfterItem ending in noBreakList)
227+
foreach (NoBreakAfterItem ending in NoBreakAfterList(language))
229228
{
230229
if (ending.IsMatch(s2))
231230
{

0 commit comments

Comments
 (0)