Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 74 additions & 19 deletions src/libse/Common/HtmlUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -657,21 +657,71 @@ public static string FixUpperTags(string input)
return input;
}

var text = input;
var idx = text.IndexOfAny(UppercaseTags, StringComparison.Ordinal);
while (idx >= 0)
// Single forward pass - the old rescan-from-zero loop did eight IndexOf passes plus
// two full-string copies per fixed tag, and this runs per line from AutoBreakLine.
char[] chars = null;
var i = 0;
while (i < input.Length)
{
var endIdx = text.IndexOf('>', idx + 2);
if (endIdx < idx)
if (input[i] == '<' && StartsWithUppercaseTag(input, i))
{
break;
var endIdx = input.IndexOf('>', i + 2);
if (endIdx < 0)
{
break;
}

chars ??= input.ToCharArray();
for (var k = i; k < endIdx; k++)
{
chars[k] = char.ToLowerInvariant(chars[k]);
}

i = endIdx + 1;
continue;
}

var tag = text.Substring(idx, endIdx - idx).ToLowerInvariant();
text = text.Remove(idx, endIdx - idx).Insert(idx, tag);
idx = text.IndexOfAny(UppercaseTags, StringComparison.Ordinal);
i++;
}
return text;

return chars == null ? input : new string(chars);
}

/// <summary>
/// Checks whether the characters following <paramref name="index"/> spell one of the
/// upper-case tags handled by FixUpperTags. The character at <paramref name="index"/>
/// itself is not inspected; the caller is expected to have checked it already.
/// </summary>
/// <param name="input">The text being scanned.</param>
/// <param name="index">Position of the tag's opening bracket in <paramref name="input"/>.</param>
/// <returns><c>true</c> when an upper-case tag starts at <paramref name="index"/>; otherwise <c>false</c>.</returns>
private static bool StartsWithUppercaseTag(string input, int index)
{
    // Recognized forms: <I> <U> <B> <FONT </I> </U> </B> </FONT>
    var available = input.Length - index;
    if (available < 3)
    {
        // Even the shortest tag ("<I>") needs three characters.
        return false;
    }

    switch (input[index + 1])
    {
        case 'I':
        case 'U':
        case 'B':
            return input[index + 2] == '>';

        case 'F':
            // Opening font tag: only the "FONT" prefix is required, attributes may follow.
            return available >= 5
                   && input[index + 2] == 'O'
                   && input[index + 3] == 'N'
                   && input[index + 4] == 'T';

        case '/':
            if (available < 4)
            {
                return false;
            }

            switch (input[index + 2])
            {
                case 'I':
                case 'U':
                case 'B':
                    return input[index + 3] == '>';

                case 'F':
                    // Closing font tag must be exactly "</FONT>".
                    return available >= 7
                           && input[index + 3] == 'O'
                           && input[index + 4] == 'N'
                           && input[index + 5] == 'T'
                           && input[index + 6] == '>';

                default:
                    return false;
            }

        default:
            return false;
    }
}

/// <summary>
Expand Down Expand Up @@ -1399,18 +1449,19 @@ public static SKColor GetColorFromString(string s)
/// </summary>
/// <param name="input">The string from which to remove color tags.</param>
/// <returns>A new string with color tags removed.</returns>
private static readonly Regex ColorAttributeRegex = new Regex("[ ]*(COLOR|color|Color)=[\"']*[#\\dA-Za-z]*[\"']*[ ]*", RegexOptions.Compiled);

public static string RemoveColorTags(string input)
{
var r = new Regex("[ ]*(COLOR|color|Color)=[\"']*[#\\dA-Za-z]*[\"']*[ ]*");
var r = ColorAttributeRegex;
var s = input;
var match = r.Match(s);
while (match.Success)
{
s = s.Remove(match.Index, match.Value.Length).Insert(match.Index, " ");
if (match.Index > 4)
{
var font = s.Substring(match.Index - 5);
if (font.StartsWith("<font >", StringComparison.OrdinalIgnoreCase))
if (string.Compare(s, match.Index - 5, "<font >", 0, 7, StringComparison.OrdinalIgnoreCase) == 0)
{
s = s.Remove(match.Index - 5, 7);
var endIndex = s.IndexOf("</font>", match.Index - 5, StringComparison.OrdinalIgnoreCase);
Expand Down Expand Up @@ -1447,6 +1498,11 @@ public static string RemoveColorTags(string input)
return s.Trim();
}

// Matches a font "face" attribute (spelled FACE, face or Face) together with any spaces around it:
// '=' followed by optional quote characters, a value made of digits, Unicode letters and spaces,
// and optional closing quote characters.
private static readonly Regex FontFaceAttributeRegex = new Regex("[ ]*(FACE|face|Face)=[\"']*[\\d\\p{L} ]*[\"']*[ ]*", RegexOptions.Compiled);
// Matches a complete ASSA override block that holds nothing but a font name, e.g. {\fnArial}.
private static readonly Regex AssaFontNameOnlyTagRegex = new Regex("{\\\\fn[a-zA-Z \\d]+}", RegexOptions.Compiled);
// Matches a \fn font-name override up to and including the closing '}' of its block.
private static readonly Regex AssaFontNameLastTagRegex = new Regex("\\\\fn[a-zA-Z \\d]+}", RegexOptions.Compiled);
// Matches a \fn font-name override up to and including the backslash that starts the next override.
private static readonly Regex AssaFontNameInnerTagRegex = new Regex("\\\\fn[a-zA-Z \\d]+\\\\", RegexOptions.Compiled);

/// <summary>
/// Remove font tag from HTML or ASSA.
/// </summary>
Expand All @@ -1457,24 +1513,23 @@ public static string RemoveFontName(string input)
var x = input;
if (x.Contains("\\fn"))
{
x = Regex.Replace(x, "{\\\\fn[a-zA-Z \\d]+}", string.Empty);
x = Regex.Replace(x, "\\\\fn[a-zA-Z \\d]+}", "}");
x = Regex.Replace(x, "\\\\fn[a-zA-Z \\d]+\\\\", "\\");
x = AssaFontNameOnlyTagRegex.Replace(x, string.Empty);
x = AssaFontNameLastTagRegex.Replace(x, "}");
x = AssaFontNameInnerTagRegex.Replace(x, "\\");
}

return x;
}

var r = new Regex("[ ]*(FACE|face|Face)=[\"']*[\\d\\p{L} ]*[\"']*[ ]*");
var r = FontFaceAttributeRegex;
var s = input;
var match = r.Match(s);
while (match.Success)
{
s = s.Remove(match.Index, match.Value.Length).Insert(match.Index, " ");
if (match.Index > 4)
{
var font = s.Substring(match.Index - 5);
if (font.StartsWith("<font >", StringComparison.OrdinalIgnoreCase))
if (string.Compare(s, match.Index - 5, "<font >", 0, 7, StringComparison.OrdinalIgnoreCase) == 0)
{
s = s.Remove(match.Index - 5, 7);
var endIndex = s.IndexOf("</font>", match.Index - 5, StringComparison.OrdinalIgnoreCase);
Expand Down
18 changes: 18 additions & 0 deletions src/libse/Common/RegexUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -259,13 +259,31 @@ public static string ReplaceNewLineSafe(Regex regularExpression, string text, st
{
// Match/return with line-feed-normalized line breaks so a pattern's \n (see FixNewLine)
// matches regardless of whether the in-memory text uses \n or \r\n (#11956).

// Fast path (runs per replace rule per subtitle line): text without \r or U+2028 round-trips
// unchanged through the SplitToLines+Join normalization, so skip the four allocations.
if (!ContainsNonLineFeedNewLine(text) && !ContainsNonLineFeedNewLine(replaceWith))
{
return regularExpression.Replace(text, replaceWith, count, startIndex);
}

text = regularExpression.Replace(string.Join("\n", text.SplitToLines()), replaceWith, count, startIndex);
return string.Join("\n", text.SplitToLines());
}

/// <summary>
/// Counts the matches of <paramref name="regularExpression"/> in <paramref name="text"/>,
/// matching against a copy whose lines are joined with '\n' when the text contains
/// '\r' or U+2028.
/// </summary>
/// <param name="regularExpression">The expression to count matches of.</param>
/// <param name="text">The text to search.</param>
/// <returns>The number of matches found.</returns>
public static int CountNewLineSafe(Regex regularExpression, string text)
{
    // Only rebuild the string when it actually holds a line break other than '\n'.
    var subject = ContainsNonLineFeedNewLine(text)
        ? string.Join("\n", text.SplitToLines())
        : text;

    return regularExpression.Matches(subject).Count;
}

/// <summary>
/// Reports whether <paramref name="text"/> contains a carriage return ('\r') or a
/// Unicode line separator (U+2028).
/// </summary>
/// <param name="text">The text to inspect.</param>
/// <returns><c>true</c> when either character is present; otherwise <c>false</c>.</returns>
private static bool ContainsNonLineFeedNewLine(string text)
{
    if (text.IndexOf('\r') != -1)
    {
        return true;
    }

    return text.IndexOf('\u2028') != -1;
}
}
}
30 changes: 24 additions & 6 deletions src/libse/Common/StringExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,26 @@ public static List<string> SplitToLines(this string s, int max)

/// <summary>
/// Counts the words in <paramref name="source"/> after passing it through
/// <c>HtmlUtil.RemoveHtmlTags(source, true)</c>. A word is a maximal run of characters
/// that contains no space, '\n' or '\r'.
/// </summary>
/// <param name="source">The text whose words are counted.</param>
/// <returns>The number of words; 0 when the stripped text is empty or holds only separators.</returns>
public static int CountWords(this string source)
{
    // Called per line on grid repaints (words-per-minute) - count boundaries directly
    // instead of allocating a separator array plus one substring per word.
    var text = HtmlUtil.RemoveHtmlTags(source, true);
    var count = 0;
    var inWord = false;
    foreach (var ch in text)
    {
        if (ch == ' ' || ch == '\n' || ch == '\r')
        {
            inWord = false;
        }
        else if (!inWord)
        {
            // First character after a separator (or at the start) opens a new word.
            inWord = true;
            count++;
        }
    }

    return count;
}

// http://www.codeproject.com/Articles/43726/Optimizing-string-operations-in-C
Expand Down Expand Up @@ -598,9 +617,8 @@ private static List<KeyValuePair<int, string>> RemoveAndSaveTags(string input, S
var ch = input[index];

if (!tagOn && isAssa && ch == '\\'
&& (input.Substring(index).StartsWith("\\N")
|| input.Substring(index).StartsWith("\\n")
|| input.Substring(index).StartsWith("\\h")))
&& index + 1 < input.Length
&& (input[index + 1] == 'N' || input[index + 1] == 'n' || input[index + 1] == 'h'))
{
tags.Add(new KeyValuePair<int, string>(index, input.Substring(index, 2)));
skipNext = true;
Expand All @@ -618,7 +636,7 @@ private static List<KeyValuePair<int, string>> RemoveAndSaveTags(string input, S

if (!tagOn && ch == '<')
{
var s = input.Substring(index);
var s = input.AsSpan(index);
if (
s.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) ||
s.StartsWith("</i>", StringComparison.OrdinalIgnoreCase) ||
Expand Down Expand Up @@ -647,7 +665,7 @@ private static List<KeyValuePair<int, string>> RemoveAndSaveTags(string input, S
}
else if (!tagOn && ch == '{')
{
var s = input.Substring(index);
var s = input.AsSpan(index);
if (s.StartsWith("{\\", StringComparison.Ordinal))
{
tagOn = true;
Expand Down
14 changes: 12 additions & 2 deletions src/libse/Common/TextLengthCalculator/CalcFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,18 @@ public static class CalcFactory

/// <summary>
/// Returns the calculator from <c>Calculators</c> whose type name equals
/// <paramref name="strategy"/>, or the first entry of the list when none matches.
/// </summary>
/// <param name="strategy">The type name of the wanted calculator.</param>
/// <returns>A shared calculator instance taken from <c>Calculators</c>.</returns>
public static ICalcLength MakeCalculator(string strategy)
{
    // Called per line on grid repaints (characters-per-second) - avoid the
    // LINQ closure and the fallback allocation; the list entries are shared anyway.
    var calculators = Calculators;
    for (var i = 0; i < calculators.Count; i++)
    {
        var candidate = calculators[i];
        if (candidate.GetType().Name == strategy)
        {
            return candidate;
        }
    }

    // NOTE(review): relies on the first entry of Calculators being CalcAll and on the
    // list being non-empty - the list is declared outside this view, confirm there.
    return calculators[0]; // CalcAll
}
}
}
3 changes: 1 addition & 2 deletions src/libse/Common/Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,7 @@ internal static bool CanBreak(string s, int index, string language)
string s2 = s.Substring(0, index);
if (Configuration.Settings.Tools.UseNoLineBreakAfter)
{
var noBreakList = NoBreakAfterList(language).ToArray();
foreach (NoBreakAfterItem ending in noBreakList)
foreach (NoBreakAfterItem ending in NoBreakAfterList(language))
{
if (ending.IsMatch(s2))
{
Expand Down
Loading