-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDiffUtilities.cs
More file actions
94 lines (81 loc) · 3.04 KB
/
DiffUtilities.cs
File metadata and controls
94 lines (81 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace KemTranslate
{
internal static class DiffUtilities
{
/// <summary>
/// Pattern used to split text into diff tokens. Each match is exactly one of:
/// a run of word characters (<c>\w+</c>), a run of whitespace (<c>\s+</c>), or a run of
/// characters that are neither (<c>[^\w\s]+</c>, e.g. punctuation). The three alternatives
/// together cover every character, so whitespace is kept as its own token rather than dropped.
/// </summary>
private static readonly Regex DiffTokenRegex = new(@"\w+|\s+|[^\w\s]+", RegexOptions.Compiled);
/// <summary>
/// Produces a write result for <paramref name="updatedText"/> in which the text is cut into
/// alternating runs of tokens that were carried over from <paramref name="originalText"/>
/// and tokens that were not.
/// </summary>
/// <param name="originalText">Text before the change; only used as the comparison baseline.</param>
/// <param name="updatedText">Text after the change; becomes <c>CorrectedText</c> and the source of every segment.</param>
/// <returns>
/// A result whose <c>Segments</c>, concatenated in order, reproduce the tokens of
/// <paramref name="updatedText"/>. When the updated text yields no tokens, a single segment
/// holding <paramref name="updatedText"/> is returned with <c>IsChanged</c> left unset.
/// </returns>
public static LanguageToolWriteResult BuildDiffWriteResult(string originalText, string updatedText)
{
    List<string> before = TokenizeForDiff(originalText);
    List<string> after = TokenizeForDiff(updatedText);
    bool[] kept = GetCommonUpdatedTokenFlags(before, after);

    var runs = new List<LanguageToolSegment>();
    int position = 0;
    foreach (string token in after)
    {
        // Whitespace-only tokens are never reported as changed, even when they are new.
        bool changed = !(kept[position] || string.IsNullOrWhiteSpace(token));
        position++;

        int lastIndex = runs.Count - 1;
        bool extendsLastRun = lastIndex >= 0 && runs[lastIndex].IsChanged == changed;
        if (extendsLastRun)
        {
            // Same state as the previous token: grow the current run instead of starting a new one.
            runs[lastIndex].Text += token;
        }
        else
        {
            runs.Add(new LanguageToolSegment { Text = token, IsChanged = changed });
        }
    }

    if (runs.Count == 0)
    {
        // No tokens at all: still hand back one segment carrying the text as given.
        runs.Add(new LanguageToolSegment { Text = updatedText });
    }

    return new LanguageToolWriteResult
    {
        CorrectedText = updatedText,
        Segments = runs
    };
}
/// <summary>
/// Splits <paramref name="text"/> into the ordered list of tokens matched by
/// <c>DiffTokenRegex</c>.
/// </summary>
/// <param name="text">Text to tokenize; may be <c>null</c> or empty.</param>
/// <returns>
/// A new list: empty for <c>null</c>/empty input, otherwise the value of every regex match in
/// order. If the regex produces no match, the list contains <paramref name="text"/> as its
/// only element.
/// </returns>
public static List<string> TokenizeForDiff(string text)
{
    var tokens = new List<string>();
    if (string.IsNullOrEmpty(text))
    {
        return tokens;
    }

    foreach (Match match in DiffTokenRegex.Matches(text))
    {
        tokens.Add(match.Value);
    }

    // Fallback: keep the whole input as a single token rather than losing it.
    if (tokens.Count == 0)
    {
        tokens.Add(text);
    }

    return tokens;
}
/// <summary>
/// Flags which tokens of <paramref name="updatedTokens"/> are part of a longest common
/// subsequence shared with <paramref name="originalTokens"/>. Tokens are compared ordinally.
/// </summary>
/// <param name="originalTokens">Tokens of the baseline sequence.</param>
/// <param name="updatedTokens">Tokens of the sequence being classified.</param>
/// <returns>
/// An array with one entry per updated token: <c>true</c> when the token was aligned with an
/// equal token of the original sequence, <c>false</c> otherwise.
/// </returns>
/// <exception cref="ArgumentNullException">Either token list is <c>null</c>.</exception>
public static bool[] GetCommonUpdatedTokenFlags(IReadOnlyList<string> originalTokens, IReadOnlyList<string> updatedTokens)
{
    ArgumentNullException.ThrowIfNull(originalTokens);
    ArgumentNullException.ThrowIfNull(updatedTokens);

    int n = originalTokens.Count;
    int m = updatedTokens.Count;
    var commonUpdated = new bool[m];

    // The alignment walk below always pairs equal tokens as soon as it sees them, so a run of
    // pairwise-equal leading tokens is matched one-to-one. Flag that run directly and keep it
    // out of the table: the result is unchanged, but the table shrinks from (n+1)*(m+1) cells
    // to only what follows the first difference (and to nothing for identical inputs).
    int prefix = 0;
    while (prefix < n && prefix < m
        && string.Equals(originalTokens[prefix], updatedTokens[prefix], StringComparison.Ordinal))
    {
        commonUpdated[prefix] = true;
        prefix++;
    }

    int rows = n - prefix;
    int cols = m - prefix;
    if (rows == 0 || cols == 0)
    {
        // One side is exhausted: nothing further can be common.
        return commonUpdated;
    }

    // lcs[i, j] = length of the longest common subsequence of
    // originalTokens[prefix + i ..] and updatedTokens[prefix + j ..].
    // The extra last row/column stays 0 and serves as the "empty remainder" base case.
    var lcs = new int[rows + 1, cols + 1];
    for (int i = rows - 1; i >= 0; i--)
    {
        for (int j = cols - 1; j >= 0; j--)
        {
            if (string.Equals(originalTokens[prefix + i], updatedTokens[prefix + j], StringComparison.Ordinal))
            {
                lcs[i, j] = lcs[i + 1, j + 1] + 1;
            }
            else
            {
                lcs[i, j] = Math.Max(lcs[i + 1, j], lcs[i, j + 1]);
            }
        }
    }

    // Walk the table from the top-left, following one optimal alignment.
    int x = 0;
    int y = 0;
    while (x < rows && y < cols)
    {
        if (string.Equals(originalTokens[prefix + x], updatedTokens[prefix + y], StringComparison.Ordinal))
        {
            commonUpdated[prefix + y] = true;
            x++;
            y++;
        }
        else if (lcs[x + 1, y] >= lcs[x, y + 1])
        {
            // Ties skip the original token first; this fixes which of several equally long
            // alignments is reported.
            x++;
        }
        else
        {
            y++;
        }
    }

    return commonUpdated;
}
}
}