-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSentenceUtilities.cs
More file actions
131 lines (100 loc) · 3.97 KB
/
SentenceUtilities.cs
File metadata and controls
131 lines (100 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
using System;
namespace KemTranslate
{
/// <summary>
/// Helpers for locating sentences inside a block of plain text.
/// </summary>
/// <remarks>
/// A sentence is a run of characters containing no sentence boundary (see
/// <see cref="IsSentenceBoundary"/>), followed by the run of terminal punctuation
/// ('.', '!' or '?') that closes it, if any. Consecutive terminators such as "..." or "?!"
/// belong to the sentence they close instead of forming one-character sentences of their own.
/// Line breaks end a sentence but are never part of it. Every reported range is trimmed of
/// leading and trailing white space.
/// </remarks>
internal static class SentenceUtilities
{
    /// <summary>
    /// Finds the sentence around a character offset.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="offset">Zero-based character index into <paramref name="text"/>.</param>
    /// <param name="start">Receives the index of the first character of the sentence, or -1 on failure.</param>
    /// <param name="length">Receives the number of characters in the sentence, or 0 on failure.</param>
    /// <returns>
    /// <c>true</c> when a non-empty sentence was found; <c>false</c> when <paramref name="text"/> is
    /// null or white space, <paramref name="offset"/> is out of range, or only white space lies
    /// between the boundaries surrounding the offset.
    /// </returns>
    /// <remarks>
    /// Because the result is trimmed, it does not necessarily contain <paramref name="offset"/>
    /// when the offset points at white space or at a line break.
    /// </remarks>
    public static bool TryGetSentenceRange(string text, int offset, out int start, out int length)
    {
        start = -1;
        length = 0;

        if (string.IsNullOrWhiteSpace(text) || offset < 0 || offset >= text.Length)
        {
            return false;
        }

        // An offset inside a run of terminators ("...", "?!") belongs to the sentence that the
        // run closes, so rewind to the first terminator of the run before searching.
        int anchor = offset;
        if (IsSentenceTerminator(text[anchor]))
        {
            while (anchor > 0 && IsSentenceTerminator(text[anchor - 1]))
            {
                anchor--;
            }
        }

        // Walk back to just after the previous boundary (or to the start of the text).
        int sentenceStart = anchor;
        while (sentenceStart > 0 && !IsSentenceBoundary(text[sentenceStart - 1]))
        {
            sentenceStart--;
        }

        // Walk forward to the next boundary (or to the end of the text).
        int sentenceEnd = anchor;
        while (sentenceEnd < text.Length && !IsSentenceBoundary(text[sentenceEnd]))
        {
            sentenceEnd++;
        }

        // Closing punctuation is part of the sentence; a line break is not.
        while (sentenceEnd < text.Length && IsSentenceTerminator(text[sentenceEnd]))
        {
            sentenceEnd++;
        }

        return TryTrimRange(text, sentenceStart, sentenceEnd, out start, out length);
    }

    /// <summary>
    /// Determines the zero-based index of the sentence that contains a character offset.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="offset">Zero-based character index into <paramref name="text"/>.</param>
    /// <param name="ordinal">Receives the sentence index, or -1 on failure.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="offset"/> lies inside a (trimmed) sentence; <c>false</c> when
    /// the text is null or white space, the offset is out of range, or the offset falls on white
    /// space between sentences.
    /// </returns>
    public static bool TryGetSentenceOrdinal(string text, int offset, out int ordinal)
    {
        ordinal = -1;

        if (string.IsNullOrWhiteSpace(text) || offset < 0 || offset >= text.Length)
        {
            return false;
        }

        int cursor = 0;
        int index = 0;
        while (TryGetSentenceRangeByCursor(text, ref cursor, out int start, out int length))
        {
            if (offset < start)
            {
                // Sentences are produced in increasing order, so no later one can contain the offset.
                return false;
            }

            if (offset < start + length)
            {
                ordinal = index;
                return true;
            }

            index++;
        }

        return false;
    }

    /// <summary>
    /// Finds the range of the sentence with the given zero-based index.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="ordinal">Zero-based sentence index.</param>
    /// <param name="start">Receives the index of the first character of the sentence, or -1 on failure.</param>
    /// <param name="length">Receives the number of characters in the sentence, or 0 on failure.</param>
    /// <returns>
    /// <c>true</c> when the text has a sentence at <paramref name="ordinal"/>; otherwise <c>false</c>.
    /// </returns>
    public static bool TryGetSentenceRangeByOrdinal(string text, int ordinal, out int start, out int length)
    {
        start = -1;
        length = 0;

        if (ordinal < 0 || string.IsNullOrWhiteSpace(text))
        {
            return false;
        }

        int cursor = 0;
        int index = 0;
        while (TryGetSentenceRangeByCursor(text, ref cursor, out start, out length))
        {
            if (index == ordinal)
            {
                return true;
            }

            index++;
        }

        start = -1;
        length = 0;
        return false;
    }

    /// <summary>
    /// Reads the next sentence at or after <paramref name="cursor"/> and advances the cursor past it.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <param name="cursor">
    /// Position to resume scanning from. A negative value is treated as 0. On return it points just
    /// past the sentence and its closing punctuation, or at the line break that ended the sentence,
    /// or at the end of the text when nothing is left.
    /// </param>
    /// <param name="start">Receives the index of the first character of the sentence, or -1 on failure.</param>
    /// <param name="length">Receives the number of characters in the sentence, or 0 on failure.</param>
    /// <returns><c>true</c> when a sentence was read; <c>false</c> when no sentence remains.</returns>
    public static bool TryGetSentenceRangeByCursor(string text, ref int cursor, out int start, out int length)
    {
        start = -1;
        length = 0;

        if (string.IsNullOrWhiteSpace(text))
        {
            return false;
        }

        // Guard the indexer below: a negative cursor simply means "start from the beginning".
        if (cursor < 0)
        {
            cursor = 0;
        }

        // Skip white space (including line breaks) left over from the previous sentence.
        while (cursor < text.Length && char.IsWhiteSpace(text[cursor]))
        {
            cursor++;
        }

        if (cursor >= text.Length)
        {
            return false;
        }

        int sentenceStart = cursor;

        // Sentence body: everything up to the next boundary.
        while (cursor < text.Length && !IsSentenceBoundary(text[cursor]))
        {
            cursor++;
        }

        // Closing punctuation, including runs such as "..." or "?!".
        while (cursor < text.Length && IsSentenceTerminator(text[cursor]))
        {
            cursor++;
        }

        // text[sentenceStart] is not white space, so the range is never empty here;
        // trimming only removes white space that precedes a line break.
        return TryTrimRange(text, sentenceStart, cursor, out start, out length);
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> ends a sentence: terminal punctuation
    /// ('.', '!', '?') or a line-break character ('\r', '\n').
    /// </summary>
    /// <param name="c">Character to test.</param>
    /// <returns><c>true</c> when <paramref name="c"/> is a sentence boundary.</returns>
    public static bool IsSentenceBoundary(char c)
    {
        return IsSentenceTerminator(c) || c == '\r' || c == '\n';
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> is terminal punctuation that is kept as part of the sentence.
    /// </summary>
    private static bool IsSentenceTerminator(char c)
    {
        return c == '.' || c == '!' || c == '?';
    }

    /// <summary>
    /// Trims white space from both ends of the half-open range [begin, end) and reports the result;
    /// fails with start = -1 and length = 0 when nothing but white space remains.
    /// </summary>
    private static bool TryTrimRange(string text, int begin, int end, out int start, out int length)
    {
        while (begin < end && char.IsWhiteSpace(text[begin]))
        {
            begin++;
        }

        while (end > begin && char.IsWhiteSpace(text[end - 1]))
        {
            end--;
        }

        if (end <= begin)
        {
            start = -1;
            length = 0;
            return false;
        }

        start = begin;
        length = end - begin;
        return true;
    }
}
}