Skip to content

Commit 3ea431d

Browse files
author
Bytekeeper
committed
Some fiddling with the grouping algorithm
1 parent 92fab56 commit 3ea431d

File tree

2 files changed

+24
-13
lines changed

2 files changed

+24
-13
lines changed

src/main/java/org/stt/text/CommonPrefixGrouper.java

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -42,18 +42,29 @@ public List<Group> getGroupsOf(String text) {
4242
int i = 0;
4343
int start = 0;
4444
while (i < n && node != null) {
45-
char lastChar = (char) -1;
46-
while (i < n && node != null && (node.numChildren() <= 1 || i < start + 3)) {
47-
lastChar = chars[i];
48-
node = node.child(lastChar);
45+
int lastGood = i;
46+
do {
47+
if (!Character.isWhitespace(chars[i])) {
48+
lastGood = i;
49+
}
50+
node = node.child(chars[i]);
4951
i++;
52+
} while (i < n && node != null && node.numChildren() <= 1);
53+
do {
54+
if (lastGood >= i && node != null) {
55+
node = node.child(chars[i]);
56+
i++;
57+
}
58+
lastGood++;
5059
}
51-
while (i < n && node != null && lastChar != ' ') {
52-
lastChar = chars[i];
53-
node = node.child(lastChar);
60+
while (lastGood < n && (!Character.isWhitespace(chars[lastGood]) || lastGood - start < 3));
61+
groups.add(new Group(Type.MATCH, text.substring(start, lastGood), new IntRange(start, lastGood)));
62+
while (i < n && Character.isWhitespace(chars[i])) {
63+
if (node != null) {
64+
node = node.child(chars[i]);
65+
}
5466
i++;
5567
}
56-
groups.add(new Group(Type.MATCH, text.substring(start, i), new IntRange(start, i)));
5768
start = i;
5869
}
5970
if (i < n) {

src/test/java/org/stt/text/CommonPrefixGrouperTest.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ public void shouldFindGroupsWithSpaces() {
9999
List<String> result = groupsAsString(firstComment);
100100

101101
// THEN
102-
assertThat(result, is(Arrays.asList("group subgroup ", "one")));
102+
assertThat(result, is(Arrays.asList("group subgroup", "one")));
103103

104104
}
105105

@@ -117,8 +117,8 @@ public void shouldFindSubGroups() {
117117

118118
// THEN
119119
assertThat(withThreeGroups,
120-
is(Arrays.asList("group subgroup ", "one")));
121-
assertThat(withTwoGroups, is(Arrays.asList("group subgroup2 ", "one")));
120+
is(Arrays.asList("group subgroup", "one")));
121+
assertThat(withTwoGroups, is(Arrays.asList("group subgroup2", "one")));
122122
}
123123

124124
@Test
@@ -131,7 +131,7 @@ public void shouldFindLongestCommonPrefix() {
131131
List<String> groups = groupsAsString(firstComment);
132132

133133
// THEN
134-
assertThat(groups, is(Arrays.asList("group ", "one")));
134+
assertThat(groups, is(Arrays.asList("group", "one")));
135135

136136
}
137137

@@ -161,7 +161,7 @@ public void shouldCutGroupAtShorterItem()
161161
List<String> result = groupsAsString("aaaa bbbb cccc dddd");
162162

163163
// THEN
164-
assertThat(result, is(Arrays.asList("aaaa ", "bbbb ", "cccc ", "dddd")));
164+
assertThat(result, is(Arrays.asList("aaaa", "bbbb", "cccc", "dddd")));
165165
}
166166

167167
private void givenReaderReturnsItemsWithComment(

0 commit comments

Comments
 (0)