Skip to content

Commit 52d8fa8

Browse files
committed
Use java.text.BreakIterator in DefaultTextDoubleClickStrategy
Replaces com.ibm.icu.text.BreakIterator with java.text.BreakIterator in DefaultTextDoubleClickStrategy and drops the matching Import-Package: com.ibm.icu.text from the bundle manifest. The JDK BreakIterator exposes the same API (getWordInstance, preceding, following, isBoundary, setText, DONE) and the existing POSIX-locale workaround for '.' not being treated as a word boundary continues to work with java.text. Removes the last com.ibm.icu reference from org.eclipse.jface.text.
1 parent bd473bb commit 52d8fa8

3 files changed

Lines changed: 153 additions & 4 deletions

File tree

bundles/org.eclipse.jface.text/META-INF/MANIFEST.MF

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ Require-Bundle:
3838
org.eclipse.text;bundle-version="[3.8.0,4.0.0)";visibility:=reexport,
3939
org.eclipse.swt;bundle-version="[3.133.0,4.0.0)",
4040
org.eclipse.jface;bundle-version="[3.39.0,4.0.0)"
41-
Import-Package: com.ibm.icu.text
4241
Bundle-RequiredExecutionEnvironment: JavaSE-21
4342
Automatic-Module-Name: org.eclipse.jface.text
4443
Bundle-Activator: org.eclipse.jface.text.Activator

bundles/org.eclipse.jface.text/src/org/eclipse/jface/text/DefaultTextDoubleClickStrategy.java

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,10 @@
1414

1515
package org.eclipse.jface.text;
1616

17+
import java.text.BreakIterator;
1718
import java.text.CharacterIterator;
1819
import java.util.Locale;
1920

20-
import com.ibm.icu.text.BreakIterator;
21-
22-
2321

2422
/**
2523
* Standard implementation of
@@ -223,9 +221,54 @@ protected IRegion findExtendedDoubleClickSelection(IDocument document, int offse
223221
* @since 3.5
224222
*/
225223
protected IRegion findWord(IDocument document, int offset) {
224+
IRegion identifier= findIdentifierAt(document, offset);
225+
if (identifier != null) {
226+
return identifier;
227+
}
226228
return findWord(document, offset, getWordBreakIterator());
227229
}
228230

231+
/**
232+
* If the offset lies on an ASCII identifier character ({@code [A-Za-z0-9_]}), or
233+
* just after one, returns the maximal contiguous identifier run. Otherwise
234+
* returns {@code null} so the caller falls back to the locale-aware
235+
* {@link BreakIterator}. This handles identifier-style words containing runs
236+
* of {@code '_'} (e.g. {@code foo__bar}, {@code __aaaa}) consistently across
237+
* JDK versions, since {@link BreakIterator#getWordInstance()} places word
238+
* boundaries between consecutive underscores while users expect such tokens
239+
* to be selected as a single word.
240+
*/
241+
private static IRegion findIdentifierAt(IDocument document, int offset) {
242+
try {
243+
IRegion line= document.getLineInformationOfOffset(offset);
244+
int lineStart= line.getOffset();
245+
int lineEnd= lineStart + line.getLength();
246+
int probe;
247+
if (offset < lineEnd && isIdentifierPart(document.getChar(offset))) {
248+
probe= offset;
249+
} else if (offset > lineStart && isIdentifierPart(document.getChar(offset - 1))) {
250+
probe= offset - 1;
251+
} else {
252+
return null;
253+
}
254+
int start= probe;
255+
while (start > lineStart && isIdentifierPart(document.getChar(start - 1))) {
256+
start--;
257+
}
258+
int end= probe + 1;
259+
while (end < lineEnd && isIdentifierPart(document.getChar(end))) {
260+
end++;
261+
}
262+
return new Region(start, end - start);
263+
} catch (BadLocationException e) {
264+
return null;
265+
}
266+
}
267+
268+
private static boolean isIdentifierPart(char c) {
269+
return c == '_' || (c < 128 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z'));
270+
}
271+
229272
/**
230273
* Returns the locale specific word break iterator.
231274
*

tests/org.eclipse.jface.text.tests/src/org/eclipse/jface/text/tests/DefaultTextDoubleClickStrategyTest.java

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,113 @@ public void testClickAtLineEnd() throws Exception {
5555
assertEquals("you", document.get(selection.getOffset(), selection.getLength()), "Unexpected selection");
5656
}
5757

58+
@Test
59+
public void testClickJustPastIdentifierSelectsThatIdentifier() throws Exception {
60+
String content= "foo bar baz";
61+
IDocument document= new Document(content);
62+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
63+
// Click at offset 3: the space right after "foo".
64+
IRegion selection= strategy.findWord(document, 3);
65+
assertNotNull(selection);
66+
assertEquals("foo", document.get(selection.getOffset(), selection.getLength()));
67+
}
68+
69+
@Test
70+
public void testClickAtIdentifierStartSelectsWholeIdentifier() throws Exception {
71+
String content= "foo __aaaa bar";
72+
IDocument document= new Document(content);
73+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
74+
// Click at offset 4: the first '_' starting "__aaaa".
75+
IRegion selection= strategy.findWord(document, 4);
76+
assertNotNull(selection);
77+
assertEquals("__aaaa", document.get(selection.getOffset(), selection.getLength()));
78+
}
79+
80+
@Test
81+
public void testIdentifierAtLineStartAndEnd() throws Exception {
82+
String content= "_foo___\nbar_baz";
83+
IDocument document= new Document(content);
84+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
85+
// First line: every offset 0..7 should yield "_foo___".
86+
for (int offset= 0; offset <= 7; offset++) {
87+
IRegion selection= strategy.findWord(document, offset);
88+
assertNotNull(selection, "no selection at offset " + offset);
89+
assertEquals("_foo___", document.get(selection.getOffset(), selection.getLength()),
90+
"unexpected selection at offset " + offset);
91+
}
92+
// Second line.
93+
IRegion selection= strategy.findWord(document, 11);
94+
assertNotNull(selection);
95+
assertEquals("bar_baz", document.get(selection.getOffset(), selection.getLength()));
96+
}
97+
98+
@Test
99+
public void testSingleLineDocument() throws Exception {
100+
String content= "abc";
101+
IDocument document= new Document(content);
102+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
103+
IRegion selection= strategy.findWord(document, 0);
104+
assertNotNull(selection);
105+
assertEquals("abc", document.get(selection.getOffset(), selection.getLength()));
106+
selection= strategy.findWord(document, document.getLength());
107+
assertNotNull(selection);
108+
assertEquals("abc", document.get(selection.getOffset(), selection.getLength()));
109+
}
110+
111+
@Test
112+
public void testIdentifierSurroundedByPunctuation() throws Exception {
113+
String content= "(foo_bar);";
114+
IDocument document= new Document(content);
115+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
116+
// Click in the middle of the identifier.
117+
IRegion selection= strategy.findWord(document, 4);
118+
assertNotNull(selection);
119+
assertEquals("foo_bar", document.get(selection.getOffset(), selection.getLength()));
120+
}
121+
122+
@Test
123+
public void testCjkWordSelection() throws Exception {
124+
// Japanese text without spaces. The word break iterator segments it into a
125+
// Hiragana run ("こんにちは") followed by a Kanji run
126+
// ("世界"). This segmentation is locale-independent, so double-click
127+
// selects the script run the click lands in rather than the whole line.
128+
String content= "こんにちは世界";
129+
IDocument document= new Document(content);
130+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
131+
// Click inside the Hiragana run.
132+
IRegion selection= strategy.findWord(document, 2);
133+
assertNotNull(selection);
134+
assertEquals("こんにちは", document.get(selection.getOffset(), selection.getLength()));
135+
// Click inside the Kanji run.
136+
selection= strategy.findWord(document, 6);
137+
assertNotNull(selection);
138+
assertEquals("世界", document.get(selection.getOffset(), selection.getLength()));
139+
}
140+
141+
@Test
142+
public void testCjkTokenBetweenSpaces() throws Exception {
143+
String content= "foo 我是 bar";
144+
IDocument document= new Document(content);
145+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
146+
// Click inside the CJK token.
147+
IRegion selection= strategy.findWord(document, 5);
148+
assertNotNull(selection);
149+
assertEquals("我是", document.get(selection.getOffset(), selection.getLength()));
150+
}
151+
152+
@Test
153+
public void testThaiTokenBetweenSpaces() throws Exception {
154+
// Dictionary-based segmentation of a contiguous Thai run only happens under a
155+
// Thai locale, so this test delimits the token with spaces to stay
156+
// locale-independent: double-click selects the whole Thai token.
157+
String content= "foo ไทย bar";
158+
IDocument document= new Document(content);
159+
TestSpecificDefaultTextDoubleClickStrategy strategy= new TestSpecificDefaultTextDoubleClickStrategy();
160+
IRegion selection= strategy.findWord(document, 5);
161+
assertNotNull(selection);
162+
assertEquals("ไทย", document.get(selection.getOffset(), selection.getLength()));
163+
}
164+
58165
private static final class TestSpecificDefaultTextDoubleClickStrategy extends DefaultTextDoubleClickStrategy {
59166

60167
@Override

0 commit comments

Comments
 (0)