Skip to content

Commit 2b03fe7

Browse files
author
chengyitian
committed
AJ-865: optimize 'getUTFlength' to calculate the length using standard UTF-8 encoding rules
1 parent 6582b41 commit 2b03fe7

1 file changed

Lines changed: 15 additions & 4 deletions

File tree

src/com/xxdb/io/AbstractExtendedDataOutputStream.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,26 @@ public void writeBlob(byte[] value) throws IOException {
9999

100100
public static int getUTFlength(String value, int start, int sum) throws IOException {
101101
int len = value.length();
102-
for (int i = start; i < len; ++i){
102+
for (int i = start; i < len; ++i) {
103103
char c = value.charAt(i);
104-
if (c >= '\u0001' && c <= '\u007f')
104+
if (Character.isHighSurrogate(c) && i + 1 < len && Character.isLowSurrogate(value.charAt(i + 1))) {
105+
// Check if this is a high surrogate of a surrogate pair.
106+
// Characters represented by surrogate pairs take 4 bytes in UTF-8.
107+
sum += 4;
108+
// Skip the low surrogate character.
109+
i++;
110+
} else if (c <= '\u007F') {
111+
// ASCII characters (including null character) take 1 byte.
105112
sum += 1;
106-
else if (c == '\u0000' || (c >= '\u0080' && c <= '\u07ff'))
113+
} else if (c <= '\u07FF') {
114+
// Two-byte character range.
107115
sum += 2;
108-
else
116+
} else {
117+
// Three-byte character range (0x0800-0xFFFF, excluding surrogate range).
109118
sum += 3;
119+
}
110120
}
121+
111122
return sum;
112123
}
113124

0 commit comments

Comments
 (0)