|
27 | 27 | import io.questdb.client.std.MemoryTag; |
28 | 28 | import io.questdb.client.std.QuietCloseable; |
29 | 29 | import io.questdb.client.std.Unsafe; |
| 30 | +import io.questdb.client.std.str.Utf8s; |
30 | 31 |
|
31 | 32 | /** |
32 | 33 | * A simple native memory buffer writer for encoding QWP v1 messages. |
@@ -61,24 +62,7 @@ public NativeBufferWriter(int initialCapacity) { |
61 | 62 | * @return the number of bytes needed to encode the string as UTF-8 |
62 | 63 | */ |
63 | 64 | public static int utf8Length(String s) { |
64 | | - if (s == null) return 0; |
65 | | - int len = 0; |
66 | | - for (int i = 0, n = s.length(); i < n; i++) { |
67 | | - char c = s.charAt(i); |
68 | | - if (c < 0x80) { |
69 | | - len++; |
70 | | - } else if (c < 0x800) { |
71 | | - len += 2; |
72 | | - } else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < n && Character.isLowSurrogate(s.charAt(i + 1))) { |
73 | | - i++; |
74 | | - len += 4; |
75 | | - } else if (Character.isSurrogate(c)) { |
76 | | - len++; |
77 | | - } else { |
78 | | - len += 3; |
79 | | - } |
80 | | - } |
81 | | - return len; |
| 65 | + return s == null ? 0 : Utf8s.utf8Bytes(s); |
82 | 66 | } |
83 | 67 |
|
84 | 68 | /** |
@@ -274,7 +258,7 @@ public void putString(String value) { |
274 | 258 | int utf8Len = utf8Length(value); |
275 | 259 | putVarint(utf8Len); |
276 | 260 | ensureCapacity(utf8Len); |
277 | | - encodeUtf8(value); |
| 261 | + encodeUtf8(value, utf8Len); |
278 | 262 | } |
279 | 263 | } |
280 | 264 |
|
@@ -305,10 +289,9 @@ public void putUtf8(String value) { |
305 | 289 | // All ASCII — done in a single pass |
306 | 290 | position += charLen; |
307 | 291 | } else { |
308 | | - // Non-ASCII — fall back to two-pass (re-encodes from start) |
309 | | - int utf8Len = utf8Length(value); |
310 | | - ensureCapacity(utf8Len); |
311 | | - encodeUtf8(value); |
| 292 | + int utf8Len = Utf8s.utf8Bytes(value, i, charLen); |
| 293 | + ensureCapacity(i + utf8Len); |
| 294 | + position += i + Utf8s.strCpyUtf8(value, i, bufferPtr + position + i, utf8Len); |
312 | 295 | } |
313 | 296 | } |
314 | 297 |
|
@@ -355,35 +338,7 @@ private static void writeVarintDirect(long addr, long value) { |
355 | 338 | Unsafe.getUnsafe().putByte(addr, (byte) value); |
356 | 339 | } |
357 | 340 |
|
358 | | - private void encodeUtf8(String value) { |
359 | | - long addr = bufferPtr + position; |
360 | | - for (int i = 0, n = value.length(); i < n; i++) { |
361 | | - char c = value.charAt(i); |
362 | | - if (c < 0x80) { |
363 | | - Unsafe.getUnsafe().putByte(addr++, (byte) c); |
364 | | - } else if (c < 0x800) { |
365 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0xC0 | (c >> 6))); |
366 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0x80 | (c & 0x3F))); |
367 | | - } else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < n) { |
368 | | - char c2 = value.charAt(++i); |
369 | | - if (Character.isLowSurrogate(c2)) { |
370 | | - int codePoint = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00); |
371 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0xF0 | (codePoint >> 18))); |
372 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0x80 | ((codePoint >> 12) & 0x3F))); |
373 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0x80 | ((codePoint >> 6) & 0x3F))); |
374 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0x80 | (codePoint & 0x3F))); |
375 | | - } else { |
376 | | - Unsafe.getUnsafe().putByte(addr++, (byte) '?'); |
377 | | - i--; |
378 | | - } |
379 | | - } else if (Character.isSurrogate(c)) { |
380 | | - Unsafe.getUnsafe().putByte(addr++, (byte) '?'); |
381 | | - } else { |
382 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0xE0 | (c >> 12))); |
383 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0x80 | ((c >> 6) & 0x3F))); |
384 | | - Unsafe.getUnsafe().putByte(addr++, (byte) (0x80 | (c & 0x3F))); |
385 | | - } |
386 | | - } |
387 | | - position = (int) (addr - bufferPtr); |
| 341 | + private void encodeUtf8(String value, int utf8Len) { |
| 342 | + position += Utf8s.strCpyUtf8(value, bufferPtr + position, utf8Len); |
388 | 343 | } |
389 | 344 | } |
0 commit comments