Skip to content

Commit 98242a5

Browse files
committed
Optimize WireHeaderReader
1 parent 338e2ad commit 98242a5

2 files changed

Lines changed: 193 additions & 23 deletions

File tree

libs/vespera-bridge/src/main/java/com/devfive/vespera/bridge/WireHeaderReader.java

Lines changed: 176 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,11 @@ static void apply(
5252
int status = 500;
5353
if (r.peek() == '{') {
5454
r.beginObject();
55-
String name;
56-
while ((name = r.nextKey()) != null) {
57-
switch (name) {
58-
case "status" -> status = r.readInt();
59-
case "headers" -> {
55+
int key;
56+
while ((key = r.nextRootKey()) != KEY_END) {
57+
switch (key) {
58+
case KEY_STATUS -> status = r.readInt();
59+
case KEY_HEADERS -> {
6060
if (r.isObjectStart()) {
6161
r.beginObject();
6262
String k;
@@ -74,6 +74,8 @@ static void apply(
7474
r.skipValue();
7575
}
7676
}
77+
// KEY_OTHER: "v", "metadata", "validation_errors", … —
78+
// matched by bytes, value skipped, never materialised.
7779
default -> r.skipValue();
7880
}
7981
}
@@ -135,6 +137,97 @@ String nextKey() {
135137
return key;
136138
}
137139

140+
// Root-member-key codes for the allocation-free root-key matcher used
141+
// by apply(): the only root keys the reader acts on are "status" and
142+
// "headers"; every other key ("v", "metadata", "validation_errors", …)
143+
// is matched by length+bytes and its value skipped — never materialised
144+
// as a String.
145+
private static final int KEY_END = -2;
146+
private static final int KEY_OTHER = -1;
147+
private static final int KEY_STATUS = 0;
148+
private static final int KEY_HEADERS = 1;
149+
150+
/**
151+
* Advance past the next root member key WITHOUT allocating a String for
152+
* it, returning a {@code KEY_*} code ({@code KEY_END} at object end).
153+
* The allocation-free counterpart of {@link #nextKey()} for the fixed
154+
* root schema; header keys (delivered to the sink) still use
155+
* {@link #nextKey()}.
156+
*/
157+
int nextRootKey() {
158+
skipWs();
159+
int c = cur();
160+
if (c == ',') {
161+
pos++;
162+
skipWs();
163+
c = cur();
164+
}
165+
if (c == '}') {
166+
pos++;
167+
return KEY_END;
168+
}
169+
int code = matchRootKey();
170+
expect(':');
171+
return code;
172+
}
173+
174+
/**
175+
* Consume a quoted root key, returning {@code KEY_STATUS} /
176+
* {@code KEY_HEADERS} when its bytes equal those literals, else
177+
* {@code KEY_OTHER} — all without allocating. An escaped key (never
178+
* emitted for the fixed root field names) is consumed and reported as
179+
* {@code KEY_OTHER}.
180+
*/
181+
private int matchRootKey() {
182+
skipWs();
183+
if (cur() != '"') {
184+
throw err("expected string");
185+
}
186+
pos++;
187+
int start = pos;
188+
boolean simple = true;
189+
while (pos < end) {
190+
int b = buf.get(pos) & 0xFF;
191+
if (b == '"') {
192+
break;
193+
}
194+
if (b == '\\') {
195+
simple = false;
196+
pos++;
197+
if (pos < end) {
198+
pos++;
199+
}
200+
continue;
201+
}
202+
pos++;
203+
}
204+
if (pos >= end) {
205+
throw err("unterminated string");
206+
}
207+
int contentLen = pos - start;
208+
pos++; // consume closing quote
209+
if (!simple) {
210+
return KEY_OTHER;
211+
}
212+
if (contentLen == 6 && regionEquals(start, "status")) {
213+
return KEY_STATUS;
214+
}
215+
if (contentLen == 7 && regionEquals(start, "headers")) {
216+
return KEY_HEADERS;
217+
}
218+
return KEY_OTHER;
219+
}
220+
221+
/** Whether {@code buf[s .. s+lit.length())} equals the ASCII literal. */
222+
private boolean regionEquals(int s, String lit) {
223+
for (int i = 0; i < lit.length(); i++) {
224+
if ((buf.get(s + i) & 0xFF) != lit.charAt(i)) {
225+
return false;
226+
}
227+
}
228+
return true;
229+
}
230+
138231
void beginArray() {
139232
expect('[');
140233
}
@@ -175,10 +268,26 @@ String readString() {
175268
// decode loop below.
176269
int simpleLen = simpleAsciiRun();
177270
if (simpleLen >= 0) {
178-
byte[] tmp = new byte[simpleLen];
179-
buf.get(pos, tmp, 0, simpleLen); // absolute bulk get (Java 13+); position untouched
271+
String s;
272+
if (buf.hasArray()) {
273+
// Heap-backed buffer (ByteBuffer.wrap on the SYNC / streaming
274+
// / async paths): build the String straight from the backing
275+
// array — one copy, no intermediate byte[]. Direct buffers
276+
// (the DIRECT dispatch path) have no accessible array and keep
277+
// the absolute bulk-get copy below.
278+
s =
279+
new String(
280+
buf.array(),
281+
buf.arrayOffset() + pos,
282+
simpleLen,
283+
java.nio.charset.StandardCharsets.US_ASCII);
284+
} else {
285+
byte[] tmp = new byte[simpleLen];
286+
buf.get(pos, tmp, 0, simpleLen); // absolute bulk get (Java 13+); position untouched
287+
s = new String(tmp, java.nio.charset.StandardCharsets.US_ASCII);
288+
}
180289
pos += simpleLen + 1; // consume the run + the closing quote
181-
return new String(tmp, java.nio.charset.StandardCharsets.US_ASCII);
290+
return s;
182291
}
183292
StringBuilder sb = new StringBuilder();
184293
while (pos < end) {
@@ -313,19 +422,8 @@ private void skipNumberTail() {
313422
void skipValue() {
314423
int c = peek();
315424
switch (c) {
316-
case '{' -> {
317-
beginObject();
318-
while (nextKey() != null) {
319-
skipValue();
320-
}
321-
}
322-
case '[' -> {
323-
beginArray();
324-
while (hasNextElement()) {
325-
skipValue();
326-
}
327-
}
328-
case '"' -> readString();
425+
case '"' -> skipStringRaw();
426+
case '{', '[' -> skipContainerRaw();
329427
case 't', 'f', 'n' -> skipLiteral();
330428
default -> {
331429
if (c == '-' || (c >= '0' && c <= '9')) {
@@ -337,6 +435,63 @@ void skipValue() {
337435
}
338436
}
339437

438+
/**
439+
* Consume a JSON string token (pos at the opening quote) without
440+
* allocating — the skip path never needs the decoded text, so unlike
441+
* {@link #readString()} it builds no {@code String}.
442+
*/
443+
private void skipStringRaw() {
444+
pos++; // opening quote (peek() guarantees cur() == '"')
445+
while (pos < end) {
446+
int b = buf.get(pos++) & 0xFF;
447+
if (b == '"') {
448+
return;
449+
}
450+
if (b == '\\' && pos < end) {
451+
pos++; // skip the escaped char (so \" is not seen as the close)
452+
}
453+
}
454+
throw err("unterminated string");
455+
}
456+
457+
/**
458+
* Consume a balanced {@code {...}} / {@code [...]} (pos at the opening
459+
* bracket), string-literal aware, without allocating — replaces the
460+
* prior recursive skip that materialised every nested key and value of
461+
* skipped fields ({@code metadata}, {@code validation_errors}, …).
462+
*/
463+
private void skipContainerRaw() {
464+
int depth = 0;
465+
while (pos < end) {
466+
int b = buf.get(pos++) & 0xFF;
467+
switch (b) {
468+
case '"' -> {
469+
// Skip a nested string so its braces/brackets don't count.
470+
while (pos < end) {
471+
int x = buf.get(pos++) & 0xFF;
472+
if (x == '"') {
473+
break;
474+
}
475+
if (x == '\\' && pos < end) {
476+
pos++;
477+
}
478+
}
479+
}
480+
case '{', '[' -> depth++;
481+
case '}', ']' -> {
482+
depth--;
483+
if (depth == 0) {
484+
return;
485+
}
486+
}
487+
default -> {
488+
// ordinary byte inside the container — skip
489+
}
490+
}
491+
}
492+
throw err("unterminated container");
493+
}
494+
340495
private void skipLiteral() {
341496
while (pos < end) {
342497
int d = buf.get(pos) & 0xFF;

libs/vespera-bridge/src/test/java/com/devfive/vespera/bridge/WireHeaderReaderTest.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,25 @@ class WireHeaderReaderTest {
1313

1414
private record Captured(int status, List<String> headers) {}
1515

16-
/** Parse {@code headerJson} from a direct buffer laid out as the wire is. */
16+
/**
17+
* Parse {@code headerJson} through BOTH a direct buffer (the DIRECT
18+
* dispatch path, no backing array) and a heap buffer (the SYNC /
19+
* streaming / async {@code ByteBuffer.wrap} paths, which hit
20+
* {@code readString}'s backing-array fast path), asserting the two
21+
* agree. Returns the (identical) result.
22+
*/
1723
private static Captured run(String headerJson) {
24+
Captured direct = runWith(headerJson, true);
25+
Captured heap = runWith(headerJson, false);
26+
assertEquals(direct.status(), heap.status(), "direct vs heap status mismatch");
27+
assertEquals(direct.headers(), heap.headers(), "direct vs heap headers mismatch");
28+
return direct;
29+
}
30+
31+
private static Captured runWith(String headerJson, boolean direct) {
1832
byte[] hb = headerJson.getBytes(StandardCharsets.UTF_8);
19-
ByteBuffer buf = ByteBuffer.allocateDirect(4 + hb.length);
33+
ByteBuffer buf =
34+
direct ? ByteBuffer.allocateDirect(4 + hb.length) : ByteBuffer.allocate(4 + hb.length);
2035
buf.putInt(hb.length);
2136
buf.put(hb);
2237
int[] status = {-1};

0 commit comments

Comments
 (0)