Skip to content

Commit 132d676

Browse files
committed
Add bench
1 parent 98242a5 commit 132d676

2 files changed

Lines changed: 107 additions & 4 deletions

File tree

crates/vespera_inprocess/benches/dispatch.rs

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ use futures_util::FutureExt;
3737
use serde::{Deserialize, Serialize};
3838
use tokio::runtime::Runtime;
3939
use vespera_inprocess::{
40-
RequestChunk, RequestEnvelope, dispatch_bidirectional_streaming, dispatch_from_bytes,
41-
dispatch_owned, dispatch_streaming_async, dispatch_typed, register_app,
40+
DirectWriteResult, RequestChunk, RequestEnvelope, dispatch_bidirectional_streaming,
41+
dispatch_from_bytes, dispatch_into, dispatch_owned, dispatch_streaming_async, dispatch_typed,
42+
register_app,
4243
};
4344

4445
// ── Test fixtures ────────────────────────────────────────────────────
@@ -277,6 +278,96 @@ fn bench_wire_path(c: &mut Criterion) {
277278
drop(runtime);
278279
}
279280

281+
/// Raw-byte isolation: `dispatch_from_bytes` against `/echo/bytes`,
282+
/// which echoes the request body unchanged. Comparing this group with
283+
/// `wire_path` (JSON `/echo`) isolates the `serde_json`
284+
/// deserialize+reserialize cost from vespera's pure dispatch/copy
285+
/// overhead at identical body sizes.
286+
fn bench_bytes_path(c: &mut Criterion) {
287+
install_bench_app();
288+
289+
let runtime = Runtime::new().expect("tokio runtime");
290+
let mut group = c.benchmark_group("bytes_path");
291+
292+
for &body_kb in &[1_usize, 64, 1024] {
293+
let payload = vec![0xA5u8; body_kb * 1024];
294+
let wire = assemble_wire(
295+
"POST",
296+
"/echo/bytes",
297+
Some("application/octet-stream"),
298+
&payload,
299+
);
300+
group.throughput(Throughput::Bytes((body_kb * 1024) as u64));
301+
302+
group.bench_with_input(
303+
BenchmarkId::new("raw_bytes_dispatch_from_bytes", body_kb),
304+
&body_kb,
305+
|b, _| {
306+
b.iter(|| dispatch_from_bytes(wire.clone(), &runtime));
307+
},
308+
);
309+
}
310+
311+
group.finish();
312+
drop(runtime);
313+
}
314+
315+
/// Direct-write A/B: `dispatch_from_bytes` (materialises the wire
316+
/// response into a fresh `Vec` per call) vs `dispatch_into` (streams
317+
/// the wire response straight into a caller-owned, preallocated buffer
318+
/// — the JNI `dispatchDirect` path). Both echo a raw byte body via
319+
/// `/echo/bytes`, so the delta isolates the response `Vec` allocation +
320+
/// final body memcpy that the direct-write path removes.
321+
///
322+
/// The `dispatch_into` buffer is sized exactly once (outside the timed
323+
/// loop) and reused across iterations, mirroring the pooled direct
324+
/// buffer the Java bridge hands in.
325+
fn bench_direct_write_path(c: &mut Criterion) {
326+
install_bench_app();
327+
328+
let runtime = Runtime::new().expect("tokio runtime");
329+
let mut group = c.benchmark_group("direct_write_path");
330+
331+
for &body_kb in &[64_usize, 1024, 4096] {
332+
let payload = vec![0xA5u8; body_kb * 1024];
333+
let wire = assemble_wire(
334+
"POST",
335+
"/echo/bytes",
336+
Some("application/octet-stream"),
337+
&payload,
338+
);
339+
group.throughput(Throughput::Bytes((body_kb * 1024) as u64));
340+
341+
// Exact response size: one untimed probe with a generous buffer.
342+
let required = {
343+
let mut probe = vec![0u8; payload.len() + 4096];
344+
match dispatch_into(wire.clone(), &mut probe, &runtime) {
345+
DirectWriteResult::Complete(n) | DirectWriteResult::Overflow(n) => n,
346+
}
347+
};
348+
349+
group.bench_with_input(
350+
BenchmarkId::new("materialize_dispatch_from_bytes", body_kb),
351+
&body_kb,
352+
|b, _| {
353+
b.iter(|| dispatch_from_bytes(wire.clone(), &runtime));
354+
},
355+
);
356+
357+
group.bench_with_input(
358+
BenchmarkId::new("direct_write_dispatch_into", body_kb),
359+
&body_kb,
360+
|b, _| {
361+
let mut out = vec![0u8; required];
362+
b.iter(|| dispatch_into(wire.clone(), &mut out, &runtime));
363+
},
364+
);
365+
}
366+
367+
group.finish();
368+
drop(runtime);
369+
}
370+
280371
/// P2 isolation (within-run A/B): default-app resolution via the
281372
/// lock-free `OnceLock` fast path vs named-app resolution through the
282373
/// `RwLock<HashMap>` slow path. Identical router, identical wire
@@ -547,6 +638,8 @@ criterion_group!(
547638
bench_router_path,
548639
bench_dispatch_path,
549640
bench_wire_path,
641+
bench_bytes_path,
642+
bench_direct_write_path,
550643
bench_resolve_path,
551644
bench_contended_path,
552645
bench_headers_path,

libs/vespera-bridge/src/main/java/com/devfive/vespera/bridge/VesperaBridge.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,12 @@ public static DecodedResponse decodeResponse(byte[] wire) {
10431043
// the readTree path, unknown fields (incl. "v") are skipChildren'd.
10441044
int status = 500;
10451045
Map<String, Object> headers = null;
1046-
Map<String, String> metadata = new LinkedHashMap<>();
1046+
// Pre-size for the expected occupancy: the wire metadata object
1047+
// carries only a handful of keys (typically just "version"), so a
1048+
// capacity-4 table (Node[4], up to 3 entries before resizing) is allocated instead of the default
1049+
// capacity-16 (Node[16]) on the first put — a deterministic
1050+
// per-response heap saving with no behavioural change.
1051+
Map<String, String> metadata = new LinkedHashMap<>(4);
10471052
List<Map<String, Object>> validationErrors = null;
10481053
try (JsonParser p = JSON_FACTORY.createParser(wire, 4, headerLen)) {
10491054
if (p.nextToken() == JsonToken.START_OBJECT) {
@@ -1056,7 +1061,12 @@ public static DecodedResponse decodeResponse(byte[] wire) {
10561061
if (t != JsonToken.START_OBJECT) { p.skipChildren(); break; }
10571062
while (p.nextToken() == JsonToken.FIELD_NAME) {
10581063
String k = p.currentName();
1059-
if (headers == null) headers = new LinkedHashMap<>();
1064+
// Pre-size for a typical response header count
1065+
// (content-type, content-length, a few more):
1066+
// capacity-8 table holds up to 6 entries before
1067+
// resizing, vs the default capacity-16 — a
1068+
// deterministic per-response heap saving.
1069+
if (headers == null) headers = new LinkedHashMap<>(8);
10601070
if (p.nextToken() == JsonToken.START_ARRAY) {
10611071
List<String> list = new ArrayList<>();
10621072
while (p.nextToken() != JsonToken.END_ARRAY) list.add(p.getValueAsString());

0 commit comments

Comments
 (0)