Skip to content

Commit 331e644

Browse files
fix(serialization): pass the official protobuf conformance suite (binary)
Ran the official protobuf conformance_test_runner (v29.3) against a testee backed by our lib. It surfaced real bugs, now fixed:
- parser: negative numbers (e.g. negative enum values), which arrive split as `-` + digits, were rejected; a leading sign is now handled in numbers and option values. Extension ranges / `extend` blocks (valid in editions) are now skipped, not rejected.
- reader: `this.pos += this.uint32()` captured pos *before* uint32 advanced it, so unknown length-delimited fields were under-skipped by one byte and desynced the stream. Group wire types (3/4) are now skipped. Every read is now bounds-checked, and overlong (>10-byte) varints and truncated input (premature EOF) are rejected — previously malformed input was silently accepted.
- decode: repeated occurrences of a singular message field now MERGE (proto spec) instead of overwriting; field number 0 is rejected; map message values default to an empty message when the entry omits the value.
Result: 2060 binary successes, 0 failures across proto3 + edition 2023 (JSON/JSPB/text-format/proto2 are skipped — out of scope). Added unit regressions for each fix; conformance-testee.mjs is committed as the harness (needs PB_SRC pointing at a protobuf checkout). bench/protobuf-ours.mjs compares our decode/encode vs protobuf-es.
1 parent c59f1b4 commit 331e644

8 files changed

Lines changed: 359 additions & 28 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ pre-`0.1.0` and the public API is unstable.
1515
`schema.parse(messageName, bytes)` / `schema.build(messageName, value)` decode
1616
and encode the binary wire format. Decoded objects use camelCase keys, BigInt
1717
for 64-bit ints, enum value-names, and `Uint8Array` for bytes; unknown fields
18-
are preserved across re-encode. Byte-for-byte verified against protobuf-es.
18+
are preserved across re-encode. Passes the **official protobuf conformance
19+
suite** for binary wire format (2060 successes, 0 failures across proto3 +
20+
edition 2023; JSON/text-format/proto2 out of scope); byte-for-byte verified
21+
against protobuf-es.
1922

2023
### Changed
2124

bench/protobuf-ours.mjs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Decode and encode throughput: our reflective Protobuf (runtime:serialization) vs the
2+
// reference protobuf-es, on identical bytes. Runs in esrun (has runtime:
3+
// serialization); protobuf-es also runs for an in-engine baseline.
4+
import { create, toBinary, fromBinary } from "@bufbuild/protobuf";
5+
import { CatalogSchema } from "./gen/catalog_pb.js";
6+
7+
// Our implementation is loaded dynamically from the "runtime:serialization"
// module (per the file header, this specifier is provided by esrun).
const { Protobuf } = await import("runtime:serialization");

// Schema text handed to our reflective implementation. It is meant to describe
// the same messages as the generated CatalogSchema used for protobuf-es.
const proto = `
syntax = "proto3";
package test;
message Book {
string id = 1; string author = 2; string title = 3; string genre = 4;
double price = 5; string publish_date = 6; string description = 7;
}
message Catalog { repeated Book catalog = 1; }
`;
const ours = new Protobuf.Schema(proto);

// Single source of truth for the fixture size, so the loop bound and the
// logged count can never drift apart.
const BOOK_COUNT = 50000;

// Plain-object fixture (camelCase keys) shared by both implementations.
const obj = { catalog: [] };
for (let i = 0; i < BOOK_COUNT; i++) {
  obj.catalog.push({
    id: `bk${i}`, author: "Gambardella, Matthew", title: "XML Developer's Guide",
    genre: "Computer", price: 44.95, publishDate: "2000-10-01",
    description: "An in-depth look at creating applications with XML.",
  });
}

// Encode once with protobuf-es; both decoders are then timed on these bytes.
const bytes = toBinary(CatalogSchema, create(CatalogSchema, obj));
console.log(`payload: ${bytes.length} bytes, ${BOOK_COUNT} books`);
29+
30+
/**
 * Times `fn` and prints its mean wall-clock cost per call.
 *
 * Performs 5 untimed warm-up calls, then `iters` timed calls, and logs
 * `<name padded to 28 columns>: <mean> ms/op` with one decimal place.
 *
 * @param {string} name - Label printed in front of the measurement.
 * @param {() => unknown} fn - Operation under test; its return value is ignored.
 * @param {number} [iters=30] - Number of timed calls; must be a positive integer.
 * @returns {number} Mean milliseconds per call (the unrounded value behind the log line).
 * @throws {RangeError} If `iters` is not a positive integer (0 would divide by
 *   zero, and a fractional count would run more calls than it divides by).
 */
function bench(name, fn, iters = 30) {
  if (!Number.isInteger(iters) || iters <= 0) {
    throw new RangeError(`bench: iters must be a positive integer, got ${iters}`);
  }

  // Warm-up runs are excluded from the measurement.
  const WARMUP_RUNS = 5;
  for (let i = 0; i < WARMUP_RUNS; i++) fn();

  const t0 = performance.now();
  for (let i = 0; i < iters; i++) fn();
  const msPerOp = (performance.now() - t0) / iters;

  console.log(`${name.padEnd(28)}: ${msPerOp.toFixed(1)} ms/op`);
  return msPerOp;
}
37+
38+
// Sanity check before timing anything: decode once with our implementation
// and print the element count plus one field of the first book.
const decoded = ours.parse("test.Catalog", bytes);
const firstBook = decoded.catalog[0];
console.log(`ours decoded ${decoded.catalog.length} books, first.price=${firstBook.price}`);

// protobuf-es encodes from a message instance, so build it once outside the
// timed closures; our build() is given the plain fixture object directly.
const esMsg = create(CatalogSchema, obj);

// [label, operation] pairs, run in this order with bench()'s default iteration count.
const cases = [
  ["protobuf-es decode", () => fromBinary(CatalogSchema, bytes)],
  ["ours decode (parse)", () => ours.parse("test.Catalog", bytes)],
  ["protobuf-es encode", () => toBinary(CatalogSchema, esMsg)],
  ["ours encode (build)", () => ours.build("test.Catalog", obj)],
];
for (const [label, run] of cases) bench(label, run);
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Conformance testee: speaks the protobuf conformance wire protocol on
2+
// stdin/stdout (4-byte LE length prefix + ConformanceRequest/Response), backed
3+
// by our reflective Protobuf lib. Run by the official conformance_test_runner.
4+
//
5+
// PB_SRC=/path/to/protobuf bun conformance-testee.mjs
6+
//
7+
// We implement binary<->binary for proto3 + edition 2023. JSON / JSPB /
8+
// TEXT_FORMAT and proto2 message types are reported as `skipped`.
9+
import { readFileSync } from "node:fs";
10+
import { Schema } from "./serialization/protobuf/schema.ts";
11+
12+
// Root of a protobuf source checkout; every .proto below is read relative to it.
const P = process.env.PB_SRC;
if (!P) {
  // Fail fast: without this check the reads below would target
  // "undefined/..." and surface as a confusing ENOENT.
  throw new Error("PB_SRC is not set; usage: PB_SRC=/path/to/protobuf bun conformance-testee.mjs");
}
const read = (rel) => readFileSync(`${P}/${rel}`, "utf8");

// Schema for the conformance protocol's own request/response messages.
const conf = new Schema(read("conformance/conformance.proto"));
// One schema per test-message family, each passed as a { fileName: source } map.
const p3 = new Schema({ "google/protobuf/test_messages_proto3.proto": read("src/google/protobuf/test_messages_proto3.proto") });
const ed = new Schema({ "x.proto": read("conformance/test_protos/test_messages_edition2023.proto") });
const edP3 = new Schema({ "x.proto": read("editions/golden/test_messages_proto3_editions.proto") });
const edP2 = new Schema({ "x.proto": read("editions/golden/test_messages_proto2_editions.proto") });

// Fully-qualified names of the protocol envelope messages.
const REQ = "conformance.ConformanceRequest";
const RESP = "conformance.ConformanceResponse";
23+
24+
/**
 * Selects the schema that defines `messageType`, based on its package prefix.
 *
 * @param {string} messageType - Fully-qualified message name from the request.
 * @returns The matching schema, or `null` when no known prefix matches
 *   (proto2 syntax or an unknown type — unsupported).
 */
function schemaFor(messageType) {
  // Ordered most-specific prefix first: the bare "editions." prefix would
  // otherwise also match the "editions.proto3." and "editions.proto2." names.
  const routes = [
    ["protobuf_test_messages.editions.proto3.", edP3],
    ["protobuf_test_messages.editions.proto2.", edP2],
    ["protobuf_test_messages.editions.", ed],
    ["protobuf_test_messages.proto3.", p3],
  ];
  const match = routes.find(([prefix]) => messageType.startsWith(prefix));
  return match ? match[1] : null; // proto2 syntax (or unknown) — unsupported
}
32+
33+
/**
 * Processes one encoded ConformanceRequest and returns the encoded
 * ConformanceResponse.
 *
 * Only binary-in / binary-out round trips are attempted; every other
 * combination is answered with a `skipped` response.
 *
 * @param {Uint8Array} reqBytes - Encoded `conformance.ConformanceRequest`.
 * @returns The value produced by `conf.build` for the response message.
 */
function handle(reqBytes) {
  const reply = (fields) => conf.build(RESP, fields);
  const describe = (e) => String(e?.message ?? e);

  const req = conf.parse(REQ, reqBytes);
  const mt = req.messageType ?? "";

  // A FailureSet request is answered with an empty FailureSet payload.
  if (mt === "conformance.FailureSet") {
    return reply({ protobufPayload: conf.build("conformance.FailureSet", {}) });
  }

  // We only do binary <-> binary.
  const out = req.requestedOutputFormat; // enum name
  const hasForeignPayload = [req.jsonPayload, req.jspbPayload, req.textPayload]
    .some((payload) => payload !== undefined);
  if (hasForeignPayload) {
    return reply({ skipped: "non-protobuf input unsupported" });
  }
  if (out !== "PROTOBUF") {
    return reply({ skipped: `output ${out} unsupported` });
  }

  const schema = schemaFor(mt);
  if (!schema) {
    return reply({ skipped: "proto2/unknown message type unsupported" });
  }

  // The nesting is deliberate: anything thrown while re-encoding is reported
  // as a serialize error, anything thrown while decoding as a parse error
  // (or as `skipped` when the error text says the message type is unknown).
  try {
    const decoded = schema.parse(mt, req.protobufPayload ?? new Uint8Array(0));
    try {
      return reply({ protobufPayload: schema.build(mt, decoded) });
    } catch (e) {
      return reply({ serializeError: describe(e) });
    }
  } catch (e) {
    const reason = describe(e);
    return reply(reason.includes("unknown message") ? { skipped: reason } : { parseError: reason });
  }
}
67+
68+
// --- framed stdin/stdout loop ---
69+
// Chunk reader over the process's stdin stream (Bun API).
const reader = Bun.stdin.stream().getReader();
// Writer used for every response frame sent to stdout (Bun API).
const sink = Bun.stdout.writer();
// Bytes received from stdin but not yet consumed by readExact().
let buf = new Uint8Array(0);
/**
 * Returns a newly allocated Uint8Array containing the bytes of `a` followed
 * by the bytes of `b`. Neither input is modified.
 */
function concat(a, b) {
  const headLength = a.length;
  const joined = new Uint8Array(headLength + b.length);
  joined.set(a, 0);
  joined.set(b, headLength);
  return joined;
}
78+
/**
 * Resolves with exactly `n` bytes taken from the front of the pending stdin
 * data, reading further chunks from `reader` as needed.
 *
 * @param {number} n - Number of bytes to take.
 * @returns {Promise<Uint8Array | null>} The first `n` bytes, or `null` if the
 *   stream ends before `n` bytes are available.
 */
async function readExact(n) {
  for (; buf.length < n; ) {
    const chunk = await reader.read();
    if (chunk.done) return null;
    buf = concat(buf, chunk.value);
  }
  // `head` is a view into the old backing buffer; `buf` is then replaced by a
  // copy of the remainder (slice), so later appends never touch `head`.
  const head = buf.subarray(0, n);
  buf = buf.slice(n);
  return head;
}
88+
89+
// Serve requests until stdin closes. Each frame, in both directions, is a
// 4-byte little-endian length followed by that many message bytes.
for (;;) {
  const lenBuf = await readExact(4);
  if (!lenBuf) break;
  // lenBuf may be a view at a non-zero offset, so pass its byteOffset explicitly.
  const len = new DataView(lenBuf.buffer, lenBuf.byteOffset, 4).getUint32(0, true);
  const reqBytes = await readExact(len);
  if (!reqBytes) break;

  let resp;
  try {
    // Hand handle() its own copy rather than a view into the shared read buffer.
    resp = handle(reqBytes.slice());
  } catch (e) {
    // Anything handle() did not classify itself is reported as a runtime error.
    resp = conf.build(RESP, { runtimeError: String(e?.message ?? e) });
  }

  const header = new Uint8Array(4);
  new DataView(header.buffer).setUint32(0, resp.length, true);
  sink.write(header);
  sink.write(resp);
  // flush() may return a Promise when the write cannot complete immediately;
  // wait for it so the response has been handed off before the next request
  // is read.
  await sink.flush();
}

crates/runtime/js/serialization/protobuf/decode.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,14 @@ function readSingle(r: Reader, type: FieldType): unknown {
5050
return decode(type.message, r.fork());
5151
}
5252

53-
export function decode(message: MessageType, r: Reader): Record<string, unknown> {
54-
const out: Record<string, unknown> = {};
53+
export function decode(message: MessageType, r: Reader, target?: Record<string, unknown>): Record<string, unknown> {
54+
const out: Record<string, unknown> = target ?? {};
5555
while (!r.eof()) {
5656
const tagStart = r.pos;
5757
const tag = r.uint32();
5858
const fieldNo = tag >>> 3;
5959
const wire = tag & 7;
60+
if (fieldNo === 0) throw new Error("protobuf: invalid field number 0");
6061
const field: Field | undefined = message.fieldByNumber.get(fieldNo);
6162

6263
if (!field) {
@@ -107,14 +108,20 @@ export function decode(message: MessageType, r: Reader): Record<string, unknown>
107108
);
108109
continue;
109110
}
110-
const value = readSingle(r, field.type);
111111
if (field.oneofIndex >= 0) {
112112
for (const num of message.oneofs[field.oneofIndex]!.fieldNumbers) {
113113
const other = message.fieldByNumber.get(num)!;
114114
if (other.jsonName !== field.jsonName) delete out[other.jsonName];
115115
}
116116
}
117-
out[field.jsonName] = value;
117+
if (field.type.kind === "message") {
118+
// Repeated occurrences of a singular message field merge (proto spec).
119+
const existing = out[field.jsonName];
120+
const into = existing && typeof existing === "object" ? (existing as Record<string, unknown>) : undefined;
121+
out[field.jsonName] = decode(field.type.message, r.fork(), into);
122+
} else {
123+
out[field.jsonName] = readSingle(r, field.type);
124+
}
118125
}
119126
return out;
120127
}
@@ -130,5 +137,5 @@ function defaultForType(type: FieldType): unknown {
130137
}
131138
}
132139
if (type.kind === "enum") return type.enum.byNumber.get(0) ?? 0;
133-
return undefined;
140+
return {}; // message: default to an empty message (e.g. map<…, Message> missing value)
134141
}

crates/runtime/js/serialization/protobuf/parser.ts

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,9 @@ class Parser {
169169
this.skipBlock();
170170
break;
171171
case "extend":
172-
this.err("proto2 extensions (extend) are unsupported", t.line);
173-
// eslint-disable-next-line no-fallthrough
172+
this.lx.next();
173+
this.skipBlock();
174+
break;
174175
default:
175176
this.err(`unexpected '${t.value}' at top level`, t.line);
176177
}
@@ -231,8 +232,15 @@ class Parser {
231232
this.err("proto2 groups are unsupported", t.line);
232233
// eslint-disable-next-line no-fallthrough
233234
case "extensions":
235+
// extension *range* declaration (valid in editions too) — ignore.
236+
this.lx.next();
237+
this.skipToSemicolon();
238+
continue;
234239
case "extend":
235-
this.err("proto2 extensions are unsupported", t.line);
240+
// extension field definitions — skipped; such fields decode as unknown.
241+
this.lx.next();
242+
this.skipBlock();
243+
continue;
236244
}
237245
}
238246
// otherwise a field
@@ -340,6 +348,11 @@ class Parser {
340348
const t = this.lx.next();
341349
if (t.kind === "str") return t.value;
342350
if (t.kind === "ident" || t.kind === "num") return t.value;
351+
if (t.kind === "sym" && (t.value === "-" || t.value === "+")) {
352+
const num = this.lx.next();
353+
if (num.kind !== "num") this.err(`bad option value '${t.value}${num.value}'`, t.line);
354+
return (t.value === "-" ? "-" : "") + num.value;
355+
}
343356
if (t.kind === "sym" && t.value === "{") {
344357
// aggregate option value — skip to matching brace
345358
let depth = 1;
@@ -408,13 +421,19 @@ class Parser {
408421
}
409422

410423
/**
 * Reads an integer literal, optionally preceded by a separate `+` / `-`
 * sign token, and returns its numeric value.
 *
 * Accepts hexadecimal (`0x` / `0X` prefix), octal (leading `0` followed only
 * by octal digits, as the proto language defines `octalLit`) and decimal
 * literals. A signed zero literal yields `0`, never `-0`.
 *
 * Reports through `this.err` when the next token is not a number or does not
 * parse (presumably `err` throws — confirm against its definition).
 * NOTE(review): despite the name, no 32-bit range check is performed here.
 */
private parseInt32(): number {
  let sign = 1;
  const lead = this.lx.peek();
  if (lead.kind === "sym" && (lead.value === "-" || lead.value === "+")) {
    this.lx.next();
    if (lead.value === "-") sign = -1;
  }

  const t = this.lx.next();
  if (t.kind !== "num") this.err(`expected number, got '${t.value}'`, t.line);

  // Pick the radix from the literal's shape; parseInt tolerates the 0x prefix.
  let radix = 10;
  if (/^0[xX]/.test(t.value)) radix = 16;
  else if (/^0[0-7]+$/.test(t.value)) radix = 8;

  const n = parseInt(t.value, radix);
  if (!Number.isFinite(n)) this.err(`bad number '${t.value}'`, t.line);

  // -1 * 0 is -0 in JavaScript; normalise so callers never observe negative zero.
  return n === 0 ? 0 : sign * n;
}
419438

420439
/** Reads a (possibly dotted, possibly leading-dot) qualified name. */

0 commit comments

Comments
 (0)