Skip to content

Commit 927e472

Browse files
feat(serialization): pure-JS Protobuf (reflection, proto3 + editions)
Adds Protobuf to runtime:serialization as our own pure-JS implementation — no native deps, no codegen, no toolchain. A .proto schema is parsed at runtime and messages are decoded/encoded reflectively against the descriptor. Library (TS, in crates/runtime/js, bundled to runtime_modules/serialization.js via `bun run build`): - lexer + recursive-descent parser for proto3 and edition 2023; proto2-only constructs (required/group/extend) are rejected with a clear error. - linker: type resolution, map lowering, oneofs, and editions/proto3 feature resolution into concrete presence/packed/closed flags. - wire reader/writer with inlined UTF-8 (no TextDecoder host call) and BigInt 64-bit; reflective decode/encode with unknown-field preservation. - Protobuf.Schema(proto).parse(name, bytes) / .build(name, value). Decoded shape: camelCase keys, BigInt 64-bit, enum value-names, Uint8Array bytes, plain-object maps, sparse (absent fields omitted). google/protobuf well-known types embedded. Correctness: - 14 bun unit tests (wire boundaries, all scalars, packed/expanded, maps, oneofs, editions presence, unknown fields, proto2 rejection). - crates/runtime/conformance/protobuf.js (6 tests, pass via esrun). - bench/protobuf-parity.mjs: byte-for-byte encode parity AND cross-decode vs protobuf-es on catalog + a rich all-types schema (enums, 64-bit, sint/fixed, packed, repeated message, map). 121 cargo lib tests pass; verified end-to-end in esrun via runtime:serialization.
1 parent 7cce11c commit 927e472

24 files changed

Lines changed: 3760 additions & 132 deletions

bench/gen/all_pb.js

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// @generated by protoc-gen-es v2.12.1 with parameter "target=js,import_extension=js"
2+
// @generated from file all.proto (package t, syntax proto3)
3+
/* eslint-disable */
4+
5+
import { enumDesc, fileDesc, messageDesc, tsEnum } from "@bufbuild/protobuf/codegenv2";
6+
7+
/**
8+
* Describes the file all.proto.
9+
*/
10+
export const file_all = /*@__PURE__*/
11+
fileDesc("CglhbGwucHJvdG8SAXQiHQoFSW5uZXISCQoBdhgBIAEoCRIJCgFuGAIgASgFIusCCgNBbGwSCwoDaTMyGAEgASgFEgsKA2k2NBgCIAEoAxILCgN1MzIYAyABKA0SCwoDdTY0GAQgASgEEgsKA3MzMhgFIAEoERILCgNzNjQYBiABKBISCwoDZjMyGAcgASgHEgsKA2Y2NBgIIAEoBhIMCgRzZjMyGAkgASgPEgwKBHNmNjQYCiABKBASCgoCZmwYCyABKAISCgoCZGIYDCABKAESCQoBYhgNIAEoCBIJCgFzGA4gASgJEgoKAmJ5GA8gASgMEhMKAWMYECABKA4yCC50LkNvbG9yEhcKBWlubmVyGBEgASgLMggudC5Jbm5lchIMCgRudW1zGBIgAygFEhcKBWl0ZW1zGBMgAygLMggudC5Jbm5lchIiCgZjb3VudHMYFCADKAsyEi50LkFsbC5Db3VudHNFbnRyeRotCgtDb3VudHNFbnRyeRILCgNrZXkYASABKAkSDQoFdmFsdWUYAiABKAU6AjgBKiUKBUNvbG9yEgcKA1JFRBAAEgkKBUdSRUVOEAESCAoEQkxVRRACYgZwcm90bzM");
12+
13+
/**
14+
* Describes the message t.Inner.
15+
* Use `create(InnerSchema)` to create a new message.
16+
*/
17+
export const InnerSchema = /*@__PURE__*/
18+
messageDesc(file_all, 0);
19+
20+
/**
21+
* Describes the message t.All.
22+
* Use `create(AllSchema)` to create a new message.
23+
*/
24+
export const AllSchema = /*@__PURE__*/
25+
messageDesc(file_all, 1);
26+
27+
/**
28+
* Describes the enum t.Color.
29+
*/
30+
export const ColorSchema = /*@__PURE__*/
31+
enumDesc(file_all, 0);
32+
33+
/**
34+
* @generated from enum t.Color
35+
*/
36+
export const Color = /*@__PURE__*/
37+
tsEnum(ColorSchema);
38+

bench/proto/all.proto

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Parity fixture: bench/protobuf-parity.mjs loads this schema at runtime, and
// bench/gen/all_pb.js is the protoc-gen-es output generated from this file.
syntax = "proto3";
2+
package t;
3+
enum Color { RED = 0; GREEN = 1; BLUE = 2; }
4+
// Small message type used by the singular `inner` and repeated `items` fields of All.
message Inner { string v = 1; int32 n = 2; }
5+
// One field per scalar type (fields 1-15), then an enum, a message,
// a repeated scalar, a repeated message and a map.
message All {
6+
int32 i32 = 1; int64 i64 = 2; uint32 u32 = 3; uint64 u64 = 4;
7+
sint32 s32 = 5; sint64 s64 = 6; fixed32 f32 = 7; fixed64 f64 = 8;
8+
sfixed32 sf32 = 9; sfixed64 sf64 = 10; float fl = 11; double db = 12;
9+
bool b = 13; string s = 14; bytes by = 15; Color c = 16;
10+
Inner inner = 17;
11+
repeated int32 nums = 18;
12+
repeated Inner items = 19;
13+
map<string, int32> counts = 20;
14+
}

bench/protobuf-parity.mjs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Differential parity for our reflective protobuf lib (crates/runtime/js) vs the
2+
// reference protobuf-es, on the catalog + all schemas. Asserts byte-for-byte
3+
// encode equality and cross-implementation decode. Run: bun protobuf-parity.mjs
4+
import { readFileSync } from "node:fs";
5+
import { create, toBinary, fromBinary } from "@bufbuild/protobuf";
6+
import { CatalogSchema } from "./gen/catalog_pb.js";
7+
import { AllSchema } from "./gen/all_pb.js";
8+
import { Schema } from "../crates/runtime/js/serialization/protobuf/schema.ts";
9+
10+
// Element-wise strict equality of two indexable sequences (the encoded byte arrays).
const eq = (a, b) => {
  if (a.length !== b.length) return false;
  const left = [...a];
  for (let i = 0; i < left.length; i++) {
    if (left[i] !== b[i]) return false;
  }
  return true;
};

// Number of failed checks so far; decides the process exit code at the end of the script.
let fails = 0;

// Prints one "ok - " / "FAIL- " line for `name` and tallies the failure when `cond` is falsy.
const check = (name, cond) => {
  const prefix = cond ? "ok - " : "FAIL- ";
  console.log(prefix + name);
  if (!cond) fails += 1;
};
13+
14+
// --- catalog: strings, double, repeated message ---
15+
{
16+
// Our implementation parses the .proto text at runtime; protobuf-es uses the generated CatalogSchema.
const ours = new Schema(readFileSync(new URL("./proto/catalog.proto", import.meta.url), "utf8"));
17+
const obj = { catalog: [] };
18+
// 1000 entries that differ only in `id`.
for (let i = 0; i < 1000; i++) {
19+
obj.catalog.push({ id: `bk${i}`, author: "A", title: "T", genre: "G", price: 44.95, publishDate: "2000-10-01", description: "D" });
20+
}
21+
// Encode the same plain object with both implementations.
const es = toBinary(CatalogSchema, create(CatalogSchema, obj));
22+
const our = ours.build("test.Catalog", obj);
23+
check("catalog: byte-equal encode", eq(es, our));
24+
// The cross-decode checks are spot checks: one field of the first / last entry only.
check("catalog: our decode of es bytes", ours.parse("test.Catalog", es).catalog[0].title === "T");
25+
check("catalog: es decode of our bytes", fromBinary(CatalogSchema, our).catalog[999].id === "bk999");
26+
}
27+
28+
// --- all: enums, 64-bit BigInt, sint/fixed, packed, repeated message, map ---
29+
{
30+
const ours = new Schema(readFileSync(new URL("./proto/all.proto", import.meta.url), "utf8"));
31+
// Input in protobuf-es shape (numeric enum value). Values sit on type boundaries:
// 9007199254740993n is 2^53 + 1 (not representable as a JS number),
// 18446744073709551615n is 2^64 - 1 and 4294967295 is 2^32 - 1.
const esInput = {
32+
i32: -7, i64: 9007199254740993n, u32: 7, u64: 18446744073709551615n,
33+
s32: -123, s64: -9007199254740993n, f32: 4294967295, f64: 18446744073709551615n,
34+
// The string mixes ASCII, a 2-byte UTF-8 character (é) and a 4-byte one (𐍈, outside the BMP).
sf32: -42, sf64: -99n, fl: 1.5, db: 44.95, b: true, s: "héllo 𐍈",
35+
by: new Uint8Array([1, 2, 3, 255]), c: 2, inner: { v: "x", n: 9 },
36+
// Each `items` entry leaves one of Inner's two fields unset.
nums: [1, 2, 300, -4], items: [{ v: "a" }, { n: 5 }], counts: { x: 1, y: 2 },
37+
};
38+
const es = toBinary(AllSchema, create(AllSchema, esInput));
39+
// Same data for our implementation, except the enum is passed by value name instead of number.
const our = ours.build("t.All", { ...esInput, c: "BLUE" });
40+
check("all: byte-equal encode", eq(es, our));
41+
const od = ours.parse("t.All", es);
42+
// Our decoder is expected to yield a BigInt for int64 and the enum value name for `c`.
check("all: our decode 64-bit + enum name", od.i64 === 9007199254740993n && od.c === "BLUE");
43+
check("all: es decode of our bytes", fromBinary(AllSchema, our).c === 2);
44+
}
45+
46+
// Print a one-line summary, then exit with status 1 if any check failed and 0 otherwise.
console.log(fails ? `\n${fails} PARITY FAILURE(S)` : "\nALL PARITY OK");
47+
process.exit(fails ? 1 : 0);
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Conformance for the pure-JS Protobuf implementation in runtime:serialization.
2+
// Each test() is one tallied assertion. BigInt-aware deep equality (JSON.stringify
3+
// can't serialize BigInt) compares values structurally.
4+
/**
 * Structural deep-equality assertion that understands BigInt and Uint8Array.
 *
 * Both values are normalized into a JSON-serializable form (object keys sorted,
 * special values replaced by tagged strings) and the JSON texts are compared.
 *
 * Tags: "b:<digits>" for bigint, "u:<bytes>" for Uint8Array, "n:<name>" for
 * NaN / Infinity / -Infinity (JSON.stringify would turn all three into null).
 * A real string that already starts with one of the tag prefixes is escaped
 * with "s:" so it can never compare equal to a tagged value.
 *
 * Properties whose value is undefined are still dropped by JSON.stringify, so
 * `{ a: undefined }` compares equal to `{}`.
 *
 * @param a   actual value
 * @param b   expected value
 * @param msg label used as the prefix of the error message
 * @throws Error when the normalized values differ
 */
function deepEq(a, b, msg) {
  // Prefixes reserved for tagged values; strings starting with one must be escaped.
  const tagPrefix = /^[bnsu]:/;
  function norm(o) {
    if (typeof o === "bigint") return "b:" + o.toString();
    if (typeof o === "number" && !Number.isFinite(o)) return "n:" + String(o);
    if (typeof o === "string") return tagPrefix.test(o) ? "s:" + o : o;
    if (o instanceof Uint8Array) return "u:" + Array.from(o).join(",");
    if (Array.isArray(o)) return o.map(norm);
    if (o && typeof o === "object") {
      // fromEntries defines own properties, so a "__proto__" key is treated as data.
      return Object.fromEntries(Object.keys(o).sort().map((k) => [k, norm(o[k])]));
    }
    return o;
  }
  const x = JSON.stringify(norm(a));
  const y = JSON.stringify(norm(b));
  if (x !== y) throw new Error(`${msg}: expected ${y}, got ${x}`);
}
18+
19+
// Pins the exact encoding of a two-field message, then decodes it back.
test("protobuf: exact wire bytes + round-trip", async () => {
20+
const { Protobuf } = await import("runtime:serialization");
21+
const s = new Protobuf.Schema(`syntax="proto3"; message M { int32 a = 1; string b = 2; }`);
22+
const bytes = s.build("M", { a: 150, b: "hi" });
23+
// 0x08 = tag (field 1, varint), 0x96 0x01 = varint 150,
// 0x12 = tag (field 2, length-delimited), 0x02 = length, 0x68 0x69 = "hi".
deepEq(Array.from(bytes), [0x08, 0x96, 0x01, 0x12, 0x02, 0x68, 0x69], "wire bytes");
24+
deepEq(s.parse("M", bytes), { a: 150, b: "hi" }, "round-trip");
25+
});
26+
27+
// Round-trips one field of every proto3 scalar type, plus an enum and a nested
// message, through build() and parse(). The schema covers all fifteen scalar
// types so the test matches its title; 64-bit fields use BigInt values beyond
// the safe-integer range and fixed32 uses 2^32 - 1 to catch signedness slips.
test("protobuf: all scalar types incl 64-bit BigInt round-trip", async () => {
  const { Protobuf } = await import("runtime:serialization");
  const s = new Protobuf.Schema(`
    syntax = "proto3"; package t;
    enum Color { RED = 0; BLUE = 2; }
    message Inner { string v = 1; }
    message All {
      int32 i32 = 1; int64 i64 = 2; uint32 u32 = 3; uint64 u64 = 4;
      sint32 s32 = 5; sint64 s64 = 6; fixed32 f32 = 7; fixed64 f64 = 8;
      sfixed32 sf32 = 9; sfixed64 sf64 = 10; float fl = 11; double db = 12;
      bool b = 13; string s = 14; bytes by = 15; Color c = 16; Inner inner = 17;
    }
  `);
  const input = {
    i32: -7, i64: 9007199254740993n, u32: 7, u64: 18446744073709551615n,
    s32: -123, s64: -9007199254740993n, f32: 4294967295, f64: 18446744073709551615n,
    sf32: -42, sf64: -99n, fl: 1.5, db: 44.95,
    b: true, s: 'héllo 𐍈', by: new Uint8Array([1, 2, 3, 255]), c: "BLUE", inner: { v: "x" },
  };
  deepEq(s.parse("t.All", s.build("t.All", input)), input, "All round-trip");
});
46+
47+
// Round-trips a message with a repeated numeric field, a repeated string field,
// a map and a oneof through build() and parse().
test("protobuf: repeated packed/expanded, maps, oneof", async () => {
48+
const { Protobuf } = await import("runtime:serialization");
49+
const s = new Protobuf.Schema(`
50+
syntax = "proto3";
51+
message M {
52+
repeated int32 nums = 1;
53+
repeated string tags = 2;
54+
map<string, int32> counts = 3;
55+
oneof choice { int32 a = 5; string b = 6; }
56+
}
57+
`);
58+
// `nums` includes a multi-byte varint (300) and a negative value; only member `b` of the oneof is set.
const input = { nums: [1, 2, 300, -4], tags: ["x", "y"], counts: { x: 1, y: 2 }, b: "picked" };
59+
deepEq(s.parse("M", s.build("M", input)), input, "repeated/map/oneof round-trip");
60+
});
61+
62+
// Contrasts field presence between proto3 and edition 2023 for a zero-valued int32.
test("protobuf: implicit presence omits defaults; edition 2023 keeps them", async () => {
63+
const { Protobuf } = await import("runtime:serialization");
64+
const p3 = new Protobuf.Schema(`syntax="proto3"; message M { int32 a = 1; }`);
65+
// proto3: a field holding its default value must produce no bytes at all.
if (p3.build("M", { a: 0 }).length !== 0) throw new Error("proto3 default should be omitted");
66+
const ed = new Protobuf.Schema(`edition="2023"; message M { int32 a = 1; }`);
67+
// edition 2023: the explicitly set zero is written as tag 0x08 (field 1, varint) + value 0x00.
deepEq(Array.from(ed.build("M", { a: 0 })), [0x08, 0x00], "edition 2023 explicit presence");
68+
});
69+
70+
// Checks that a field the decoding schema does not declare survives parse() + build().
test("protobuf: unknown fields preserved across re-encode", async () => {
71+
const { Protobuf } = await import("runtime:serialization");
72+
// `full` knows both fields; `partial` only knows field 1, so field 2 is unknown to it.
const full = new Protobuf.Schema(`syntax="proto3"; message M { int32 a = 1; string b = 2; }`);
73+
const partial = new Protobuf.Schema(`syntax="proto3"; message M { int32 a = 1; }`);
74+
const original = full.build("M", { a: 5, b: "keep" });
75+
// Decode and re-encode with the partial schema, then read the result back with the full one.
const reencoded = partial.build("M", partial.parse("M", original));
76+
deepEq(full.parse("M", reencoded), { a: 5, b: "keep" }, "unknown field survives");
77+
});
78+
79+
// Checks that constructing a Schema from proto2 source throws.
// Any thrown value satisfies the test; the error type and message are not inspected.
test("protobuf: rejects proto2", async () => {
80+
const { Protobuf } = await import("runtime:serialization");
81+
let threw = false;
82+
try { new Protobuf.Schema(`syntax="proto2"; message M {}`); } catch { threw = true; }
83+
if (!threw) throw new Error("proto2 should be rejected");
84+
});

crates/runtime/js/bun.lock

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/runtime/js/package.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "esrun-serialization",
3+
"private": true,
4+
"type": "module",
5+
"description": "Source for the runtime:serialization module — pure-JS Protobuf (reflection) + thin wrappers over the Rust-backed text/binary parser ops. Bundled into crates/runtime/src/runtime_modules/serialization.js.",
6+
"scripts": {
7+
"build": "bun build serialization/index.ts --format=esm --target=node --banner='// @generated by `bun run build` in crates/runtime/js — do not edit by hand.' --outfile=../src/runtime_modules/serialization.js",
8+
"test": "bun test"
9+
},
10+
"devDependencies": {
11+
"@bufbuild/protobuf": "^2.12.1",
12+
"typescript": "^5.7.0"
13+
}
14+
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// runtime:serialization — entry bundled into
2+
// crates/runtime/src/runtime_modules/serialization.js (via `bun run build`).
3+
//
4+
// XML/YAML/TOML/MessagePack are thin wrappers over the Rust host ops;
5+
// JSONL streaming is plain JS, and Protobuf is a pure-JS reflective implementation (./protobuf).
6+
export { Protobuf } from "./protobuf/schema.js";
7+
8+
// Host op table, read from `globalThis.__ops` when this module is evaluated.
// NOTE(review): if `__ops` is not installed, the destructuring below throws at import time.
const ops = (globalThis as unknown as { __ops: Record<string, (...a: any[]) => any> }).__ops;
9+
// One parse/validate/build triple per format, plus the three ops behind the XML decoder stream.
const {
10+
xml_parse, xml_validate, xml_build,
11+
yaml_parse, yaml_validate, yaml_build,
12+
toml_parse, toml_validate, toml_build,
13+
msgpack_parse, msgpack_validate, msgpack_build,
14+
xml_stream_new, xml_stream_push, xml_stream_close,
15+
} = ops;
16+
17+
/** Options shared by the `validate` helpers of every format in this module. */
interface ValidateOptions {
  /** When set, `validate` returns an object describing the outcome instead of a boolean. */
  detailed?: boolean;
}

/**
 * Runs a validator and shapes its verdict for the caller.
 *
 * @param fn      validator returning `true` on success or an error message on failure
 * @param input   value handed to `fn` unchanged
 * @param options `detailed` selects the object form of the result
 * @returns `true` / `false`, or `{ valid: true }` / `{ valid: false, error }` when `detailed` is set
 */
function validateWith(fn: (s: any) => true | string, input: any, options: ValidateOptions = {}) {
  const outcome = fn(input);
  if (outcome !== true) {
    return options.detailed ? { valid: false, error: outcome } : false;
  }
  return options.detailed ? { valid: true } : true;
}
23+
24+
/** TOML helpers; each method forwards to the matching `toml_*` host op. */
export const TOML = {
25+
// Returns a boolean, or `{ valid, error? }` when `options.detailed` is set (see validateWith).
validate: (toml: string, options?: ValidateOptions) => validateWith(toml_validate, toml, options),
26+
parse: (toml: string) => toml_parse(toml),
27+
build: (obj: unknown) => toml_build(obj),
28+
};
29+
30+
/** YAML helpers; each method forwards to the matching `yaml_*` host op. */
export const YAML = {
31+
// Returns a boolean, or `{ valid, error? }` when `options.detailed` is set (see validateWith).
validate: (yaml: string, options?: ValidateOptions) => validateWith(yaml_validate, yaml, options),
32+
parse: (yaml: string) => yaml_parse(yaml),
33+
build: (obj: unknown) => yaml_build(obj),
34+
};
35+
36+
/** MessagePack helpers over the `msgpack_*` host ops; input and validation operate on bytes. */
export const MessagePack = {
37+
// Returns a boolean, or `{ valid, error? }` when `options.detailed` is set (see validateWith).
validate: (msgpack: Uint8Array, options?: ValidateOptions) => validateWith(msgpack_validate, msgpack, options),
38+
// The host op's result is passed through JSON.parse, so it presumably returns JSON text — TODO confirm.
decode: (msgpack: Uint8Array) => JSON.parse(msgpack_parse(msgpack)),
39+
encode: (obj: unknown) => msgpack_build(obj),
40+
};
41+
42+
/**
 * Transform stream that parses newline-delimited JSON.
 *
 * Accepts string chunks or byte chunks (decoded as UTF-8 with a streaming
 * decoder, so a character split across chunks is handled) and enqueues one
 * parsed value per non-blank line. Lines are split on "\n" and trimmed, and
 * line numbers are 1-based and count blank lines.
 *
 * With `skipInvalid` an unparseable line is reported to the `onError` callback
 * and skipped. Otherwise the stream is errored with a SyntaxError and the
 * remaining lines are ignored.
 */
class JSONLDecoderStream extends TransformStream {
  /** Registers the callback that receives unparseable lines when `skipInvalid` is set. */
  onError: (cb: (e: { line: number; raw: string; cause: Error }) => void) => void;

  constructor(options: { skipInvalid?: boolean } = {}) {
    let buffer = "";
    const decoder = new TextDecoder();
    const skipInvalid = !!options.skipInvalid;
    let lineNumber = 0;
    // Set once the stream has been errored, so later lines are not processed.
    let failed = false;
    let errorCallback: ((e: { line: number; raw: string; cause: Error }) => void) | null = null;

    const emit = (controller: TransformStreamDefaultController, raw: string) => {
      if (failed) return;
      const trimmed = raw.trim();
      if (!trimmed) return;
      let value: unknown;
      // Only JSON.parse is guarded: an enqueue failure is not a malformed line.
      try {
        value = JSON.parse(trimmed);
      } catch (err) {
        if (skipInvalid) {
          errorCallback?.({ line: lineNumber, raw: trimmed, cause: err as Error });
        } else {
          failed = true;
          controller.error(new SyntaxError(`Invalid JSONL line ${lineNumber}: ${(err as Error).message}`));
        }
        return;
      }
      controller.enqueue(value);
    };

    super({
      transform(chunk, controller) {
        const text = typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
        buffer += text;
        const lines = buffer.split("\n");
        // The last piece is an incomplete line (or "") and waits for more input.
        buffer = lines.pop() ?? "";
        for (const line of lines) {
          lineNumber++;
          emit(controller, line);
        }
      },
      flush(controller) {
        // Flush the decoder so a truncated trailing byte sequence surfaces as
        // U+FFFD (and fails to parse) instead of being silently dropped.
        buffer += decoder.decode();
        if (buffer) {
          lineNumber++;
          emit(controller, buffer);
        }
      },
    });

    this.onError = (cb) => { errorCallback = cb; };
  }
}
78+
79+
/**
 * Transform stream that serializes each chunk to one JSON line (`JSON.stringify(chunk) + "\n"`).
 *
 * A chunk that cannot be serialized errors the stream with a TypeError. This
 * covers values for which JSON.stringify throws (e.g. cyclic objects, BigInt)
 * and values for which it returns `undefined` (undefined, functions, symbols),
 * which would otherwise be emitted as the literal text "undefined".
 *
 * `write` / `close` are convenience methods that lazily acquire and reuse a
 * single writer on the writable side.
 */
class JSONLEncoderStream extends TransformStream {
  private _writer: WritableStreamDefaultWriter | null = null;

  constructor() {
    super({
      transform(chunk, controller) {
        let line: string | undefined;
        try {
          line = JSON.stringify(chunk);
        } catch (err) {
          controller.error(new TypeError(`Cannot serialize to JSONL: ${(err as Error).message}`));
          return;
        }
        if (line === undefined) {
          controller.error(new TypeError(`Cannot serialize to JSONL: value of type ${typeof chunk} has no JSON representation`));
          return;
        }
        controller.enqueue(line + "\n");
      },
    });
  }

  /** Pipes the encoded lines into `destination`. */
  pipeTo(destination: WritableStream, options?: StreamPipeOptions) {
    return this.readable.pipeTo(destination, options);
  }

  /** Writes one value; resolves once the stream has accepted it. */
  write(chunk: unknown) {
    this._writer ??= this.writable.getWriter();
    return this._writer.write(chunk);
  }

  /** Closes the writable side. The writer is kept so a later write() rejects instead of throwing "locked". */
  close() {
    this._writer ??= this.writable.getWriter();
    return this._writer.close();
  }
}
103+
104+
/** JSON Lines support: stream classes for decoding and encoding newline-delimited JSON. */
export const JSONL = { DecoderStream: JSONLDecoderStream, EncoderStream: JSONLEncoderStream };
105+
106+
/**
 * Transform stream that feeds XML text to the host's streaming parser and
 * enqueues every object the parser returns for a chunk.
 *
 * Byte chunks are decoded with one shared streaming UTF-8 decoder, so a
 * multi-byte character split across two chunks is decoded correctly rather
 * than turned into replacement characters.
 *
 * NOTE(review): the host stream is closed only in flush(); if transform throws
 * or the stream is aborted, `xml_stream_close` is not called from here —
 * confirm whether the host reclaims the stream in that case.
 */
class XMLDecoderStream extends TransformStream {
  constructor() {
    let streamId: number | null = null;
    const decoder = new TextDecoder();
    super({
      start() { streamId = xml_stream_new(); },
      transform(chunk, controller) {
        const text = typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
        for (const obj of xml_stream_push(streamId, text)) controller.enqueue(obj);
      },
      flush(controller) {
        // Flush the decoder: any output here means the input ended mid-character.
        const tail = decoder.decode();
        try {
          if (tail) {
            for (const obj of xml_stream_push(streamId, tail)) controller.enqueue(obj);
          }
        } finally {
          xml_stream_close(streamId);
        }
      },
    });
  }
}
119+
120+
/** XML helpers; the methods forward to the `xml_*` host ops, and `DecoderStream` is the streaming decoder class. */
export const XML = {
121+
// Returns a boolean, or `{ valid, error? }` when `options.detailed` is set (see validateWith).
validate: (xml: string, options?: ValidateOptions) => validateWith(xml_validate, xml, options),
122+
parse: (xml: string) => xml_parse(xml),
123+
build: (obj: unknown) => xml_build(obj),
124+
DecoderStream: XMLDecoderStream,
125+
};

0 commit comments

Comments
 (0)