Skip to content

Commit 391bc6d

Browse files
authored
Add fast path for utf16le encoding in stringToBuffer()/bufferToString() (#981)
1 parent e720ae2 commit 391bc6d

8 files changed

Lines changed: 498 additions & 86 deletions

File tree

example/src/benchmarks/encoding/encoding.ts

Lines changed: 192 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ import {
33
stringToBuffer,
44
Buffer as CraftzdogBuffer,
55
} from 'react-native-quick-crypto';
6+
// For utf16le, the native implementation could be disabled for non-Hermes runtimes or older versions of RN.
7+
// Use the fallbacks to measure the performance without causing errors, even if it could use the Buffer polyfill.
8+
import { ab2str, binaryLikeToArrayBuffer } from 'react-native-quick-crypto';
69
import type { BenchFn } from '../../types/benchmarks';
710
import { Bench } from 'tinybench';
811

@@ -22,28 +25,37 @@ function stringToBuffer_old(
2225
}
2326

2427
// Generate test data
25-
const generate1MB = (): ArrayBuffer => {
26-
const bytes = new Uint8Array(1024 * 1024);
27-
for (let i = 0; i < bytes.length; i++) {
28-
bytes[i] = i & 0xff;
28+
const generateData = (size: number, asciiOnly: boolean = true): ArrayBuffer => {
29+
if (size < 2 || size % 2 !== 0) {
30+
throw new Error('Size must be at least 2 and even');
31+
}
32+
const bytes = new Uint8Array(size); // Implicitly filled with 0
33+
// Fill ASCII characters in UTF-16LE code units, which can also be represented as binary/ASCII/Latin1/UTF-8
34+
for (let i = 0; i < bytes.length; i += 2) {
35+
bytes[i] = i & 0x7f;
36+
}
37+
if (!asciiOnly) {
38+
// \xC3\xA9 in UTF-8 or \uA9C3 in UTF-16LE
39+
bytes[0] = 0xc3;
40+
bytes[1] = 0xa9;
2941
}
3042
return bytes.buffer as ArrayBuffer;
3143
};
3244

33-
const ab1MB = generate1MB();
34-
const ab32B = new Uint8Array(32).buffer as ArrayBuffer; // typical hash digest size
35-
// Fill 32B with non-zero data
36-
new Uint8Array(ab32B).set([
37-
0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0xba, 0xbe, 0x01, 0x23, 0x45, 0x67, 0x89,
38-
0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x11, 0x22,
39-
0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
40-
]);
45+
const ab1MB_ascii = generateData(1024 * 1024, true);
46+
const ab1MB = generateData(1024 * 1024, false);
47+
const ab32B_ascii = generateData(32, true);
48+
const ab32B = generateData(32, false);
4149

4250
// Pre-encode strings for decode benchmarks
43-
const hex1MB = bufferToString(ab1MB, 'hex');
44-
const base64_1MB = bufferToString(ab1MB, 'base64');
45-
const hex32B = bufferToString(ab32B, 'hex');
46-
const base64_32B = bufferToString(ab32B, 'base64');
51+
const hex_1MB = ab2str(ab1MB, 'hex');
52+
const hex_32B = ab2str(ab32B, 'hex');
53+
const base64_1MB = ab2str(ab1MB, 'base64');
54+
const base64_32B = ab2str(ab32B, 'base64');
55+
const utf16le_1MB_ascii = ab2str(ab1MB_ascii, 'utf16le');
56+
const utf16le_32B_ascii = ab2str(ab32B_ascii, 'utf16le');
57+
const utf16le_1MB_non_ascii = ab2str(ab1MB, 'utf16le');
58+
const utf16le_32B_non_ascii = ab2str(ab32B, 'utf16le');
4759

4860
// --- Encode benchmarks (ArrayBuffer → string) ---
4961

@@ -123,6 +135,82 @@ const encode_base64_1mb: BenchFn = () => {
123135
return bench;
124136
};
125137

138+
const encode_utf16le_32b: BenchFn = () => {
139+
const bench = new Bench({
140+
name: 'utf16le encode 32B',
141+
iterations: 100,
142+
warmupIterations: 10,
143+
time: 0,
144+
});
145+
146+
bench
147+
.add('rnqc', () => {
148+
ab2str(ab32B, 'utf16le');
149+
})
150+
.add('Buffer polyfill', () => {
151+
ab2str_old(ab32B, 'utf16le');
152+
});
153+
154+
return bench;
155+
};
156+
157+
const encode_utf16le_1mb: BenchFn = () => {
158+
const bench = new Bench({
159+
name: 'utf16le encode 1MB',
160+
iterations: 10,
161+
warmupIterations: 2,
162+
time: 0,
163+
});
164+
165+
bench
166+
.add('rnqc', () => {
167+
ab2str(ab1MB, 'utf16le');
168+
})
169+
.add('Buffer polyfill', () => {
170+
ab2str_old(ab1MB, 'utf16le');
171+
});
172+
173+
return bench;
174+
};
175+
176+
const encode_utf16le_32b_ascii: BenchFn = () => {
177+
const bench = new Bench({
178+
name: 'utf16le encode 32B (ASCII only)',
179+
iterations: 100,
180+
warmupIterations: 10,
181+
time: 0,
182+
});
183+
184+
bench
185+
.add('rnqc', () => {
186+
ab2str(ab32B_ascii, 'utf16le');
187+
})
188+
.add('Buffer polyfill', () => {
189+
ab2str_old(ab32B_ascii, 'utf16le');
190+
});
191+
192+
return bench;
193+
};
194+
195+
const encode_utf16le_1mb_ascii: BenchFn = () => {
196+
const bench = new Bench({
197+
name: 'utf16le encode 1MB (ASCII only)',
198+
iterations: 10,
199+
warmupIterations: 2,
200+
time: 0,
201+
});
202+
203+
bench
204+
.add('rnqc', () => {
205+
ab2str(ab1MB_ascii, 'utf16le');
206+
})
207+
.add('Buffer polyfill', () => {
208+
ab2str_old(ab1MB_ascii, 'utf16le');
209+
});
210+
211+
return bench;
212+
};
213+
126214
// --- Decode benchmarks (string → ArrayBuffer) ---
127215

128216
const decode_hex_32b: BenchFn = () => {
@@ -135,10 +223,10 @@ const decode_hex_32b: BenchFn = () => {
135223

136224
bench
137225
.add('rnqc', () => {
138-
stringToBuffer(hex32B, 'hex');
226+
stringToBuffer(hex_32B, 'hex');
139227
})
140228
.add('Buffer polyfill', () => {
141-
stringToBuffer_old(hex32B, 'hex');
229+
stringToBuffer_old(hex_32B, 'hex');
142230
});
143231

144232
return bench;
@@ -154,10 +242,10 @@ const decode_hex_1mb: BenchFn = () => {
154242

155243
bench
156244
.add('rnqc', () => {
157-
stringToBuffer(hex1MB, 'hex');
245+
stringToBuffer(hex_1MB, 'hex');
158246
})
159247
.add('Buffer polyfill', () => {
160-
stringToBuffer_old(hex1MB, 'hex');
248+
stringToBuffer_old(hex_1MB, 'hex');
161249
});
162250

163251
return bench;
@@ -201,13 +289,97 @@ const decode_base64_1mb: BenchFn = () => {
201289
return bench;
202290
};
203291

292+
const decode_utf16le_32b: BenchFn = () => {
293+
const bench = new Bench({
294+
name: 'utf16le decode 32B',
295+
iterations: 100,
296+
warmupIterations: 10,
297+
time: 0,
298+
});
299+
300+
bench
301+
.add('rnqc', () => {
302+
binaryLikeToArrayBuffer(utf16le_32B_non_ascii, 'utf16le');
303+
})
304+
.add('Buffer polyfill', () => {
305+
stringToBuffer_old(utf16le_32B_non_ascii, 'utf16le');
306+
});
307+
308+
return bench;
309+
};
310+
311+
const decode_utf16le_1mb: BenchFn = () => {
312+
const bench = new Bench({
313+
name: 'utf16le decode 1MB',
314+
iterations: 10,
315+
warmupIterations: 2,
316+
time: 0,
317+
});
318+
319+
bench
320+
.add('rnqc', () => {
321+
binaryLikeToArrayBuffer(utf16le_1MB_non_ascii, 'utf16le');
322+
})
323+
.add('Buffer polyfill', () => {
324+
stringToBuffer_old(utf16le_1MB_non_ascii, 'utf16le');
325+
});
326+
327+
return bench;
328+
};
329+
330+
const decode_utf16le_32b_ascii: BenchFn = () => {
331+
const bench = new Bench({
332+
name: 'utf16le decode 32B (ASCII only)',
333+
iterations: 100,
334+
warmupIterations: 10,
335+
time: 0,
336+
});
337+
338+
bench
339+
.add('rnqc', () => {
340+
binaryLikeToArrayBuffer(utf16le_32B_ascii, 'utf16le');
341+
})
342+
.add('Buffer polyfill', () => {
343+
stringToBuffer_old(utf16le_32B_ascii, 'utf16le');
344+
});
345+
346+
return bench;
347+
};
348+
349+
const decode_utf16le_1mb_ascii: BenchFn = () => {
350+
const bench = new Bench({
351+
name: 'utf16le decode 1MB (ASCII only)',
352+
iterations: 10,
353+
warmupIterations: 2,
354+
time: 0,
355+
});
356+
357+
bench
358+
.add('rnqc', () => {
359+
binaryLikeToArrayBuffer(utf16le_1MB_ascii, 'utf16le');
360+
})
361+
.add('Buffer polyfill', () => {
362+
stringToBuffer_old(utf16le_1MB_ascii, 'utf16le');
363+
});
364+
365+
return bench;
366+
};
367+
204368
export default [
205369
encode_hex_32b,
206370
encode_hex_1mb,
207371
encode_base64_32b,
208372
encode_base64_1mb,
373+
encode_utf16le_32b,
374+
encode_utf16le_1mb,
375+
encode_utf16le_32b_ascii,
376+
encode_utf16le_1mb_ascii,
209377
decode_hex_32b,
210378
decode_hex_1mb,
211379
decode_base64_32b,
212380
decode_base64_1mb,
381+
decode_utf16le_32b,
382+
decode_utf16le_1mb,
383+
decode_utf16le_32b_ascii,
384+
decode_utf16le_1mb_ascii,
213385
];

example/src/tests/utils/encoding_tests.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,71 @@ test(
578578
},
579579
);
580580

581+
// --- UTF-16LE ---
582+
583+
test(SUITE, '[Node.js] Roundtrips ASCII text through utf16le encoding.', () => {
584+
const str = 'foo';
585+
const ab = stringToBuffer(str, 'utf16le');
586+
expect(bufferToString(ab, 'utf16le')).to.equal(str);
587+
});
588+
589+
test(
590+
SUITE,
591+
'Roundtrips UTF-16LE text containing an unpaired high surrogate.',
592+
() => {
593+
const str = 'A\uD83DB';
594+
const ab = stringToBuffer(str, 'utf16le');
595+
expect(toU8(ab)).to.deep.equal(
596+
new Uint8Array([0x41, 0x00, 0x3d, 0xd8, 0x42, 0x00]),
597+
);
598+
expect(bufferToString(ab, 'utf16le')).to.equal(str);
599+
},
600+
);
601+
602+
test(
603+
SUITE,
604+
'Roundtrips UTF-16LE text containing an unpaired low surrogate.',
605+
() => {
606+
const str = 'A\uDC00B';
607+
const ab = stringToBuffer(str, 'utf16le');
608+
expect(toU8(ab)).to.deep.equal(
609+
new Uint8Array([0x41, 0x00, 0x00, 0xdc, 0x42, 0x00]),
610+
);
611+
expect(bufferToString(ab, 'utf16le')).to.equal(str);
612+
},
613+
);
614+
615+
test(SUITE, '[Node.js] UTF-16LE encoding of "über"', () => {
616+
expect(toU8(stringToBuffer('über', 'utf16le'))).to.deep.equal(
617+
new Uint8Array([252, 0, 98, 0, 101, 0, 114, 0]),
618+
);
619+
});
620+
621+
test(SUITE, '[Node.js] UTF-16LE encoding of "привет"', () => {
622+
const encoded = toU8(stringToBuffer('привет', 'utf16le'));
623+
expect(encoded).to.deep.equal(
624+
new Uint8Array([63, 4, 64, 4, 56, 4, 50, 4, 53, 4, 66, 4]),
625+
);
626+
expect(bufferToString(encoded.buffer as ArrayBuffer, 'utf16le')).to.equal(
627+
'привет',
628+
);
629+
});
630+
631+
test(SUITE, '[Node.js] UTF-16LE encoding of Thumbs up sign (U+1F44D)', () => {
632+
expect(toU8(stringToBuffer('\uD83D\uDC4D', 'utf16le'))).to.deep.equal(
633+
new Uint8Array([0x3d, 0xd8, 0x4d, 0xdc]),
634+
);
635+
});
636+
637+
test(SUITE, '[Node.js] Decodes UTF-16LE bytes back to Japanese text.', () => {
638+
const bytes = new Uint8Array([
639+
0x42, 0x30, 0x44, 0x30, 0x46, 0x30, 0x48, 0x30, 0x4a, 0x30,
640+
]);
641+
expect(bufferToString(bytes.buffer as ArrayBuffer, 'utf16le')).to.equal(
642+
'あいうえお',
643+
);
644+
});
645+
581646
// --- Latin1 / Binary ---
582647

583648
test(

0 commit comments

Comments
 (0)