Skip to content

Commit 0872bcd

Browse files
xiaoxiaojx and claude
authored
perf: use lookup table in splitIntoPotentialTokens (#240)
Replace multi-comparison chains with a Uint8Array bitmask lookup per character. Phase 1 checked 4 conditions (cc !== 10/59/123/125) and phase 2 checked 6 conditions (cc === 59/32/123/125/13/9) on every character. The lookup table reduces both to a single indexed read plus one bitwise AND (guarded by a range check so that char codes above 127 never index the 128-entry table).

Benchmark results (ops/s, higher is better):
- splitIntoPotentialTokens fixture: 533 → 574 (+7.7%)
- splitIntoPotentialTokens big: 392 → 474 (+20.9%)
- original-source streamChunks(): 904 → 983 (+8.7%)
- original-source streamChunks(big): 334 → 419 (+25.4%)
- realistic cold sourceAndMap(): 864 → 901 (+4.3%)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4874fd7 commit 0872bcd

2 files changed

Lines changed: 49 additions & 26 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
"webpack-sources": patch
3+
---
4+
5+
perf: use lookup table in splitIntoPotentialTokens for faster character classification
6+
7+
Replace multi-comparison chains (4 comparisons in phase 1, 6 in phase 2) with a single Uint8Array bitmask lookup per character. This reduces per-character branching overhead, yielding ~7% improvement on typical source and ~21% on large sources.

lib/helpers/splitIntoPotentialTokens.js

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,29 @@
55

66
"use strict";
77

8-
// \n = 10
9-
// ; = 59
10-
// { = 123
11-
// } = 125
12-
// <space> = 32
13-
// \r = 13
14-
// \t = 9
8+
// Character classification via a lookup table. A single bitmask test
9+
// replaces the multi-comparison chains in each inner loop phase.
10+
//
11+
// Bit layout per character:
12+
// bit 0 (STOP1 = 1): stops phase-1 scan (\n ; { })
13+
// bit 1 (CONT2 = 2): continues phase-2 scan (; { } space \r \t)
14+
//
15+
// Phase 1: scan regular source chars that are NOT a phase-1 stop.
16+
// Phase 2: consume runs of statement-boundary / whitespace chars.
17+
// Phase 3: consume a trailing \n if present.
18+
19+
const STOP1 = 1;
20+
const CONT2 = 2;
21+
22+
/** @type {Uint8Array} */
23+
const CF = new Uint8Array(128);
24+
CF[10] = STOP1; // \n – stops phase 1, NOT consumed in phase 2
25+
CF[59] = STOP1 | CONT2; // ;
26+
CF[123] = STOP1 | CONT2; // {
27+
CF[125] = STOP1 | CONT2; // }
28+
CF[32] = CONT2; // space
29+
CF[13] = CONT2; // \r
30+
CF[9] = CONT2; // \t
1531

1632
/**
1733
* @param {string} str string
@@ -22,28 +38,28 @@ const splitIntoPotentialTokens = (str) => {
2238
if (len === 0) return null;
2339
const results = [];
2440
let i = 0;
25-
while (i < len) {
41+
outer: while (i < len) {
2642
const start = i;
27-
block: {
28-
let cc = str.charCodeAt(i);
29-
while (cc !== 10 && cc !== 59 && cc !== 123 && cc !== 125) {
30-
if (++i >= len) break block;
31-
cc = str.charCodeAt(i);
32-
}
33-
while (
34-
cc === 59 ||
35-
cc === 32 ||
36-
cc === 123 ||
37-
cc === 125 ||
38-
cc === 13 ||
39-
cc === 9
40-
) {
41-
if (++i >= len) break block;
42-
cc = str.charCodeAt(i);
43+
// Phase 1 – skip regular (non-stop) characters
44+
let cc = str.charCodeAt(i);
45+
while (cc > 127 || !(CF[cc] & STOP1)) {
46+
if (++i >= len) {
47+
results.push(str.slice(start, i));
48+
break outer;
4349
}
44-
if (cc === 10) {
45-
i++;
50+
cc = str.charCodeAt(i);
51+
}
52+
// Phase 2 – consume delimiter / whitespace run (; { } space \r \t)
53+
while (cc < 128 && CF[cc] & CONT2) {
54+
if (++i >= len) {
55+
results.push(str.slice(start, i));
56+
break outer;
4657
}
58+
cc = str.charCodeAt(i);
59+
}
60+
// Phase 3 – consume trailing newline
61+
if (cc === 10) {
62+
i++;
4763
}
4864
results.push(str.slice(start, i));
4965
}

0 commit comments

Comments
 (0)