-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathdetectBadJsonStrings.ts
More file actions
82 lines (73 loc) · 2.83 KB
/
Copy pathdetectBadJsonStrings.ts
File metadata and controls
82 lines (73 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/** Number of characters in one `\uXXXX` escape sequence. */
const UNICODE_ESCAPE_LENGTH = 6;

/** A complete `\uXXXX` escape encoding a high surrogate (U+D800–U+DBFF), any hex case. */
const HIGH_SURROGATE_ESCAPE = /^\\u[dD][89abAB][0-9a-fA-F]{2}$/;

/** A complete `\uXXXX` escape encoding a low surrogate (U+DC00–U+DFFF), any hex case. */
const LOW_SURROGATE_ESCAPE = /^\\u[dD][c-fC-F][0-9a-fA-F]{2}$/;

type SurrogateEscapeKind = "high" | "low" | "none";

/**
 * Classifies the six characters starting at `start` as a high-surrogate
 * escape, a low-surrogate escape, or neither. Returns "none" when fewer than
 * six characters remain or the hex digits are malformed.
 */
function surrogateEscapeKindAt(
  text: string,
  start: number,
): SurrogateEscapeKind {
  if (start + UNICODE_ESCAPE_LENGTH > text.length) return "none";
  if (text[start] !== "\\" || text[start + 1] !== "u") return "none";

  // Cheap pre-filter: every surrogate code unit starts with the nibble D, so
  // the vast majority of \uXXXX escapes are rejected without slicing.
  const leadNibble = text[start + 2];
  if (leadNibble !== "d" && leadNibble !== "D") return "none";

  const escape = text.slice(start, start + UNICODE_ESCAPE_LENGTH);
  if (HIGH_SURROGATE_ESCAPE.test(escape)) return "high";
  if (LOW_SURROGATE_ESCAPE.test(escape)) return "low";
  return "none";
}

/**
 * Returns true when the backslash at `index` starts an escape sequence, i.e.
 * it is preceded by an even number of consecutive backslashes. With an odd
 * number, the backslash at `index` is itself the second half of a `\\` pair
 * and whatever follows it is literal text.
 */
function startsEscapeSequence(text: string, index: number): boolean {
  let precedingBackslashes = 0;
  for (let i = index - 1; i >= 0 && text[i] === "\\"; i--) {
    precedingBackslashes++;
  }
  return precedingBackslashes % 2 === 0;
}

/**
 * Detects unpaired UTF-16 surrogate escape sequences in JSON-encoded text.
 *
 * Returns true if the input contains a `\uD8XX`/`\uD9XX`/`\uDAXX`/`\uDBXX`
 * high-surrogate escape not immediately followed by a `\uDC..`–`\uDF..` low
 * surrogate escape, or a `\uDC..`–`\uDF..` low-surrogate escape not
 * immediately preceded by a high-surrogate escape. Strict JSON parsers (e.g.
 * ClickHouse `JSONEachRow`) reject input containing such sequences.
 *
 * Surrogate hex ranges (matched case-insensitively — `JSON.stringify` emits
 * lowercase, but uppercase hex digits are equally valid JSON):
 * - High surrogate (U+D800–U+DBFF): `\uD[8-B][0-9A-F][0-9A-F]`
 * - Low surrogate (U+DC00–U+DFFF): `\uD[C-F][0-9A-F][0-9A-F]`
 *
 * Only real escapes are considered: in `\\ud83d` the first backslash escapes
 * the second, so `ud83d` is literal text and is not reported. Escapes that
 * are truncated or contain non-hex digits are ignored rather than reported.
 *
 * @param jsonString - JSON-encoded text to scan.
 * @returns `true` if at least one unpaired surrogate escape is present,
 *   otherwise `false`.
 */
export function detectBadJsonStrings(jsonString: string): boolean {
  const length = jsonString.length;

  // Fast path: native search; strings without any "\u" never enter the loop.
  let idx = jsonString.indexOf("\\u");

  // Once fewer than six characters remain, no complete escape can follow.
  while (idx !== -1 && idx + UNICODE_ESCAPE_LENGTH <= length) {
    if (startsEscapeSequence(jsonString, idx)) {
      const kind = surrogateEscapeKindAt(jsonString, idx);

      // Valid pairs are consumed as a unit below, so any low surrogate the
      // scan lands on has no high surrogate directly in front of it.
      if (kind === "low") return true;

      if (kind === "high") {
        const following = surrogateEscapeKindAt(
          jsonString,
          idx + UNICODE_ESCAPE_LENGTH,
        );
        if (following !== "low") return true;

        // Skip the whole pair so its low half is not re-examined on its own.
        idx = jsonString.indexOf("\\u", idx + 2 * UNICODE_ESCAPE_LENGTH);
        continue;
      }
    }

    // Not a surrogate escape (or not an escape at all): move past this "\u".
    idx = jsonString.indexOf("\\u", idx + 2);
  }

  return false;
}