-
-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathpii.js
More file actions
33 lines (31 loc) · 1.36 KB
/
Copy pathpii.js
File metadata and controls
33 lines (31 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Lightweight PII redaction for chat transcripts. Runs at write time so
// stored content is already scrubbed — the creator can review what visitors
// asked without ever holding raw email/phone/card data.
//
// This is the regex pass Presidio runs in its Analyzer; we skip the NER half
// because the transcripts dashboard doesn't need names/locations redacted
// (those frequently aren't sensitive) and NER would push a 200MB ML model
// onto the request path.
// Redaction rules, applied in array order by redactPii. Each entry pairs the
// placeholder written into the transcript (`token`) with the global regex
// (`re`) whose matches it replaces.
const PATTERNS = [
  // Email addresses: local part, '@', domain, and an alphabetic TLD of 2+ chars.
  { token: '[email]', re: /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g },
  // Card-like numbers: 13-19 digits, each pair optionally separated by a single
  // space or hyphen. The separator sits *between* digits so the match always
  // ends on a digit and never swallows the whitespace/hyphen that follows it.
  { token: '[card]', re: /\b\d(?:[ -]?\d){12,18}\b/g },
  // US Social Security numbers in the dashed 3-2-4 form.
  { token: '[ssn]', re: /\b\d{3}-\d{2}-\d{4}\b/g },
  // Phone numbers: optional country code (1-3 digits, optional '+'), then a
  // 3-3-4 digit grouping with optional parentheses and space/dot/hyphen separators.
  { token: '[phone]', re: /(?:\+?\d{1,3}[\s.-]?)?(?:\(?\d{3}\)?[\s.-]?)\d{3}[\s.-]?\d{4}\b/g },
  // Prefixed secrets: sk/pk/rk/api, then '_' or '-', then 12+ key characters.
  { token: '[key]', re: /\b(?:sk|pk|rk|api)[_-][A-Za-z0-9_-]{12,}\b/g },
];
/**
 * Scrubs PII from a piece of transcript text.
 *
 * Every rule in PATTERNS is applied in order, each one operating on the output
 * of the previous rule, and every match is replaced by that rule's token.
 *
 * @param {*} text - Raw content. `null`/`undefined` are treated as an empty
 *   string; any other value is converted with `String()`.
 * @returns {{ content: string, redacted: boolean }} The scrubbed text, and
 *   whether at least one match was replaced.
 */
export function redactPii(text) {
  const source = String(text ?? '');
  let hits = 0;

  const content = PATTERNS.reduce((scrubbed, { token, re }) => {
    // The rules are shared module-level /g regexes, which carry lastIndex
    // state between uses; start every scan from offset 0.
    re.lastIndex = 0;
    // A replacer function (rather than a replacement string) lets us count
    // matches while substituting the token verbatim.
    return scrubbed.replace(re, () => {
      hits += 1;
      return token;
    });
  }, source);

  return { content, redacted: hits > 0 };
}