Skip to content

Commit c120ad8

Browse files
authored
Merge pull request #165 from 2Toad/jp-issue-164
Fix #164: Runtime performance improvements
2 parents 22cee7a + 8191617 commit c120ad8

3 files changed

Lines changed: 62 additions & 40 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
"lint": "oxlint",
2121
"lint:fix": "oxlint --fix",
2222
"format": "prettier . --write",
23+
"check": "npm run format && npm run lint && npm test && npm run build",
2324
"prepublishOnly": "npm run lint && npm test",
2425
"translate": "docker compose -f ./src/tools/translate/docker-compose.yml up -d && ts-node ./src/tools/translate/translate.ts && docker compose -f ./src/tools/translate/docker-compose.yml down",
2526
"benchmark": "docker compose -f ./src/tools/benchmark/docker-compose.yml up --build"

src/profanity.ts

Lines changed: 60 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,11 @@ import { List, CensorType } from "./models";
33
import { escapeRegExp } from "./utils";
44
import { profaneWords } from "./data";
55

6+
const FIRST_VOWEL_RE = /[aeiou]/i;
7+
const ALL_VOWELS_RE = /[aeiou]/gi;
8+
const UNICODE_WORD_CHAR_RE = /[\p{L}\p{N}\p{M}_-]/u;
9+
const ASCII_WORD_CHAR_RE = /[\w-_]/;
10+
611
export class Profanity {
712
options: ProfanityOptions;
813
whitelist: List;
@@ -84,7 +89,7 @@ export class Profanity {
8489
return this.options.grawlixChar + word.slice(1);
8590
case CensorType.FirstVowel:
8691
case CensorType.AllVowels: {
87-
const vowelRegex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
92+
const vowelRegex = censorType === CensorType.FirstVowel ? FIRST_VOWEL_RE : ALL_VOWELS_RE;
8893
return word.replace(vowelRegex, this.options.grawlixChar);
8994
}
9095
default:
@@ -153,32 +158,31 @@ export class Profanity {
153158
* @returns True if the match is whitelisted, false otherwise.
154159
*/
155160
private isWhitelisted(matchStart: number, matchEnd: number, text: string): boolean {
161+
const wholeWord = this.options.wholeWord;
162+
const wordCharRe = wholeWord ? (this.options.unicodeWordBoundaries ? UNICODE_WORD_CHAR_RE : ASCII_WORD_CHAR_RE) : null;
163+
156164
for (const whitelistedWord of this.whitelist.words) {
157165
const whitelistedIndex = text.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
158-
if (whitelistedIndex !== -1) {
159-
const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
160-
161-
if (this.options.wholeWord) {
162-
const isWordChar = (ch: string | undefined) => {
163-
if (!ch) return false;
164-
return this.options.unicodeWordBoundaries ? /[\p{L}\p{N}\p{M}_-]/u.test(ch) : /[\w-_]/.test(ch);
165-
};
166-
if (
167-
matchStart === whitelistedIndex &&
168-
matchEnd === whitelistedEnd &&
169-
(matchStart === 0 || !isWordChar(text.charAt(matchStart - 1))) &&
170-
(matchEnd === text.length || !isWordChar(text.charAt(matchEnd)))
171-
) {
172-
return true;
173-
}
174-
} else {
175-
if (
176-
(matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
177-
(matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd) ||
178-
(whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd)
179-
) {
180-
return true;
181-
}
166+
if (whitelistedIndex === -1) continue;
167+
168+
const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
169+
170+
if (wordCharRe) {
171+
if (
172+
matchStart === whitelistedIndex &&
173+
matchEnd === whitelistedEnd &&
174+
(matchStart === 0 || !wordCharRe.test(text.charAt(matchStart - 1))) &&
175+
(matchEnd === text.length || !wordCharRe.test(text.charAt(matchEnd)))
176+
) {
177+
return true;
178+
}
179+
} else {
180+
if (
181+
(matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
182+
(matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd) ||
183+
(whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd)
184+
) {
185+
return true;
182186
}
183187
}
184188
}
@@ -199,20 +203,22 @@ export class Profanity {
199203
replacer: (word: string, start: number, end: number) => string,
200204
regex: RegExp,
201205
): string {
202-
let result = text;
203-
let offset = 0;
206+
const parts: string[] = [];
207+
let lastIndex = 0;
204208

205209
let match: RegExpExecArray | null;
206210
while ((match = regex.exec(lowercaseText)) !== null) {
207211
const matchStart = match.index;
208212
const matchEnd = matchStart + match[0].length;
209-
const originalWord = text.slice(matchStart + offset, matchEnd + offset);
213+
const originalWord = text.slice(matchStart, matchEnd);
210214
const censoredWord = replacer(originalWord, matchStart, matchEnd);
211-
result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
212-
offset += censoredWord.length - originalWord.length;
215+
parts.push(text.slice(lastIndex, matchStart), censoredWord);
216+
lastIndex = matchEnd;
213217
}
214218

215-
return result;
219+
if (lastIndex === 0) return text;
220+
parts.push(text.slice(lastIndex));
221+
return parts.join("");
216222
}
217223

218224
/**
@@ -237,20 +243,35 @@ export class Profanity {
237243
throw new Error("At least one language must be provided");
238244
}
239245

240-
const uniqueLanguages = [...new Set(languages.map((language) => language.trim().toLowerCase()))];
241-
242-
const regexKey = uniqueLanguages.toSorted().join(",");
243-
if (this.regexes.has(regexKey)) {
244-
return this.regexes.get(regexKey)!;
246+
const seen = new Set<string>();
247+
const uniqueLanguages: string[] = [];
248+
for (const language of languages) {
249+
const normalized = language.trim().toLowerCase();
250+
if (!seen.has(normalized)) {
251+
seen.add(normalized);
252+
uniqueLanguages.push(normalized);
253+
}
245254
}
246255

247-
const allWords = uniqueLanguages.flatMap((language) => {
256+
uniqueLanguages.sort();
257+
const regexKey = uniqueLanguages.join(",");
258+
259+
const cached = this.regexes.get(regexKey);
260+
if (cached) return cached;
261+
262+
const removedWords = this.removed.words;
263+
const allWords: string[] = [];
264+
for (const language of uniqueLanguages) {
248265
const words = profaneWords.get(language);
249266
if (!words) {
250267
throw new Error(`Invalid language: "${language}"`);
251268
}
252-
return words.filter((word) => !this.removed.words.has(word));
253-
});
269+
for (const word of words) {
270+
if (!removedWords.has(word)) {
271+
allWords.push(word);
272+
}
273+
}
274+
}
254275

255276
const regex = this.buildRegex(allWords);
256277
this.regexes.set(regexKey, regex);

src/tools/benchmark/benchmark.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ const startMainSuite = () =>
177177
.on("cycle", (event: Event) => {
178178
console.log(String(event.target));
179179
})
180-
.on("complete", function () {
180+
.on("complete", function (this: any) {
181181
console.log(`Fastest: ${this.filter("fastest").map("name")[0]}`);
182182
})
183183
.run({ async: true });

0 commit comments

Comments
 (0)