Skip to content

Commit de80a21

Browse files
authored
Merge pull request #53 from 2Toad/jp-issue-41
Fixes #41: ProfanityOptions wholeWord does not appear to be working as intended
2 parents 49eaf04 + e841aaa commit de80a21

18 files changed

Lines changed: 767 additions & 235 deletions

.github/workflows/ci.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,5 @@ jobs:
3131
- name: Lint
3232
run: npm run lint
3333

34-
- name: Build
35-
run: npm run build
36-
3734
- name: Unit Tests
3835
run: npm test

README.md

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,17 @@ Install the package
1414
npm i @2toad/profanity
1515
```
1616

17-
>If you're using Node 11.x or older, you'll need to install [Profanity 1.x](https://github.com/2Toad/Profanity/releases) (e.g., `npm i @2toad/profanity@1.4.0`)
17+
>If you're using Node 11.x or older, you'll need to install [Profanity 1.x](https://github.com/2Toad/Profanity/releases) (e.g., `npm i @2toad/profanity@1.4.1`)
1818
1919
## Usage 📚
2020

2121
```JavaScript
22-
import { profanity } from '@2toad/profanity';
22+
import { profanity, CensorType } from '@2toad/profanity';
2323
// or
24-
var profanity = require('@2toad/profanity').profanity;
25-
24+
const { profanity, CensorType } = require('@2toad/profanity');
25+
```
2626

27+
```JavaScript
2728
profanity.exists('I like big butts and I cannot lie');
2829
// true
2930

@@ -41,14 +42,13 @@ profanity.censor('I like big butts (aka arses) and I cannot lie', CensorType.Fir
4142
Create an instance of the Profanity class to change the default options:
4243

4344
```JavaScript
44-
import { Profanity, ProfanityOptions } from '@2toad/profanity';
45-
46-
const options = new ProfanityOptions();
47-
options.wholeWord = false;
48-
options.grawlix = '*****';
49-
options.grawlixChar = '$';
45+
import { Profanity } from '@2toad/profanity';
5046

51-
const profanity = new Profanity(options);
47+
const profanity = new Profanity({
48+
wholeWord: false,
49+
grawlix: '*****',
50+
grawlixChar: '$',
51+
});
5252
```
5353

5454
### wholeWord 🔤
@@ -65,6 +65,23 @@ profanity.exists('Arsenic is poisonous but not profane');
6565
// true (matched on arse)
6666
```
6767

68+
#### Compound Words
69+
Profanity detection works on parts of compound words, rather than treating hyphenated or underscore-separated words as indivisible.
70+
71+
When `wholeWord` is `true`, each portion of a compound word is analyzed for a match:
72+
```JavaScript
73+
profanity.exists("Don't be an arsenic-monster");
74+
// false
75+
76+
profanity.exists("Don't be an arse-monster");
77+
// true (matched on arse)
78+
```
79+
Setting `wholeWord` to `false` results in partial-word matches on each portion of a compound word:
80+
```JavaScript
81+
profanity.exists("Don't be an arsenic-monster");
82+
// true (matched on arse)
83+
```
84+
6885
### grawlix 💥
6986

7087
By default this is set to `@#$%&!`:

package.json

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414
"clean": "npx rimraf dist",
1515
"build": "npm run clean && npx tsc",
1616
"local": "npm run build && concurrently -p \"none\" \"npx tsc --watch\" \"nodemon -q dist/index.js\"",
17-
"test": "mocha -r ts-node/register -r mocha-suppress-logs tests/**/*.spec.ts",
17+
"pretest": "npm run build",
18+
"test": "mocha -r ts-node/register tests/**/*.spec.ts",
19+
"test:watch": "npm run test -- --watch",
1820
"lint": "eslint . --cache",
1921
"lint:fix": "eslint . --cache --fix",
2022
"prettier": "prettier --check **/*.ts",
2123
"prettier:fix": "prettier --write **/*.ts",
22-
"prepublishOnly": "npm run lint && npm run prettier && npm run build && npm test",
24+
"prepublishOnly": "npm run lint && npm run prettier && npm test",
2325
"prepare": "husky"
2426
},
2527
"repository": {
@@ -29,18 +31,23 @@
2931
"keywords": [
3032
"profanity",
3133
"profane",
32-
"obscene",
3334
"obscenity",
34-
"obscenities",
35+
"obscene",
3536
"cussing",
37+
"curse",
3638
"cursing",
3739
"swearing",
3840
"swearwords",
39-
"vulgar",
41+
"swear-words",
4042
"vulgarity",
41-
"bad words",
42-
"bad language",
43-
"dirty words"
43+
"badwords",
44+
"bad-words",
45+
"badlanguage",
46+
"bad-language",
47+
"dirtywords",
48+
"dirty-words",
49+
"censor",
50+
"filter"
4451
],
4552
"devDependencies": {
4653
"@types/chai": "^4.3.19",

src/data/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export { profaneWords } from "./profane-words";

src/data/profane-words.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// WARNING: this file contains profanity. The list of profane words below is necessary for this tool to function properly.
22
// Do not read below this line if you do not wish to be exposed to lots of profane words
33

4-
export default [
4+
export const profaneWords: readonly string[] = [
55
"4r5e",
66
"5h1t",
77
"5hit",

src/index.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
export * from "./profanity";
2-
export * from "./profanity-options";
1+
export { Profanity, profanity } from "./profanity";
2+
export { ProfanityOptions } from "./profanity-options";
33
export { CensorType } from "./models";
4+
export { profaneWords } from "./data";

src/models/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
export * from "./censor-type";
2-
export * from "./list";
1+
export { CensorType } from "./censor-type";
2+
export { List } from "./list";

src/models/list.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ export class List {
1717
this.onListChanged();
1818
}
1919

20-
addWords(words: string[]): void {
21-
this.words = this.words.concat(words);
20+
addWords(words: readonly string[] | string[]): void {
21+
this.words = this.words.concat(words.map((word: string) => word.toLowerCase()));
2222
this.onListChanged();
2323
}
2424
}

src/profanity-options.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ export class ProfanityOptions {
55

66
grawlixChar: string;
77

8-
constructor() {
9-
this.wholeWord = true;
10-
this.grawlix = "@#$%&!";
11-
this.grawlixChar = "*";
8+
constructor(options: Partial<ProfanityOptions> = {}) {
9+
this.wholeWord = options.wholeWord ?? true;
10+
this.grawlix = options.grawlix ?? "@#$%&!";
11+
this.grawlixChar = options.grawlixChar ?? "*";
1212
}
1313
}

src/profanity.ts

Lines changed: 75 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { ProfanityOptions } from "./profanity-options";
22
import { List, CensorType } from "./models";
3-
import { escapeRegExp } from "./utils/misc";
4-
import profaneWords from "./data/profane-words";
3+
import { escapeRegExp } from "./utils";
4+
import { profaneWords } from "./data";
55

66
export class Profanity {
77
options: ProfanityOptions;
@@ -12,67 +12,111 @@ export class Profanity {
1212

1313
private regex: RegExp;
1414

15-
constructor(options?: ProfanityOptions) {
16-
this.options = options || new ProfanityOptions();
17-
15+
constructor(options?: ProfanityOptions | Partial<ProfanityOptions>) {
16+
this.options = options ? { ...new ProfanityOptions(), ...options } : new ProfanityOptions();
1817
this.whitelist = new List(() => this.buildRegex());
1918
this.blacklist = new List(() => this.buildRegex());
20-
2119
this.blacklist.addWords(profaneWords);
2220
}
2321

2422
exists(text: string): boolean {
2523
this.regex.lastIndex = 0;
26-
return this.regex.test(text);
24+
const lowercaseText = text.toLowerCase();
25+
26+
let match: RegExpExecArray | null;
27+
do {
28+
match = this.regex.exec(lowercaseText);
29+
if (match !== null) {
30+
const matchStart = match.index;
31+
const matchEnd = matchStart + match[0].length;
32+
33+
// Check if the matched word is part of a whitelisted word
34+
const isWhitelisted = this.whitelist.words.some((whitelistedWord) => {
35+
const whitelistedIndex = lowercaseText.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
36+
if (whitelistedIndex === -1) return false;
37+
38+
const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
39+
40+
if (this.options.wholeWord) {
41+
// For whole word matching, ensure the whitelisted word exactly matches the profane word
42+
// and is not part of a hyphenated or underscore-separated word
43+
return (
44+
matchStart === whitelistedIndex &&
45+
matchEnd === whitelistedEnd &&
46+
(matchStart === 0 || !/[\w-_]/.test(lowercaseText[matchStart - 1])) &&
47+
(matchEnd === lowercaseText.length || !/[\w-_]/.test(lowercaseText[matchEnd]))
48+
);
49+
}
50+
51+
// For partial matching, check if the profane match overlaps the whitelisted word (its start or its end falls inside the whitelisted span)
52+
return (matchStart >= whitelistedIndex && matchStart < whitelistedEnd) || (matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd);
53+
});
54+
55+
if (!isWhitelisted) {
56+
return true;
57+
}
58+
}
59+
} while (match !== null);
60+
61+
return false;
2762
}
2863

2964
censor(text: string, censorType: CensorType = CensorType.Word): string {
65+
const lowercaseText = text.toLowerCase();
66+
3067
switch (censorType) {
3168
case CensorType.Word:
32-
return text.replace(this.regex, this.options.grawlix);
33-
case CensorType.FirstChar: {
34-
let output = text;
35-
36-
Array.from(text.matchAll(this.regex)).forEach((match) => {
37-
const word = match[0];
38-
const grawlix = this.options.grawlixChar + word.slice(1, word.length);
39-
output = output.replace(word, grawlix);
69+
return text.replace(this.regex, (match) => {
70+
const underscore = match.includes("_") ? "_" : "";
71+
return this.options.grawlix + underscore;
4072
});
41-
return output;
73+
case CensorType.FirstChar: {
74+
return this.replaceProfanity(text, lowercaseText, (word) => this.options.grawlixChar + word.slice(1));
4275
}
4376
case CensorType.FirstVowel:
4477
case CensorType.AllVowels: {
45-
const regex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
46-
let output = text;
47-
Array.from(text.matchAll(this.regex)).forEach((match) => {
48-
const word = match[0];
49-
const grawlix = word.replace(regex, this.options.grawlixChar);
50-
output = output.replace(word, grawlix);
51-
});
52-
return output;
78+
const vowelRegex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
79+
return this.replaceProfanity(text, lowercaseText, (word) => word.replace(vowelRegex, this.options.grawlixChar));
5380
}
5481
default:
5582
throw new Error(`Invalid replacement type: "${censorType}"`);
5683
}
5784
}
5885

86+
private replaceProfanity(text: string, lowercaseText: string, replacer: (word: string) => string): string {
87+
let result = text;
88+
let offset = 0;
89+
90+
this.regex.lastIndex = 0;
91+
let match: RegExpExecArray | null;
92+
do {
93+
match = this.regex.exec(lowercaseText);
94+
if (match !== null) {
95+
const matchStart = match.index;
96+
const matchEnd = matchStart + match[0].length;
97+
const originalWord = text.slice(matchStart + offset, matchEnd + offset);
98+
const censoredWord = replacer(originalWord);
99+
result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
100+
offset += censoredWord.length - originalWord.length;
101+
}
102+
} while (match !== null);
103+
104+
return result;
105+
}
106+
59107
addWords(words: string[]): void {
60108
this.blacklist.addWords(words);
61109
}
62110

63111
removeWords(words: string[]): void {
64-
this.blacklist.removeWords(words);
112+
this.blacklist.removeWords(words.map((word) => word.toLowerCase()));
65113
}
66114

67115
private buildRegex(): void {
68116
const escapedBlacklistWords = this.blacklist.words.map(escapeRegExp);
69-
const escapedWhitelistWords = this.whitelist.words.map(escapeRegExp);
70-
71-
const blacklistPattern = `${this.options.wholeWord ? "\\b" : ""}(${escapedBlacklistWords.join("|")})${this.options.wholeWord ? "\\b" : ""}`;
72-
const whitelistPattern = this.whitelist.empty ? "" : `(?!${escapedWhitelistWords.join("|")})`;
73-
this.regex = new RegExp(whitelistPattern + blacklistPattern, "ig");
117+
const profanityPattern = `${this.options.wholeWord ? "(?:\\b|_)" : ""}(${escapedBlacklistWords.join("|")})${this.options.wholeWord ? "(?:\\b|_)" : ""}`;
118+
this.regex = new RegExp(profanityPattern, "gi");
74119
}
75120
}
76121

77122
export const profanity = new Profanity();
78-
export default profanity;

0 commit comments

Comments
 (0)