Merge pull request #53 from 2Toad/jp-issue-41

JasonPierce · web-flow · commit de80a21786e0 · 2024-08-30T15:30:14.000-04:00
Fixes #41: ProfanityOptions wholeWord does not appear to be working as intended
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -31,8 +31,5 @@ jobs:
       - name: Lint
         run: npm run lint
 
-      - name: Build
-        run: npm run build
-
       - name: Unit Tests
         run: npm test
diff --git a/README.md b/README.md
@@ -14,16 +14,17 @@ Install the package
 npm i @2toad/profanity
 ```
 
->If you're using Node 11.x or older, you'll need to install [Profanity 1.x](https://github.com/2Toad/Profanity/releases) (e.g., `npm i @2toad/profanity@1.4.0`)
+>If you're using Node 11.x or older, you'll need to install [Profanity 1.x](https://github.com/2Toad/Profanity/releases) (e.g., `npm i @2toad/profanity@1.4.1`)
 
 ## Usage 📚
 
 ```JavaScript
-import { profanity } from '@2toad/profanity';
+import { profanity, CensorType } from '@2toad/profanity';
 // or
-var profanity = require('@2toad/profanity').profanity;
-
+const { profanity, CensorType } = require('@2toad/profanity');
+```
 
+```JavaScript
 profanity.exists('I like big butts and I cannot lie');
 // true
 
@@ -41,14 +42,13 @@ profanity.censor('I like big butts (aka arses) and I cannot lie', CensorType.Fir
 Create an instance of the Profanity class to change the default options:
 
 ```JavaScript
-import { Profanity, ProfanityOptions } from '@2toad/profanity';
-
-const options = new ProfanityOptions();
-options.wholeWord = false;
-options.grawlix = '*****';
-options.grawlixChar = '$';
+import { Profanity } from '@2toad/profanity';
 
-const profanity = new Profanity(options);
+const profanity = new Profanity({
+    wholeWord: false,
+    grawlix: '*****',
+    grawlixChar: '$',
+});
 ```
 
 ### wholeWord 🔤
@@ -65,6 +65,23 @@ profanity.exists('Arsenic is poisonous but not profane');
 // true (matched on arse)
 ```
 
+#### Compound Words  
+Profanity detection works on parts of compound words, rather than treating hyphenated or underscore-separated words as indivisible.
+
+When `wholeWord` is `true`, each portion of a compound word is analyzed for a match:
+```JavaScript
+profanity.exists("Don't be an arsenic-monster");
+// false
+
+profanity.exists("Don't be an arse-monster");
+// true (matched on arse)
+```
+Setting `wholeWord` to `false` results in partial word matches on each portion of a compound word:
+```JavaScript
+profanity.exists("Don't be an arsenic-monster");
+// true (matched on arse)
+```
+
 ### grawlix 💥
 
 By default this is set to `@#$%&!`:
diff --git a/package.json b/package.json
@@ -14,12 +14,14 @@
     "clean": "npx rimraf dist",
     "build": "npm run clean && npx tsc",
     "local": "npm run build && concurrently -p \"none\" \"npx tsc --watch\" \"nodemon -q dist/index.js\"",
-    "test": "mocha -r ts-node/register -r mocha-suppress-logs tests/**/*.spec.ts",
+    "pretest": "npm run build",
+    "test": "mocha -r ts-node/register tests/**/*.spec.ts",
+    "test:watch": "npm run test -- --watch",
     "lint": "eslint . --cache",
     "lint:fix": "eslint . --cache --fix",
     "prettier": "prettier --check **/*.ts",
     "prettier:fix": "prettier --write **/*.ts",
-    "prepublishOnly": "npm run lint && npm run prettier && npm run build && npm test",
+    "prepublishOnly": "npm run lint && npm run prettier && npm test",
     "prepare": "husky"
   },
   "repository": {
@@ -29,18 +31,23 @@
   "keywords": [
     "profanity",
     "profane",
-    "obscene",
     "obscenity",
-    "obscenities",
+    "obscene",
     "cussing",
+    "curse",
     "cursing",
     "swearing",
     "swearwords",
-    "vulgar",
+    "swear-words",
     "vulgarity",
-    "bad words",
-    "bad language",
-    "dirty words"
+    "badwords",
+    "bad-words",
+    "badlanguage",
+    "bad-language",
+    "dirtywords",
+    "dirty-words",
+    "censor",
+    "filter"
   ],
   "devDependencies": {
     "@types/chai": "^4.3.19",
diff --git a/src/data/index.ts b/src/data/index.ts
@@ -0,0 +1 @@
+export { profaneWords } from "./profane-words";
diff --git a/src/data/profane-words.ts b/src/data/profane-words.ts
@@ -1,7 +1,7 @@
 // WARNING: this file contains profanity. The below list of profane words is necessary for this tool to function properly.
 // Do not read below this line if you do not wish to be exposed to lots of profane words
 
-export default [
+export const profaneWords: readonly string[] = [
   "4r5e",
   "5h1t",
   "5hit",
diff --git a/src/index.ts b/src/index.ts
@@ -1,3 +1,4 @@
-export * from "./profanity";
-export * from "./profanity-options";
+export { Profanity, profanity } from "./profanity";
+export { ProfanityOptions } from "./profanity-options";
 export { CensorType } from "./models";
+export { profaneWords } from "./data";
diff --git a/src/models/index.ts b/src/models/index.ts
@@ -1,2 +1,2 @@
-export * from "./censor-type";
-export * from "./list";
+export { CensorType } from "./censor-type";
+export { List } from "./list";
diff --git a/src/models/list.ts b/src/models/list.ts
@@ -17,8 +17,8 @@ export class List {
     this.onListChanged();
   }
 
-  addWords(words: string[]): void {
-    this.words = this.words.concat(words);
+  addWords(words: readonly string[] | string[]): void {
+    this.words = this.words.concat(words.map((word: string) => word.toLowerCase()));
     this.onListChanged();
   }
 }
diff --git a/src/profanity-options.ts b/src/profanity-options.ts
@@ -5,9 +5,9 @@ export class ProfanityOptions {
 
   grawlixChar: string;
 
-  constructor() {
-    this.wholeWord = true;
-    this.grawlix = "@#$%&!";
-    this.grawlixChar = "*";
+  constructor(options: Partial<ProfanityOptions> = {}) {
+    this.wholeWord = options.wholeWord ?? true;
+    this.grawlix = options.grawlix ?? "@#$%&!";
+    this.grawlixChar = options.grawlixChar ?? "*";
   }
 }
diff --git a/src/profanity.ts b/src/profanity.ts
@@ -1,7 +1,7 @@
 import { ProfanityOptions } from "./profanity-options";
 import { List, CensorType } from "./models";
-import { escapeRegExp } from "./utils/misc";
-import profaneWords from "./data/profane-words";
+import { escapeRegExp } from "./utils";
+import { profaneWords } from "./data";
 
 export class Profanity {
   options: ProfanityOptions;
@@ -12,67 +12,111 @@ export class Profanity {
 
   private regex: RegExp;
 
-  constructor(options?: ProfanityOptions) {
-    this.options = options || new ProfanityOptions();
-
+  constructor(options?: ProfanityOptions | Partial<ProfanityOptions>) {
+    this.options = options ? { ...new ProfanityOptions(), ...options } : new ProfanityOptions();
     this.whitelist = new List(() => this.buildRegex());
     this.blacklist = new List(() => this.buildRegex());
-
     this.blacklist.addWords(profaneWords);
   }
 
   exists(text: string): boolean {
     this.regex.lastIndex = 0;
-    return this.regex.test(text);
+    const lowercaseText = text.toLowerCase();
+
+    let match: RegExpExecArray | null;
+    do {
+      match = this.regex.exec(lowercaseText);
+      if (match !== null) {
+        const matchStart = match.index;
+        const matchEnd = matchStart + match[0].length;
+
+        // Check if the matched word is part of a whitelisted word
+        const isWhitelisted = this.whitelist.words.some((whitelistedWord) => {
+          const whitelistedIndex = lowercaseText.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
+          if (whitelistedIndex === -1) return false;
+
+          const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
+
+          if (this.options.wholeWord) {
+            // For whole word matching, ensure the whitelisted word exactly matches the profane word
+            // and is not part of a hyphenated or underscore-separated word
+            return (
+              matchStart === whitelistedIndex &&
+              matchEnd === whitelistedEnd &&
+              (matchStart === 0 || !/[\w-_]/.test(lowercaseText[matchStart - 1])) &&
+              (matchEnd === lowercaseText.length || !/[\w-_]/.test(lowercaseText[matchEnd]))
+            );
+          }
+
+          // For partial matching, check if the profane word is contained within the whitelisted word
+          return (matchStart >= whitelistedIndex && matchStart < whitelistedEnd) || (matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd);
+        });
+
+        if (!isWhitelisted) {
+          return true;
+        }
+      }
+    } while (match !== null);
+
+    return false;
   }
 
   censor(text: string, censorType: CensorType = CensorType.Word): string {
+    const lowercaseText = text.toLowerCase();
+
     switch (censorType) {
       case CensorType.Word:
-        return text.replace(this.regex, this.options.grawlix);
-      case CensorType.FirstChar: {
-        let output = text;
-
-        Array.from(text.matchAll(this.regex)).forEach((match) => {
-          const word = match[0];
-          const grawlix = this.options.grawlixChar + word.slice(1, word.length);
-          output = output.replace(word, grawlix);
+        return text.replace(this.regex, (match) => {
+          const underscore = match.includes("_") ? "_" : "";
+          return this.options.grawlix + underscore;
         });
-        return output;
+      case CensorType.FirstChar: {
+        return this.replaceProfanity(text, lowercaseText, (word) => this.options.grawlixChar + word.slice(1));
       }
       case CensorType.FirstVowel:
       case CensorType.AllVowels: {
-        const regex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
-        let output = text;
-        Array.from(text.matchAll(this.regex)).forEach((match) => {
-          const word = match[0];
-          const grawlix = word.replace(regex, this.options.grawlixChar);
-          output = output.replace(word, grawlix);
-        });
-        return output;
+        const vowelRegex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
+        return this.replaceProfanity(text, lowercaseText, (word) => word.replace(vowelRegex, this.options.grawlixChar));
       }
       default:
         throw new Error(`Invalid replacement type: "${censorType}"`);
     }
   }
 
+  private replaceProfanity(text: string, lowercaseText: string, replacer: (word: string) => string): string {
+    let result = text;
+    let offset = 0;
+
+    this.regex.lastIndex = 0;
+    let match: RegExpExecArray | null;
+    do {
+      match = this.regex.exec(lowercaseText);
+      if (match !== null) {
+        const matchStart = match.index;
+        const matchEnd = matchStart + match[0].length;
+        const originalWord = text.slice(matchStart + offset, matchEnd + offset);
+        const censoredWord = replacer(originalWord);
+        result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
+        offset += censoredWord.length - originalWord.length;
+      }
+    } while (match !== null);
+
+    return result;
+  }
+
   addWords(words: string[]): void {
     this.blacklist.addWords(words);
   }
 
   removeWords(words: string[]): void {
-    this.blacklist.removeWords(words);
+    this.blacklist.removeWords(words.map((word) => word.toLowerCase()));
   }
 
   private buildRegex(): void {
     const escapedBlacklistWords = this.blacklist.words.map(escapeRegExp);
-    const escapedWhitelistWords = this.whitelist.words.map(escapeRegExp);
-
-    const blacklistPattern = `${this.options.wholeWord ? "\\b" : ""}(${escapedBlacklistWords.join("|")})${this.options.wholeWord ? "\\b" : ""}`;
-    const whitelistPattern = this.whitelist.empty ? "" : `(?!${escapedWhitelistWords.join("|")})`;
-    this.regex = new RegExp(whitelistPattern + blacklistPattern, "ig");
+    const profanityPattern = `${this.options.wholeWord ? "(?:\\b|_)" : ""}(${escapedBlacklistWords.join("|")})${this.options.wholeWord ? "(?:\\b|_)" : ""}`;
+    this.regex = new RegExp(profanityPattern, "gi");
   }
 }
 
 export const profanity = new Profanity();
-export default profanity;
diff --git a/src/utils/index.ts b/src/utils/index.ts
@@ -0,0 +1 @@
+export { escapeRegExp } from "./misc";
diff --git a/tests/import.spec.ts b/tests/import.spec.ts
@@ -0,0 +1,28 @@
+import { expect } from "chai";
+import { profanity, Profanity, CensorType, ProfanityOptions } from "../dist";
+
+describe("ES Module Import", () => {
+  it("should import profanity correctly", () => {
+    expect(profanity).to.be.an.instanceOf(Profanity);
+  });
+
+  it("should import Profanity class correctly", () => {
+    expect(Profanity).to.be.a("function");
+    const instance = new Profanity();
+    expect(instance).to.be.an.instanceOf(Profanity);
+  });
+
+  it("should import CensorType enum correctly", () => {
+    expect(CensorType).to.be.an("object");
+    expect(CensorType.Word).to.exist;
+    expect(CensorType.FirstChar).to.exist;
+    expect(CensorType.FirstVowel).to.exist;
+    expect(CensorType.AllVowels).to.exist;
+  });
+
+  it("should import ProfanityOptions class correctly", () => {
+    expect(ProfanityOptions).to.be.a("function");
+    const options = new ProfanityOptions();
+    expect(options).to.be.an.instanceOf(ProfanityOptions);
+  });
+});
diff --git a/tests/profanity-censor.spec.ts b/tests/profanity-censor.spec.ts
diff --git a/tests/profanity-exists.spec.ts b/tests/profanity-exists.spec.ts
diff --git a/tests/profanity-options.spec.ts b/tests/profanity-options.spec.ts
diff --git a/tests/profanity.spec.ts b/tests/profanity.spec.ts
diff --git a/tests/require.spec.ts b/tests/require.spec.ts
diff --git a/tsconfig.json b/tsconfig.json

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+export { profaneWords } from "./profane-words";`
Original file line number	Diff line number	Diff line change
`@@ -17,8 +17,8 @@ export class List {`
`17`	`17`	`this.onListChanged();`
`18`	`18`	`}`
`19`	`19`
`20`		`- addWords(words: string[]): void {`
`21`		`- this.words = this.words.concat(words);`
	`20`	`+ addWords(words: readonly string[] \| string[]): void {`
	`21`	`+ this.words = this.words.concat(words.map((word: string) => word.toLowerCase()));`
`22`	`22`	`this.onListChanged();`
`23`	`23`	`}`
`24`	`24`	`}`
Original file line number	Diff line number	Diff line change
`@@ -5,9 +5,9 @@ export class ProfanityOptions {`
`5`	`5`
`6`	`6`	`grawlixChar: string;`
`7`	`7`
`8`		`- constructor() {`
`9`		`- this.wholeWord = true;`
`10`		`- this.grawlix = "@#$%&!";`
`11`		`- this.grawlixChar = "*";`
	`8`	`+ constructor(options: Partial<ProfanityOptions> = {}) {`
	`9`	`+ this.wholeWord = options.wholeWord ?? true;`
	`10`	`+ this.grawlix = options.grawlix ?? "@#$%&!";`
	`11`	`+ this.grawlixChar = options.grawlixChar ?? "*";`
`12`	`12`	`}`
`13`	`13`	`}`