11import { ProfanityOptions } from "./profanity-options" ;
22import { List , CensorType } from "./models" ;
3- import { escapeRegExp } from "./utils/misc " ;
4- import profaneWords from "./data/profane-words " ;
3+ import { escapeRegExp } from "./utils" ;
4+ import { profaneWords } from "./data" ;
55
66export class Profanity {
77 options : ProfanityOptions ;
@@ -12,67 +12,111 @@ export class Profanity {
1212
1313 private regex : RegExp ;
1414
/**
 * Creates a filter whose options are the `ProfanityOptions` defaults overlaid with
 * any values supplied by the caller.
 *
 * @param options - Full or partial options; when omitted, a fresh `ProfanityOptions` is used as-is.
 */
constructor(options?: ProfanityOptions | Partial<ProfanityOptions>) {
  const defaults = new ProfanityOptions();
  this.options = options ? { ...defaults, ...options } : defaults;

  // Both lists receive a callback that rebuilds the matcher
  // (presumably invoked by List when its words change — confirm in ./models).
  const rebuild = (): void => this.buildRegex();
  this.whitelist = new List(rebuild);
  this.blacklist = new List(rebuild);

  // Seed the blacklist with the bundled word list.
  this.blacklist.addWords(profaneWords);
}
2321
/**
 * Reports whether `text` contains at least one blacklist match that is not
 * covered by a whitelisted word.
 *
 * Matching runs against a lowercased copy of `text`; whitelist entries are
 * compared against that lowercased copy as-is.
 *
 * @param text - The text to scan.
 * @returns `true` as soon as a non-whitelisted match is found, otherwise `false`.
 */
exists(text: string): boolean {
  const lowercaseText = text.toLowerCase();
  // Characters that join a word to its neighbour: word characters (which include "_") and "-".
  // Hoisted so the literal is not re-created for every match.
  const wordJoiner = /[\w_-]/;

  this.regex.lastIndex = 0;
  for (let match = this.regex.exec(lowercaseText); match !== null; match = this.regex.exec(lowercaseText)) {
    const matchStart = match.index;
    const matchEnd = matchStart + match[0].length;

    if (matchEnd === matchStart) {
      // An empty match is not a profane word. exec() leaves lastIndex where it is after
      // an empty match, so step forward manually or the loop would never terminate.
      this.regex.lastIndex = matchStart + 1;
      continue;
    }

    const isWhitelisted = this.whitelist.words.some((whitelistedWord) => {
      // Earliest position at which the whitelisted word could still cover matchStart.
      const searchFrom = Math.max(0, matchStart - whitelistedWord.length + 1);
      const whitelistedIndex = lowercaseText.indexOf(whitelistedWord, searchFrom);
      if (whitelistedIndex === -1) return false;

      const whitelistedEnd = whitelistedIndex + whitelistedWord.length;

      if (this.options.wholeWord) {
        // Whole-word mode: the whitelisted word must span exactly the matched text and
        // must not be glued to a neighbouring word character, hyphen or underscore.
        return (
          matchStart === whitelistedIndex &&
          matchEnd === whitelistedEnd &&
          (matchStart === 0 || !wordJoiner.test(lowercaseText.charAt(matchStart - 1))) &&
          (matchEnd === lowercaseText.length || !wordJoiner.test(lowercaseText.charAt(matchEnd)))
        );
      }

      // Partial mode: the match counts as whitelisted when its start or its end
      // falls inside the whitelisted word's span.
      return (
        (matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
        (matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd)
      );
    });

    if (!isWhitelisted) {
      return true;
    }
  }

  return false;
}
2863
/**
 * Returns a copy of `text` with blacklist matches censored.
 *
 * - `CensorType.Word`: each match becomes `options.grawlix`, followed by "_" when the
 *   matched text contains an underscore.
 * - `CensorType.FirstChar`: the first character of each match becomes `options.grawlixChar`.
 * - `CensorType.FirstVowel` / `CensorType.AllVowels`: the first / every a, e, i, o, u
 *   (case-insensitive) in each match becomes `options.grawlixChar`.
 *
 * @param text - The text to censor.
 * @param censorType - How matches are rewritten; defaults to `CensorType.Word`.
 * @returns The censored text.
 * @throws Error when `censorType` is none of the values above.
 */
censor(text: string, censorType: CensorType = CensorType.Word): string {
  const lowered = text.toLowerCase();

  if (censorType === CensorType.Word) {
    return text.replace(this.regex, (hit) => {
      const suffix = hit.includes("_") ? "_" : "";
      return this.options.grawlix + suffix;
    });
  }

  if (censorType === CensorType.FirstChar) {
    const maskFirstChar = (word: string): string => this.options.grawlixChar + word.slice(1);
    return this.replaceProfanity(text, lowered, maskFirstChar);
  }

  if (censorType === CensorType.FirstVowel || censorType === CensorType.AllVowels) {
    // Without the "g" flag only the first vowel of each word is replaced.
    const flags = censorType === CensorType.FirstVowel ? "i" : "ig";
    const vowelPattern = new RegExp("[aeiou]", flags);
    const maskVowels = (word: string): string => word.replace(vowelPattern, this.options.grawlixChar);
    return this.replaceProfanity(text, lowered, maskVowels);
  }

  throw new Error(`Invalid replacement type: "${censorType}"`);
}
5885
/**
 * Rewrites every blacklist match in `text` with the output of `replacer`.
 *
 * @param text - The original text; replaced words are taken from it with their casing intact.
 * @param lowercaseText - Lowercased copy of `text` used for matching.
 * @param replacer - Receives each matched word (original casing) and returns its replacement.
 * @returns `text` with each match replaced; unmatched stretches are copied verbatim.
 */
private replaceProfanity(text: string, lowercaseText: string, replacer: (word: string) => string): string {
  // Match indices are applied to `text`, so only search the lowercased copy when it has the
  // same length as `text`; lowercasing can lengthen a string (e.g. "İ"), which would shift
  // every later index. The matcher is case-insensitive, so `text` itself is a valid fallback.
  const searchText = lowercaseText.length === text.length ? lowercaseText : text;

  const pieces: string[] = [];
  let cursor = 0; // index in `text` up to which output has already been emitted

  this.regex.lastIndex = 0;
  for (let match = this.regex.exec(searchText); match !== null; match = this.regex.exec(searchText)) {
    const matchStart = match.index;
    const matchEnd = matchStart + match[0].length;

    if (matchEnd === matchStart) {
      // exec() does not move lastIndex past an empty match; step forward manually
      // so the loop cannot spin on the same position forever.
      this.regex.lastIndex = matchStart + 1;
      continue;
    }

    // Always slice the original word at the match's own indices in `text`. The output is
    // assembled from pieces, so a replacement of a different length cannot skew later slices.
    pieces.push(text.slice(cursor, matchStart), replacer(text.slice(matchStart, matchEnd)));
    cursor = matchEnd;
  }

  pieces.push(text.slice(cursor));
  return pieces.join("");
}
106+
/**
 * Adds words to the blacklist by forwarding them to the blacklist's `addWords`.
 *
 * @param words - Words to add.
 */
addWords(words: string[]): void {
  const { blacklist } = this;
  blacklist.addWords(words);
}
62110
/**
 * Removes words from the blacklist. Each word is lowercased before it is
 * forwarded to the blacklist's `removeWords`.
 *
 * @param words - Words to remove, in any casing.
 */
removeWords(words: string[]): void {
  const lowercased = words.map((entry) => entry.toLowerCase());
  this.blacklist.removeWords(lowercased);
}
66114
/**
 * Rebuilds `this.regex` from the current blacklist and the `wholeWord` option.
 *
 * The result is a global, case-insensitive alternation of the escaped blacklist words.
 * With `wholeWord` enabled, each side of the alternation must sit on a word boundary
 * or an underscore (the underscore, when present, becomes part of the match).
 */
private buildRegex(): void {
  // An empty alternative would let the pattern match the empty string at every position,
  // which makes every text look profane and stalls exec()-based loops. Drop empty entries.
  const escapedBlacklistWords = this.blacklist.words.filter((word) => word.length > 0).map(escapeRegExp);

  if (escapedBlacklistWords.length === 0) {
    // Nothing to match: an empty negative lookahead never succeeds.
    this.regex = new RegExp("(?!)", "gi");
    return;
  }

  const boundary = this.options.wholeWord ? "(?:\\b|_)" : "";
  const profanityPattern = `${boundary}(${escapedBlacklistWords.join("|")})${boundary}`;
  this.regex = new RegExp(profanityPattern, "gi");
}
75120}
76121
/** Module-level `Profanity` instance, constructed without an options argument. */
export const profanity = new Profanity();
78- export default profanity ;
0 commit comments