@@ -3,6 +3,11 @@ import { List, CensorType } from "./models";
33import { escapeRegExp } from "./utils" ;
44import { profaneWords } from "./data" ;
55
/**
 * Matches a single ASCII vowel, case-insensitively.
 * It has no `g` flag, so `String.prototype.replace` substitutes only the first vowel.
 */
const FIRST_VOWEL_RE = /[aeiou]/i;

/**
 * Matches every ASCII vowel, case-insensitively.
 * The `g` flag makes this regex stateful (`lastIndex`). Sharing it at module level is
 * safe with `String.prototype.replace`, which resets `lastIndex` before matching.
 * Do not call `.test()` or `.exec()` on it directly.
 */
const ALL_VOWELS_RE = /[aeiou]/gi;

/**
 * Matches one "word" character under Unicode rules: any letter (`\p{L}`),
 * number (`\p{N}`) or combining mark (`\p{M}`), plus `_` and `-`.
 * It is not global, so `.test()` is stateless.
 */
const UNICODE_WORD_CHAR_RE = /[\p{L}\p{N}\p{M}_-]/u;

/**
 * Matches one "word" character under ASCII rules: `\w` (`[A-Za-z0-9_]`) plus `-`.
 * The hyphen is placed last so it is an unambiguous literal and the class does not
 * rely on the legacy (Annex B) parsing of `\w-_`.
 * It is not global, so `.test()` is stateless.
 */
const ASCII_WORD_CHAR_RE = /[\w-]/;
10+
611export class Profanity {
712 options : ProfanityOptions ;
813 whitelist : List ;
@@ -84,7 +89,7 @@ export class Profanity {
8489 return this . options . grawlixChar + word . slice ( 1 ) ;
8590 case CensorType . FirstVowel :
8691 case CensorType . AllVowels : {
87- const vowelRegex = new RegExp ( "[aeiou]" , censorType === CensorType . FirstVowel ? "i" : "ig" ) ;
92+ const vowelRegex = censorType === CensorType . FirstVowel ? FIRST_VOWEL_RE : ALL_VOWELS_RE ;
8893 return word . replace ( vowelRegex , this . options . grawlixChar ) ;
8994 }
9095 default :
@@ -153,32 +158,31 @@ export class Profanity {
153158 * @returns True if the match is whitelisted, false otherwise.
154159 */
155160 private isWhitelisted ( matchStart : number , matchEnd : number , text : string ) : boolean {
161+ const wholeWord = this . options . wholeWord ;
162+ const wordCharRe = wholeWord ? ( this . options . unicodeWordBoundaries ? UNICODE_WORD_CHAR_RE : ASCII_WORD_CHAR_RE ) : null ;
163+
156164 for ( const whitelistedWord of this . whitelist . words ) {
157165 const whitelistedIndex = text . indexOf ( whitelistedWord , Math . max ( 0 , matchStart - whitelistedWord . length + 1 ) ) ;
158- if ( whitelistedIndex !== - 1 ) {
159- const whitelistedEnd = whitelistedIndex + whitelistedWord . length ;
160-
161- if ( this . options . wholeWord ) {
162- const isWordChar = ( ch : string | undefined ) => {
163- if ( ! ch ) return false ;
164- return this . options . unicodeWordBoundaries ? / [ \p{ L} \p{ N} \p{ M} _ - ] / u. test ( ch ) : / [ \w - _ ] / . test ( ch ) ;
165- } ;
166- if (
167- matchStart === whitelistedIndex &&
168- matchEnd === whitelistedEnd &&
169- ( matchStart === 0 || ! isWordChar ( text . charAt ( matchStart - 1 ) ) ) &&
170- ( matchEnd === text . length || ! isWordChar ( text . charAt ( matchEnd ) ) )
171- ) {
172- return true ;
173- }
174- } else {
175- if (
176- ( matchStart >= whitelistedIndex && matchStart < whitelistedEnd ) ||
177- ( matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd ) ||
178- ( whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd )
179- ) {
180- return true ;
181- }
166+ if ( whitelistedIndex === - 1 ) continue ;
167+
168+ const whitelistedEnd = whitelistedIndex + whitelistedWord . length ;
169+
170+ if ( wordCharRe ) {
171+ if (
172+ matchStart === whitelistedIndex &&
173+ matchEnd === whitelistedEnd &&
174+ ( matchStart === 0 || ! wordCharRe . test ( text . charAt ( matchStart - 1 ) ) ) &&
175+ ( matchEnd === text . length || ! wordCharRe . test ( text . charAt ( matchEnd ) ) )
176+ ) {
177+ return true ;
178+ }
179+ } else {
180+ if (
181+ ( matchStart >= whitelistedIndex && matchStart < whitelistedEnd ) ||
182+ ( matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd ) ||
183+ ( whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd )
184+ ) {
185+ return true ;
182186 }
183187 }
184188 }
@@ -199,20 +203,22 @@ export class Profanity {
199203 replacer : ( word : string , start : number , end : number ) => string ,
200204 regex : RegExp ,
201205 ) : string {
202- let result = text ;
203- let offset = 0 ;
206+ const parts : string [ ] = [ ] ;
207+ let lastIndex = 0 ;
204208
205209 let match : RegExpExecArray | null ;
206210 while ( ( match = regex . exec ( lowercaseText ) ) !== null ) {
207211 const matchStart = match . index ;
208212 const matchEnd = matchStart + match [ 0 ] . length ;
209- const originalWord = text . slice ( matchStart + offset , matchEnd + offset ) ;
213+ const originalWord = text . slice ( matchStart , matchEnd ) ;
210214 const censoredWord = replacer ( originalWord , matchStart , matchEnd ) ;
211- result = result . slice ( 0 , matchStart + offset ) + censoredWord + result . slice ( matchEnd + offset ) ;
212- offset += censoredWord . length - originalWord . length ;
215+ parts . push ( text . slice ( lastIndex , matchStart ) , censoredWord ) ;
216+ lastIndex = matchEnd ;
213217 }
214218
215- return result ;
219+ if ( lastIndex === 0 ) return text ;
220+ parts . push ( text . slice ( lastIndex ) ) ;
221+ return parts . join ( "" ) ;
216222 }
217223
218224 /**
@@ -237,20 +243,35 @@ export class Profanity {
237243 throw new Error ( "At least one language must be provided" ) ;
238244 }
239245
240- const uniqueLanguages = [ ...new Set ( languages . map ( ( language ) => language . trim ( ) . toLowerCase ( ) ) ) ] ;
241-
242- const regexKey = uniqueLanguages . toSorted ( ) . join ( "," ) ;
243- if ( this . regexes . has ( regexKey ) ) {
244- return this . regexes . get ( regexKey ) ! ;
246+ const seen = new Set < string > ( ) ;
247+ const uniqueLanguages : string [ ] = [ ] ;
248+ for ( const language of languages ) {
249+ const normalized = language . trim ( ) . toLowerCase ( ) ;
250+ if ( ! seen . has ( normalized ) ) {
251+ seen . add ( normalized ) ;
252+ uniqueLanguages . push ( normalized ) ;
253+ }
245254 }
246255
247- const allWords = uniqueLanguages . flatMap ( ( language ) => {
256+ uniqueLanguages . sort ( ) ;
257+ const regexKey = uniqueLanguages . join ( "," ) ;
258+
259+ const cached = this . regexes . get ( regexKey ) ;
260+ if ( cached ) return cached ;
261+
262+ const removedWords = this . removed . words ;
263+ const allWords : string [ ] = [ ] ;
264+ for ( const language of uniqueLanguages ) {
248265 const words = profaneWords . get ( language ) ;
249266 if ( ! words ) {
250267 throw new Error ( `Invalid language: "${ language } "` ) ;
251268 }
252- return words . filter ( ( word ) => ! this . removed . words . has ( word ) ) ;
253- } ) ;
269+ for ( const word of words ) {
270+ if ( ! removedWords . has ( word ) ) {
271+ allWords . push ( word ) ;
272+ }
273+ }
274+ }
254275
255276 const regex = this . buildRegex ( allWords ) ;
256277 this . regexes . set ( regexKey , regex ) ;
0 commit comments