@@ -61,30 +61,45 @@ class CharacterClassEscapeRxGene(
6161 private val nonHorizontalSpaceMultiCharRange = MultiCharacterRange (true , horizontalSpaceSet)
6262 private val nonVerticalSpaceMultiCharRange = MultiCharacterRange (true , verticalSpaceSet)
6363
64- // US-ASCII POSIX character classes (\p{X})
65- private val posixMultiCharRanges = mapOf (
66- " Lower" to listOf (CharacterRange (' a' , ' z' )),
67- " Upper" to listOf (CharacterRange (' A' , ' Z' )),
68- " ASCII" to listOf (CharacterRange (0 , 0x7f )),
69- " Alpha" to asciiLetterSet,
70- " Digit" to digitSet,
71- " Alnum" to digitSet + asciiLetterSet,
72- " Punct" to punctuationSet,
73- " Graph" to digitSet + asciiLetterSet + punctuationSet,
74- " Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharacterRanges(" \u0020 " ),
75- " Blank" to stringToListOfCharacterRanges(" \t " ),
76- " Cntrl" to listOf (CharacterRange (0 , 0x1f )) + stringToListOfCharacterRanges(" \u007f " ),
77- " XDigit" to listOf (CharacterRange (' 0' , ' 9' ), CharacterRange (' a' , ' f' ), CharacterRange (' A' , ' F' )),
78- " Space" to spaceSet,
79- " Pe" to stringToListOfCharacterRanges(" )]}" )
80- ).mapValues { (_, value) -> MultiCharacterRange (false , value) }
// Lookup table for the labels accepted inside \p{...} / \P{...}: maps each label to the
// MultiCharacterRange it stands for. The table is assembled inside `run`, so the two
// intermediate maps (POSIX ASCII sets and Unicode category sets) stay local to the initializer.
// Every label is registered twice: under its own name with MultiCharacterRange(false, ...),
// and under "^" + name with MultiCharacterRange(true, ...). The 'p', 'P' branch further down
// prepends "^" to the label when the escape letter is upper case, so \P{X} resolves to the
// "^X" entry.
// NOTE(review): the first constructor argument looks like a "negated" flag (the non-space
// ranges above are also built with `true`) - confirm against MultiCharacterRange.
64+ private val pEscapesMultiCharRanges: Map <String , MultiCharacterRange > = run {
65+ // US-ASCII POSIX character classes (\p{X})
66+ val posixAsciiSets = mapOf (
67+ " Lower" to listOf (CharacterRange (' a' , ' z' )),
68+ " Upper" to listOf (CharacterRange (' A' , ' Z' )),
69+ " ASCII" to listOf (CharacterRange (0 , 0x7f )),
70+ " Alpha" to asciiLetterSet,
71+ " Digit" to digitSet,
72+ " Alnum" to digitSet + asciiLetterSet,
73+ " Punct" to punctuationSet,
74+ " Graph" to digitSet + asciiLetterSet + punctuationSet,
75+ " Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharacterRanges(" \u0020 " ),
76+ " Blank" to stringToListOfCharacterRanges(" \t " ),
77+ " Cntrl" to listOf (CharacterRange (0 , 0x1f )) + stringToListOfCharacterRanges(" \u007f " ),
78+ " XDigit" to listOf (CharacterRange (' 0' , ' 9' ), CharacterRange (' a' , ' f' ), CharacterRange (' A' , ' F' )),
79+ " Space" to spaceSet,
80+ )
81+
82+ // Unicode category character classes (\p{X})
83+ val unicodeCategorySets = mapOf (
84+ " Pe" to stringToListOfCharacterRanges(" )]}" )
85+ // more Unicode categories will be added here
86+ )
87+
88+ // register every set twice: once under its label (normal) and once under "^label" (negated)
89+ (posixAsciiSets + unicodeCategorySets).flatMap { (key, value) ->
90+ listOf (
91+ key to MultiCharacterRange (false , value),
92+ " ^$key " to MultiCharacterRange (true , value)
93+ )
94+ }.toMap()
95+ }
8196 }
8297
8398 var value: String = " "
8499 var multiCharRange: MultiCharacterRange
85100
86101 init {
87- if (type[0 ] !in " wWdDsSvVhHp " ) {
102+ if (type[0 ] !in " wWdDsSvVhHpP " ) {
88103 throw IllegalArgumentException (" Invalid type: $type " )
89104 }
90105
@@ -99,12 +114,16 @@ class CharacterClassEscapeRxGene(
99114 ' V' -> nonVerticalSpaceMultiCharRange
100115 ' h' -> horizontalSpaceMultiCharRange
101116 ' H' -> nonHorizontalSpaceMultiCharRange
102- ' p' ->
103- if (type.substring(2 , type.length - 1 ) !in posixMultiCharRanges){
104- throw IllegalArgumentException (" $type invalid/unsupported POSIX character class" )
117+ ' p' , ' P' -> {
118+ val pLabel = type.substring(2 , type.length - 1 )
119+ val negated = type[0 ].isUpperCase()
120+ val lookupKey = if (negated) " ^$pLabel " else pLabel
121+ if (lookupKey !in pEscapesMultiCharRanges) {
122+ throw IllegalArgumentException (" $type invalid/unsupported \\ p escape character class" )
105123 } else {
106- posixMultiCharRanges[type.substring( 2 , type.length - 1 ) ]!!
124+ pEscapesMultiCharRanges[lookupKey ]!!
107125 }
126+ }
108127 else -> // this should never happen due to check in init
109128 throw IllegalStateException (" Type '\\ $type ' not supported yet" )
110129 }
0 commit comments