|
| 1 | +/*-------------------------------------------------------------------------- |
| 2 | +
|
| 3 | +TypeBox |
| 4 | +
|
| 5 | +The MIT License (MIT) |
| 6 | +
|
| 7 | +Copyright (c) 2017-2026 Haydn Paterson |
| 8 | +
|
| 9 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 10 | +of this software and associated documentation files (the "Software"), to deal |
| 11 | +in the Software without restriction, including without limitation the rights |
| 12 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 13 | +copies of the Software, and to permit persons to whom the Software is |
| 14 | +furnished to do so, subject to the following conditions: |
| 15 | +
|
| 16 | +The above copyright notice and this permission notice shall be included in |
| 17 | +all copies or substantial portions of the Software. |
| 18 | +
|
| 19 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 20 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 21 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 22 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 23 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 24 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 25 | +THE SOFTWARE. |
| 26 | +
|
| 27 | +---------------------------------------------------------------------------*/ |
| 28 | + |
| 29 | +import * as Puny from './_puny.ts' |
| 30 | + |
| 31 | +// ------------------------------------------------------------------ |
| 32 | +// Unicode General Category Helper (RFC 5892) |
| 33 | +// ------------------------------------------------------------------ |
// Created once at module scope rather than on every call.
const NONSPACING_MARK_PATTERN = /\p{Mn}/u
/** Returns true when `cp` has Unicode General_Category Mn (Nonspacing_Mark). */
function IsNonspacingMark(cp: number): boolean {
  return NONSPACING_MARK_PATTERN.test(String.fromCodePoint(cp))
}
// Created once at module scope rather than on every call.
const SPACING_COMBINING_MARK_PATTERN = /\p{Mc}/u
/** Returns true when `cp` has Unicode General_Category Mc (Spacing_Mark). */
function IsSpacingCombiningMark(cp: number): boolean {
  return SPACING_COMBINING_MARK_PATTERN.test(String.fromCodePoint(cp))
}
// Created once at module scope rather than on every call.
const ENCLOSING_MARK_PATTERN = /\p{Me}/u
/** Returns true when `cp` has Unicode General_Category Me (Enclosing_Mark). */
function IsEnclosingMark(cp: number): boolean {
  return ENCLOSING_MARK_PATTERN.test(String.fromCodePoint(cp))
}
/**
 * Returns true when `cp` is a combining mark, i.e. its General_Category is
 * one of Mn, Mc or Me. Used to reject labels that begin with such a mark.
 */
function IsCombiningMark(cp: number): boolean {
  return IsNonspacingMark(cp) || IsSpacingCombiningMark(cp) || IsEnclosingMark(cp)
}
| 46 | +// ------------------------------------------------------------------ |
| 47 | +// RFC 5892 §2.6 DISALLOWED exceptions |
| 48 | +// |
| 49 | +// https://tools.ietf.org/html/rfc5892#section-2.6 |
| 50 | +// ------------------------------------------------------------------ |
// Code points that are rejected wherever they appear in a label.
const RFC5892_DISALLOWED = new Set<number>([
  0x0640, // ARABIC TATWEEL
  0x07fa, // NKO LAJANYALAN
  0x302e, // HANGUL SINGLE DOT TONE MARK
  0x302f, // HANGUL DOUBLE DOT TONE MARK
  0x3031, // VERTICAL KANA REPEAT MARK
  0x3032, // VERTICAL KANA REPEAT WITH VOICED SOUND MARK
  0x3033, // VERTICAL KANA REPEAT MARK UPPER HALF
  0x3034, // VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
  0x3035, // VERTICAL KANA REPEAT MARK LOWER HALF
  0x303b // VERTICAL IDEOGRAPHIC ITERATION MARK
])
| 63 | +// ------------------------------------------------------------------ |
// A set of Virama (halant) code points used to validate the CONTEXTJ
// rule for ZERO WIDTH JOINER (RFC 5892 Appendix A.2). These characters
// allow a subsequent Zero Width Joiner (U+200D) to be valid in a label.
| 67 | +// ------------------------------------------------------------------ |
// Code points accepted as the character immediately before U+200D.
// Entries are grouped by the Unicode block they fall in.
// NOTE(review): this list has not been checked for completeness against the
// current Unicode Character Database - confirm before relying on it for
// scripts that are not listed here.
const VIRAMA_CPS = new Set<number>([
  0x094d, // Devanagari
  0x09cd, // Bengali
  0x0a4d, // Gurmukhi
  0x0acd, // Gujarati
  0x0b4d, // Oriya
  0x0bcd, // Tamil
  0x0c4d, // Telugu
  0x0ccd, // Kannada
  0x0d3b, // Malayalam
  0x0d3c, // Malayalam
  0x0d4d, // Malayalam
  0x0dca, // Sinhala
  0x1b44, // Balinese
  0x1baa, // Sundanese
  0x1bab, // Sundanese
  0xa9c0, // Javanese
  0x11046, // Brahmi
  0x1107f, // Brahmi
  0x110b9, // Kaithi
  0x11133, // Chakma
  0x11134, // Chakma
  0x111c0, // Sharada
  0x11235, // Khojki
  0x1134d, // Grantha
  0x11442, // Newa
  0x114c2, // Tirhuta
  0x115bf, // Siddham
  0x1163f, // Modi
  0x116b6, // Takri
  0x11c3f, // Bhaiksuki
  0x11d44, // Masaram Gondi
  0x11d45 // Masaram Gondi
])
| 102 | +// ------------------------------------------------------------------ |
// Guards for CONTEXTJ / CONTEXTO rules (RFC 5892 Appendix A)
| 104 | +// ------------------------------------------------------------------ |
// Created once at module scope rather than on every call.
const GREEK_SCRIPT_PATTERN = /\p{Script=Greek}/u
/** Returns true when the Unicode Script property of `cp` is Greek. */
function IsGreek(cp: number): boolean {
  return GREEK_SCRIPT_PATTERN.test(String.fromCodePoint(cp))
}
// Created once at module scope rather than on every call.
const HEBREW_SCRIPT_PATTERN = /\p{Script=Hebrew}/u
/** Returns true when the Unicode Script property of `cp` is Hebrew. */
function IsHebrew(cp: number): boolean {
  return HEBREW_SCRIPT_PATTERN.test(String.fromCodePoint(cp))
}
// Created once at module scope rather than on every call.
const HIRAGANA_SCRIPT_PATTERN = /\p{Script=Hiragana}/u
/** Returns true when the Unicode Script property of `cp` is Hiragana. */
function IsHiragana(cp: number): boolean {
  return HIRAGANA_SCRIPT_PATTERN.test(String.fromCodePoint(cp))
}
// Created once at module scope rather than on every call.
const KATAKANA_SCRIPT_PATTERN = /\p{Script=Katakana}/u
/**
 * Returns true when the Unicode Script property of `cp` is Katakana.
 * This tests Script, not Script_Extensions.
 */
function IsKatakana(cp: number): boolean {
  return KATAKANA_SCRIPT_PATTERN.test(String.fromCodePoint(cp))
}
// Created once at module scope rather than on every call.
const HAN_SCRIPT_PATTERN = /\p{Script=Han}/u
/** Returns true when the Unicode Script property of `cp` is Han. */
function IsHan(cp: number): boolean {
  return HAN_SCRIPT_PATTERN.test(String.fromCodePoint(cp))
}
/** Returns true when `cp` lies in U+0660..U+0669 (inclusive). */
function IsArabicIndicDigit(cp: number): boolean {
  return 0x0660 <= cp && cp <= 0x0669
}
/** Returns true when `cp` lies in U+06F0..U+06F9 (inclusive). */
function IsExtendedArabicIndicDigit(cp: number): boolean {
  return 0x06f0 <= cp && cp <= 0x06f9
}
/** Returns true when `cp` is one of the code points listed in VIRAMA_CPS. */
function IsVirama(cp: number): boolean {
  return VIRAMA_CPS.has(cp)
}
| 129 | +// ------------------------------------------------------------------ |
| 130 | +// IsUnicodeLabel |
| 131 | +// ------------------------------------------------------------------ |
/**
 * Validates one label given in Unicode form.
 *
 * Checks, in order: non-empty; hyphen placement; no leading combining mark;
 * no code point from RFC5892_DISALLOWED; the per-character neighbour rules
 * handled in the switch below; and two whole-label rules (KATAKANA MIDDLE DOT
 * needs a Hiragana/Katakana/Han character somewhere in the label, and the two
 * Arabic digit ranges must not be mixed).
 *
 * NOTE(review): ZERO WIDTH NON-JOINER (U+200C, RFC 5892 Appendix A.1) has no
 * case in the switch, so it is accepted in any position - confirm that this
 * is intentional.
 *
 * @param value The label text (without dots).
 * @returns true when every check passes.
 */
function IsUnicodeLabel(value: string): boolean {
  if (value.length === 0) return false
  // Spreading iterates by code point, so a surrogate pair becomes one entry
  // and neighbours can be read by index.
  const cps = [...value].map((c) => c.codePointAt(0)!)
  const len = cps.length
  // RFC 5891 §4.2.3.1: no leading or trailing hyphen, no "--" at positions 3-4
  if (cps[0] === 0x2d || cps[len - 1] === 0x2d) return false
  if (len >= 4 && cps[2] === 0x2d && cps[3] === 0x2d) return false
  // RFC 5891 §4.2.3.2: must not begin with a combining mark
  if (IsCombiningMark(cps[0])) return false
  let hasJapanese = false
  let hasKatakanaMiddleDot = false
  let hasArabicIndic = false
  let hasExtendedArabicIndic = false
  for (let i = 0; i < len; i++) {
    const cp = cps[i]
    // 1. DISALLOWED exceptions
    if (RFC5892_DISALLOWED.has(cp)) return false
    // 2. Collect flags (the script lookups are skipped once one has matched)
    if (!hasJapanese && (IsHiragana(cp) || IsKatakana(cp) || IsHan(cp))) hasJapanese = true
    if (IsArabicIndicDigit(cp)) hasArabicIndic = true
    if (IsExtendedArabicIndicDigit(cp)) hasExtendedArabicIndic = true
    // 3. CONTEXTO / CONTEXTJ neighbour rules; prev/next are undefined at the
    //    first/last position respectively.
    const prev = cps[i - 1]
    const next = cps[i + 1]
    switch (cp) {
      case 0x00b7: // MIDDLE DOT (Catalan): must sit between two 'l'
        if (prev !== 0x006c || next !== 0x006c) return false
        break
      case 0x0375: // GREEK LOWER NUMERAL SIGN (KERAIA): next must be Greek
        if (next === undefined || !IsGreek(next)) return false
        break
      case 0x05f3: // HEBREW PUNCTUATION GERESH
      case 0x05f4: // HEBREW PUNCTUATION GERSHAYIM: previous must be Hebrew
        if (prev === undefined || !IsHebrew(prev)) return false
        break
      case 0x200d: // ZERO WIDTH JOINER: previous must be a virama
        if (prev === undefined || !IsVirama(prev)) return false
        break
      case 0x30fb: // KATAKANA MIDDLE DOT: resolved after the loop
        hasKatakanaMiddleDot = true
        break
    }
  }
  // 4. Whole-label validations
  // RFC 5892 Appendix A.7 - KATAKANA MIDDLE DOT requirement
  if (hasKatakanaMiddleDot && !hasJapanese) return false
  // RFC 5892 Appendix A.8/A.9 - the two Arabic digit ranges must not be mixed
  if (hasArabicIndic && hasExtendedArabicIndic) return false
  return true
}
| 180 | +// ------------------------------------------------------------------ |
| 181 | +// IsAsciiLabel |
| 182 | +// ------------------------------------------------------------------ |
/**
 * Validates one ASCII (letters, digits, hyphen) label.
 *
 * @param value The label text (without dots).
 * @returns true when the label is non-empty, does not start or end with a
 *   hyphen, does not have "--" at positions 3-4, and contains only
 *   a-z, A-Z, 0-9 or '-'.
 */
function IsAsciiLabel(value: string): boolean {
  const hyphen = 45 // '-'
  const length = value.length
  // An empty label is never valid; without this guard charCodeAt(0) is NaN,
  // every check below is skipped and the function would return true.
  if (length === 0) return false
  // Must not start or end with a hyphen
  if (value.charCodeAt(0) === hyphen || value.charCodeAt(length - 1) === hyphen) return false
  // RFC 5891 §4.2.3.1: "--" at positions 3-4 is reserved for A-labels only
  if (length >= 4 && value.charCodeAt(2) === hyphen && value.charCodeAt(3) === hyphen) return false
  // All characters must be alphanumeric or hyphen
  for (let i = 0; i < length; i++) {
    const ch = value.charCodeAt(i)
    const isLower = ch >= 97 && ch <= 122 // a-z
    const isUpper = ch >= 65 && ch <= 90 // A-Z
    const isDigit = ch >= 48 && ch <= 57 // 0-9
    if (!(isLower || isUpper || isDigit || ch === hyphen)) return false
  }
  return true
}
| 203 | +// ------------------------------------------------------------------ |
| 204 | +// IsPunyLabel |
| 205 | +// ------------------------------------------------------------------ |
/** Returns true when `value` starts with the "xn--" prefix, compared case-insensitively. */
function IsPuny(value: string): boolean {
  return value.toLowerCase().startsWith('xn--')
}
/**
 * Validates a label that carries the "xn--" prefix by decoding the remainder
 * and validating the decoded text with IsUnicodeLabel.
 *
 * The first four characters are dropped unconditionally, so callers are
 * expected to have checked the prefix with IsPuny first.
 *
 * @returns false when decoding throws or the decoded label is not valid.
 */
function IsPunyLabel(value: string): boolean {
  try {
    const decoded = Puny.Decode(value.slice(4))
    return IsUnicodeLabel(decoded)
  } catch {
    // Any exception from the decoder is treated as an invalid punycode encoding
    return false
  }
}
| 216 | +// ------------------------------------------------------------------ |
| 217 | +// IsIdnLabel |
| 218 | +// ------------------------------------------------------------------ |
/**
 * Returns true when `value` is a valid internationalized label.
 *
 * The label must be 1 to 63 characters long (measured with `String.length`,
 * i.e. UTF-16 code units). Labels with the "xn--" prefix are decoded and
 * validated; all other labels are validated directly as Unicode labels.
 *
 * @param value A single label (no dots).
 */
export function IsIdnLabel(value: string): boolean {
  if (value.length === 0 || value.length > 63) return false
  if (IsPuny(value)) return IsPunyLabel(value)
  return IsUnicodeLabel(value)
}
| 223 | +// ------------------------------------------------------------------ |
| 224 | +// IsLabel |
| 225 | +// ------------------------------------------------------------------ |
/**
 * Returns true when `value` is a valid ASCII label.
 *
 * The label must be 1 to 63 characters long. Labels with the "xn--" prefix
 * are decoded and validated; all other labels must consist of letters,
 * digits and hyphens as checked by IsAsciiLabel.
 *
 * @param value A single label (no dots).
 */
export function IsLabel(value: string): boolean {
  if (value.length === 0 || value.length > 63) return false
  if (IsPuny(value)) return IsPunyLabel(value)
  return IsAsciiLabel(value)
}
0 commit comments