@@ -10,6 +10,33 @@ import (
1010 "golang.org/x/text/language"
1111)
1212
// languageTagRe matches a complete, upper-cased BCP 47 (RFC 5646) language tag.
// It is compiled once at package initialisation so that validation calls do
// not pay for recompiling the pattern.
//
// The pattern consists of two anchored alternatives:
//
//   - group 1: a grandfathered tag (irregular or regular) or a stand-alone
//     private-use tag, i.e. "X" followed by one or more 1-8 character
//     alphanumeric subtags;
//   - groups 2-6: the language, script, region, variant and extension parts
//     of a langtag, optionally followed by an uncaptured private-use suffix.
//
// Only upper-case ASCII letters are matched, so the input must be upper-cased
// before it is tested against this expression.
var languageTagRe = regexp.MustCompile(strings.Join([]string{
	// group 1: grandfathered or private-use-only tag
	`^(`,
	// irregular
	`EN-GB-OED|I-AMI|I-BNN|I-DEFAULT|I-ENOCHIAN|I-HAK|I-KLINGON|I-LUX|I-MINGO|I-NAVAJO|I-PWN|I-TAO|I-TAY|I-TSU|`,
	`SGN-BE-FR|SGN-BE-NL|SGN-CH-DE|`,
	// regular
	`ART-LOJBAN|CEL-GAULISH|NO-BOK|NO-NYN|ZH-GUOYU|ZH-HAKKA|ZH-MIN|ZH-MIN-NAN|ZH-XIANG|`,
	// privateuse: one or more subtags, mirroring the langtag suffix below.
	// The repetition is non-capturing so the group numbering stays stable.
	`X(?:-[A-Z0-9]{1,8})+`,
	`)$`,

	`|`,

	// langtag
	`^`,
	`((?:[A-Z]{2,3}(?:-[A-Z]{3}){0,3})|[A-Z]{4}|[A-Z]{5,8})`, // group 2: language
	`(?:-([A-Z]{4}))?`,          // group 3: script
	`(?:-([A-Z]{2}|[0-9]{3}))?`, // group 4: region
	`(?:-((?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3})(?:-(?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3}))*))?`, // group 5: variant
	`(?:-((?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+)(?:-(?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+))*))?`,   // group 6: extension
	`(?:-X(?:-[A-Z0-9]{1,8})+)?`, // private-use suffix (not captured)
	`$`,
}, ""))
39+
1340// isBCP47StrictLanguageTag validates a BCP 47 language tag according to
1441// https://www.rfc-editor.org/rfc/bcp/bcp47.txt, rejecting POSIX-style tags
1542// (e.g. en_GB) and Unicode extensions unlike the built-in bcp47_language_tag.
@@ -27,31 +54,6 @@ func isBCP47StrictLanguageTag(fl validator.FieldLevel) bool {
2754}
2855
2956func isValidBCP47StrictLanguageTag (s string ) bool {
30- languageTagRe := regexp .MustCompile (strings .Join ([]string {
31- // group 1:
32- `^(` ,
33- // irregular
34- `EN-GB-OED|I-AMI|I-BNN|I-DEFAULT|I-ENOCHIAN|I-HAK|I-KLINGON|I-LUX|I-MINGO|I-NAVAJO|I-PWN|I-TAO|I-TAY|I-TSU|` ,
35- `SGN-BE-FR|SGN-BE-NL|SGN-CH-DE|` ,
36- // regular
37- `ART-LOJBAN|CEL-GAULISH|NO-BOK|NO-NYN|ZH-GUOYU|ZH-HAKKA|ZH-MIN|ZH-MIN-NAN|ZH-XIANG|` ,
38- // privateuse
39- `X-[A-Z0-9]{1,8}` ,
40- `)$` ,
41-
42- `|` ,
43-
44- // langtag
45- `^` ,
46- `((?:[A-Z]{2,3}(?:-[A-Z]{3}){0,3})|[A-Z]{4}|[A-Z]{5,8})` , // group 2: language
47- `(?:-([A-Z]{4}))?` , // group 3: script
48- `(?:-([A-Z]{2}|[0-9]{3}))?` , // group 4: region
49- `(?:-((?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3})(?:-(?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3}))*))?` , // group 5: variant
50- `(?:-((?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+)(?:-(?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+))*))?` , // group 6: extension
51- `(?:-X(?:-[A-Z0-9]{1,8})+)?` ,
52- `$` ,
53- }, "" ))
54-
5557 languageTag := strings .ToUpper (s )
5658
5759 m := languageTagRe .FindStringSubmatch (languageTag )
0 commit comments