Skip to content

Commit 47474cb

Browse files
committed
perf: compile BCP 47 language tag regex once at package init
1 parent 629ec5e commit 47474cb

1 file changed

Lines changed: 27 additions & 25 deletions

File tree

validators/bcp47.go

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,33 @@ import (
1010
"golang.org/x/text/language"
1111
)
1212

13+
// languageTagRe is the compiled BCP 47 language tag regex, built once at
14+
// package init to avoid recompiling on every validation call. The pattern only matches upper-case input, so callers must upper-case the tag first.
15+
var languageTagRe = regexp.MustCompile(strings.Join([]string{
16+
// group 1: grandfathered (irregular or regular) or private-use-only tag
17+
`^(`,
18+
// irregular
19+
`EN-GB-OED|I-AMI|I-BNN|I-DEFAULT|I-ENOCHIAN|I-HAK|I-KLINGON|I-LUX|I-MINGO|I-NAVAJO|I-PWN|I-TAO|I-TAY|I-TSU|`,
20+
`SGN-BE-FR|SGN-BE-NL|SGN-CH-DE|`,
21+
// regular
22+
`ART-LOJBAN|CEL-GAULISH|NO-BOK|NO-NYN|ZH-GUOYU|ZH-HAKKA|ZH-MIN|ZH-MIN-NAN|ZH-XIANG|`,
23+
// privateuse
24+
`X-[A-Z0-9]{1,8}`,
25+
`)$`,
26+
27+
`|`,
28+
29+
// langtag
30+
`^`,
31+
`((?:[A-Z]{2,3}(?:-[A-Z]{3}){0,3})|[A-Z]{4}|[A-Z]{5,8})`, // group 2: language
32+
`(?:-([A-Z]{4}))?`, // group 3: script
33+
`(?:-([A-Z]{2}|[0-9]{3}))?`, // group 4: region
34+
`(?:-((?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3})(?:-(?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3}))*))?`, // group 5: variant
35+
`(?:-((?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+)(?:-(?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+))*))?`, // group 6: extension
36+
`(?:-X(?:-[A-Z0-9]{1,8})+)?`,
37+
`$`,
38+
}, ""))
39+
1340
// isBCP47StrictLanguageTag validates a BCP 47 language tag according to
1441
// https://www.rfc-editor.org/rfc/bcp/bcp47.txt, rejecting POSIX-style tags
1542
// (e.g. en_GB) and Unicode extensions, unlike the built-in bcp47_language_tag.
@@ -27,31 +54,6 @@ func isBCP47StrictLanguageTag(fl validator.FieldLevel) bool {
2754
}
2855

2956
func isValidBCP47StrictLanguageTag(s string) bool {
30-
languageTagRe := regexp.MustCompile(strings.Join([]string{
31-
// group 1:
32-
`^(`,
33-
// irregular
34-
`EN-GB-OED|I-AMI|I-BNN|I-DEFAULT|I-ENOCHIAN|I-HAK|I-KLINGON|I-LUX|I-MINGO|I-NAVAJO|I-PWN|I-TAO|I-TAY|I-TSU|`,
35-
`SGN-BE-FR|SGN-BE-NL|SGN-CH-DE|`,
36-
// regular
37-
`ART-LOJBAN|CEL-GAULISH|NO-BOK|NO-NYN|ZH-GUOYU|ZH-HAKKA|ZH-MIN|ZH-MIN-NAN|ZH-XIANG|`,
38-
// privateuse
39-
`X-[A-Z0-9]{1,8}`,
40-
`)$`,
41-
42-
`|`,
43-
44-
// langtag
45-
`^`,
46-
`((?:[A-Z]{2,3}(?:-[A-Z]{3}){0,3})|[A-Z]{4}|[A-Z]{5,8})`, // group 2: language
47-
`(?:-([A-Z]{4}))?`, // group 3: script
48-
`(?:-([A-Z]{2}|[0-9]{3}))?`, // group 4: region
49-
`(?:-((?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3})(?:-(?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3}))*))?`, // group 5: variant
50-
`(?:-((?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+)(?:-(?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+))*))?`, // group 6: extension
51-
`(?:-X(?:-[A-Z0-9]{1,8})+)?`,
52-
`$`,
53-
}, ""))
54-
5557
languageTag := strings.ToUpper(s)
5658

5759
m := languageTagRe.FindStringSubmatch(languageTag)

0 commit comments

Comments
 (0)