Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions validators/bcp47.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,12 @@ func isValidBCP47StrictLanguageTag(s string) bool {
return false
}
case n == 4:
return false
// 4-alpha primary subtags are reserved for future use per RFC 5646 §2.1;
// accept them syntactically even though none are currently assigned.
default:
return false
// 5-8 alpha: registered language subtag per RFC 5646 §2.1.
// golang.org/x/text/language does not cover these, so syntactic
// validation by the regex above is sufficient.
}

if script != "" {
Expand Down
94 changes: 94 additions & 0 deletions validators/bcp47_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package validators

import (
"testing"
)

// TestIsValidBCP47StrictLanguageTag exercises isValidBCP47StrictLanguageTag
// with a list of tags that must be accepted followed by a list of tags that
// must be rejected. Every tag runs as its own subtest, named after the tag.
func TestIsValidBCP47StrictLanguageTag(t *testing.T) {
	// Tags the validator is expected to accept.
	accepted := []string{
		// Two-letter primary subtags (ISO 639-1).
		"en",
		"it",
		"fr",
		"de",

		// Three-letter primary subtags that have no two-letter ISO 639-1
		// equivalent; these normalize to themselves in
		// golang.org/x/text/language. Three-letter codes that do have a
		// two-letter form (e.g. "eng" -> "en") appear in the rejected list
		// because the validator requires the canonical form.
		"sgn",
		"tlh",
		"jbo",

		// Primary subtags of 5-8 letters (registered language subtags per
		// RFC 5646 §2.1). These were incorrectly rejected before the fix
		// this test accompanies.
		"abcde",    // 5 chars
		"abcdefg",  // 7 chars
		"abcdefgh", // 8 chars

		// Four-letter primary subtag (reserved for future use per
		// RFC 5646 §2.1).
		"abcd",

		// Language plus region.
		"en-US",
		"it-IT",
		"en-GB",
		"zh-CN",

		// Language plus script.
		"zh-Hant",
		"zh-Hans",
		"sr-Latn",

		// Language plus script plus region.
		"zh-Hant-TW",
		"sr-Latn-RS",

		// Language plus extlang.
		"zh-cmn",

		// Grandfathered irregular tags.
		"i-ami",
		"i-bnn",
		"art-lojban",
		"zh-min",

		// Private use.
		"x-private",
		"x-12345678",
	}

	// Tags the validator is expected to reject.
	rejected := []string{
		// Empty input.
		"",

		// POSIX-style locale names (underscore separator).
		"en_US",
		"en_GB",

		// Primary subtag longer than 8 characters.
		"abcdefghi",

		// Digits in the primary subtag position.
		"1234",

		// Three-letter code that has a two-letter canonical form; the
		// canonical "en" is required instead.
		"eng",

		// Unknown extlang subtag.
		"en-xyz",

		// Invalid region.
		// NOTE(review): a three-letter alphabetic subtag directly after the
		// language sits in the extlang position, so this may be exercising
		// the same path as "en-xyz" — confirm.
		"en-ZZZ",
	}

	// check runs one subtest asserting that the validator returns want for tag.
	check := func(tag string, want bool) {
		t.Run(tag, func(t *testing.T) {
			if got := isValidBCP47StrictLanguageTag(tag); got != want {
				t.Errorf("isValidBCP47StrictLanguageTag(%q) = %v, want %v", tag, got, want)
			}
		})
	}

	for _, tag := range accepted {
		check(tag, true)
	}
	for _, tag := range rejected {
		check(tag, false)
	}
}