Skip to content

Commit 3726d4b

Browse files
committed
fix wrong auto-detection of country by language
When a locale contains only a language but no country, the phone number generator tries to guess the country. For a few specific languages, the guess was wrong. This happened for languages whose code coincidentally matches the country code of an unrelated country.
```
new Faker(new Locale("am")).phoneNumber(); // generated an Armenian phone number instead of an Ethiopian one
new Faker(new Locale("ar")).phoneNumber(); // generated an Argentine phone number instead of a Saudi Arabian one
```
etc. Inspired by #1788
1 parent 4117c61 commit 3726d4b

File tree

2 files changed

+66
-11
lines changed

2 files changed

+66
-11
lines changed

src/main/java/net/datafaker/providers/base/PhoneNumber.java

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -80,29 +80,48 @@ private static String countryCodeIso2(Locale locale) {
8080
/**
8181
* A hack to detect country when only a language is given.
8282
* <p>
83-
* It's not correct because most languages are used in multiple countries.
84-
* If users need to generate random phone number, they should create locale with country,
85-
* e.g. {@code new Locale("ta_IN")}, and not just {@code new Locale("ta")}.
83+
* It's not correct because most languages are used in multiple countries.
84+
* If users need to generate a random phone number, they should create a locale with a country,
85+
* e.g. {@code new Locale("ta", "IN")}, and not just {@code new Locale("ta")}.
8686
* </p>
8787
* <p>
88-
* We keep this mapping here just for backward compatibility.
88+
* We keep this mapping here just for backward compatibility.
8989
* </p>
9090
*/
9191
private static String detectCountryByLanguage(String language) {
9292
return switch (language) {
93+
case "af" -> "ZA"; // Afrikaans language -> South Africa
94+
case "ar" -> "SA"; // Arabic language -> Saudi Arabia (SA)
95+
case "am" -> "ET"; // Amharic language -> Ethiopia (ET)
96+
case "be" -> "BY"; // Belarus
97+
case "bn" -> "BD"; // Bengali language -> Bangladesh (BD)
98+
case "bs" -> "BA"; // Bosnian language -> Bosnia & Herzegovina (BA)
99+
case "ca" -> "ES"; // Catalan language -> Spain (ES)
100+
case "cy" -> "GB"; // Welsh language -> United Kingdom (GB)
101+
case "cs" -> "CZ"; // Czech Republic
102+
case "el" -> "GR"; // Greece
103+
case "et" -> "EE"; // Estonian language -> Estonia (EE)
93104
case "en" -> "US"; // it has been used by default for English
94-
case "test" -> "US"; // What the hell is "test" language?
105+
case "eu" -> "ES"; // Basque (Basque Country | Spain)
106+
case "fa" -> "IR"; // Persian language (Farsi) -> Iran (IR)
107+
case "ga" -> "IE"; // Irish/Gaelic language -> Ireland (IE)
108+
case "gl" -> "ES"; // Galician (Spain)
109+
case "he" -> "IL"; // Israel
110+
case "hi" -> "IN"; // Hindi language -> India
95111
case "hy" -> "AM"; // Armenia
96-
case "uk" -> "UA"; // Ukraine
97112
case "ja" -> "JP"; // Japan
98-
case "fa" -> "IR"; // Iran
99113
case "ka" -> "GE"; // Georgia
100-
case "sq" -> "AL"; // Albania
101-
case "cs" -> "CZ"; // Czech Republic
102-
case "be" -> "BY"; // Belarus
114+
case "km" -> "KH"; // Khmer language -> Cambodia (KH)
103115
case "ko" -> "KR"; // Korea
104-
case "he" -> "IL"; // Israel
116+
case "mo" -> "MD"; // Moldavian language -> Moldova
117+
case "sq" -> "AL"; // Albania
118+
case "sw" -> "TZ"; // Swahili language -> Tanzania (TZ)
119+
case "ug" -> "CN"; // Uyghur language -> China (CN)
120+
case "ur" -> "PK"; // Urdu language -> Pakistan (PK)
105121
case "ta" -> "IN"; // Tamil language -> India (though, Tamil is used in multiple countries)
122+
case "test" -> "US"; // What the hell is "test" language?
123+
case "uk" -> "UA"; // Ukraine
124+
case "zh" -> "CN"; // Chinese language -> China (CN)
106125
default -> language.toUpperCase(ROOT);
107126
};
108127
}

src/test/java/net/datafaker/providers/base/PhoneNumberValidityFinderTest.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.junit.jupiter.api.TestInstance;
1010
import org.junit.jupiter.params.ParameterizedTest;
1111
import org.junit.jupiter.params.provider.Arguments;
12+
import org.junit.jupiter.params.provider.CsvSource;
1213
import org.junit.jupiter.params.provider.MethodSource;
1314

1415
import java.util.Locale;
@@ -53,6 +54,41 @@ void testLanguageOnlyPersianLocaleUsesIran() {
5354
assertThat(localFaker.phoneNumber().countryCodeIso2()).isEqualTo("IR");
5455
}
5556

57+
@ParameterizedTest
58+
@CsvSource({
59+
"en, US",
60+
"hy, AM", // Armenian
61+
"uk, UA", // Ukrainian
62+
"ja, JP", // Japanese
63+
"fa, IR", // fa = Persian (Farsi), primarily used in Iran
64+
"ka, GE", // Georgian
65+
"sq, AL", // Albanian
66+
"cs, CZ", // Czech
67+
"be, BY", // Belarusian
68+
"he, IL", // he = Hebrew - a Semitic language used by the ancient Hebrews and in modern Israel
69+
"ta, IN", // Tamil
70+
"et, EE", // Estonian
71+
"el, GR", // Greek → Greece (el ≠ gr)
72+
"eu, ES", // Basque → Spain (eu ≠ es; Basque Country)
73+
"ca, ES", // Catalan → Spain (ca is also Canada's TLD, but lang→ES)
74+
"cy, GB", // Welsh → United Kingdom
75+
"ga, IE", // Irish/Gaelic → Ireland
76+
"is, IS", // Icelandic → Iceland (happens to match here — keep for completeness)
77+
"bs, BA", // Bosnian → Bosnia & Herzegovina
78+
"ar, SA", // Arabic → Saudi Arabia (ar ≠ sa; Arabic used across 20+ countries)
79+
"hi, IN", // Hindi → India (hi ≠ in)
80+
"zh, CN", // Chinese → China (zh ≠ cn)
81+
"am, ET", // Amharic → Ethiopia (am ≠ et; am is Armenia's country code!)
82+
"sw, TZ", // Swahili → Tanzania (sw ≠ tz; also widely used in KE, UG)
83+
"af, ZA", // Afrikaans → South Africa (af ≠ za; af is Afghanistan's code!)
84+
85+
})
86+
void detectsCountryByLanguage(String language, String expectedCountry) {
87+
BaseFaker localFaker = new BaseFaker(new Locale(language));
88+
89+
assertThat(localFaker.phoneNumber().countryCodeIso2()).isEqualTo(expectedCountry);
90+
}
91+
5692
@ParameterizedTest
5793
@MethodSource("allSupportedLocales")
5894
void testAllPhoneNumbers(Locale supportedLocale) throws NumberParseException {

0 commit comments

Comments
 (0)