Skip to content

Commit 8b576e6

Browse files
committed
fix wrong auto-detection of country by language
When the locale contains only a language, but no country, the phone generator tries to guess the country. For a few specific languages, the guess was wrong. It happened for languages whose code coincidentally matches the code of some other country:
```
new Faker(new Locale("am")).phoneNumber(); // generated an Armenian phone number instead of an Ethiopian one
new Faker(new Locale("ar")).phoneNumber(); // generated an Argentinian phone number instead of a Saudi Arabian one
```
etc. Inspired by #1788
1 parent 4117c61 commit 8b576e6

2 files changed

Lines changed: 67 additions & 11 deletions

File tree

src/main/java/net/datafaker/providers/base/PhoneNumber.java

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -80,29 +80,48 @@ private static String countryCodeIso2(Locale locale) {
8080
/**
8181
* A hack to detect country when only a language is given.
8282
* <p>
83-
* It's not correct because most languages are used in multiple countries.
84-
* If users need to generate random phone number, they should create locale with country,
85-
* e.g. {@code new Locale("ta_IN")}, and not just {@code new Locale("ta")}.
83+
* It's not correct because most languages are used in multiple countries.
84+
* If users need to generate a random phone number, they should create a locale with a country,
85+
* e.g. {@code new Locale("ta", "IN")}, and not just {@code new Locale("ta")}.
8686
* </p>
8787
* <p>
88-
* We keep this mapping here just for backward compatibility.
88+
* We keep this mapping here just for backward compatibility.
8989
* </p>
9090
*/
9191
private static String detectCountryByLanguage(String language) {
9292
return switch (language) {
93+
case "af" -> "ZA"; // Afrikaans language -> South Africa
94+
case "ar" -> "SA"; // Arabic language -> Saudi Arabia (SA)
95+
case "am" -> "ET"; // Amharic language -> Ethiopia (ET)
96+
case "be" -> "BY"; // Belarus
97+
case "bn" -> "BD"; // Bengali language -> Bangladesh (BD)
98+
case "bs" -> "BA"; // Bosnian language -> Bosnia & Herzegovina (BA)
99+
case "ca" -> "ES"; // Catalan language -> Spain (ES)
100+
case "cy" -> "GB"; // Welsh language -> United Kingdom (GB)
101+
case "cs" -> "CZ"; // Czech Republic
102+
case "el" -> "GR"; // Greece
103+
case "et" -> "EE"; // Estonian language -> Estonia (EE)
93104
case "en" -> "US"; // it has been used by default for English
94-
case "test" -> "US"; // What the hell is "test" language?
105+
case "eu" -> "ES"; // Basque (Basque Country | Spain)
106+
case "fa" -> "IR"; // Persian language (Farsi) -> Iran (IR)
107+
case "ga" -> "IE"; // Irish/Gaelic language -> Ireland (IE)
108+
case "gl" -> "ES"; // Galician (Spain)
109+
case "he" -> "IL"; // Israel
110+
case "hi" -> "IN"; // Hindi language -> India
95111
case "hy" -> "AM"; // Armenia
96-
case "uk" -> "UA"; // Ukraine
97112
case "ja" -> "JP"; // Japan
98-
case "fa" -> "IR"; // Iran
99113
case "ka" -> "GE"; // Georgia
100-
case "sq" -> "AL"; // Albania
101-
case "cs" -> "CZ"; // Czech Republic
102-
case "be" -> "BY"; // Belarus
114+
case "km" -> "KH"; // Khmer language -> Cambodia (KH)
103115
case "ko" -> "KR"; // Korea
104-
case "he" -> "IL"; // Israel
116+
case "mo" -> "MD"; // Moldavian language -> Moldova
117+
case "sq" -> "AL"; // Albania
118+
case "sw" -> "TZ"; // Swahili language -> Tanzania (TZ)
119+
case "ug" -> "CN"; // Uyghur language -> China (CN)
120+
case "ur" -> "PK"; // Urdu language -> Pakistan (PK)
105121
case "ta" -> "IN"; // Tamil language -> India (though, Tamil is used in multiple countries)
122+
case "test" -> "US"; // What the hell is "test" language?
123+
case "uk" -> "UA"; // Ukraine
124+
case "zh" -> "CN"; // Chinese language -> China (CN)
106125
default -> language.toUpperCase(ROOT);
107126
};
108127
}

src/test/java/net/datafaker/providers/base/PhoneNumberValidityFinderTest.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.junit.jupiter.api.TestInstance;
1010
import org.junit.jupiter.params.ParameterizedTest;
1111
import org.junit.jupiter.params.provider.Arguments;
12+
import org.junit.jupiter.params.provider.CsvSource;
1213
import org.junit.jupiter.params.provider.MethodSource;
1314

1415
import java.util.Locale;
@@ -53,6 +54,42 @@ void testLanguageOnlyPersianLocaleUsesIran() {
5354
assertThat(localFaker.phoneNumber().countryCodeIso2()).isEqualTo("IR");
5455
}
5556

57+
@ParameterizedTest
58+
@CsvSource({
59+
"en, US, English",
60+
"hy, AM, Armenian → Armenia",
61+
"uk, UA, Ukranian → Ukraine",
62+
"ja, JP, Japanese → Japan",
63+
"fa, IR, Persian (Farsi) → Iran",
64+
"ka, GE, Georgia",
65+
"sq, AL, Albanian → Albania",
66+
"cs, CZ, Czech",
67+
"be, BY, Belarusian",
68+
"he, IL, Hebrew → Israel",
69+
"ta, IN, Tamil → India",
70+
"et, EE, Estonian → Estonia",
71+
"el, GR, Greek → Greece",
72+
"eu, ES, Basque → Spain",
73+
"ca, ES, Catalan → Spain",
74+
"cy, GB, Welsh → United Kingdom",
75+
"ga, IE, Irish/Gaelic → Ireland",
76+
"is, IS, Icelandic → Iceland",
77+
"bs, BA, Bosnian → Bosnia & Herzegovina",
78+
"ar, SA, Arabic → Saudi Arabia",
79+
"hi, IN, Hindi → India",
80+
"zh, CN, Chinese → China",
81+
"am, ET, Amharic → Ethiopia",
82+
"sw, TZ, Swahili → Tanzania",
83+
"af, ZA, Afrikaans → South Africa",
84+
})
85+
void detectsCountryByLanguage(String language, String expectedCountry, String description) {
86+
BaseFaker localFaker = new BaseFaker(new Locale(language));
87+
88+
assertThat(localFaker.phoneNumber().countryCodeIso2())
89+
.as(description)
90+
.isEqualTo(expectedCountry);
91+
}
92+
5693
@ParameterizedTest
5794
@MethodSource("allSupportedLocales")
5895
void testAllPhoneNumbers(Locale supportedLocale) throws NumberParseException {

0 commit comments

Comments
 (0)