@@ -1142,10 +1142,11 @@ class Language:
11421142 In some situations there are competing 2-char codes in code books, such as Lib of Congress (LOC)
11431143 """
11441144
1145- def __init__ (self , iso3 , iso2 , nmlist : list ):
1145+ def __init__ (self , iso3 , iso2 , nmlist : list , locale = None ):
11461146 self .code_iso3 = iso3
11471147 self .code = iso2
11481148 self .names = nmlist
1149+ self .locale = locale
11491150 if nmlist :
11501151 if not isinstance (nmlist , list ):
11511152 raise Exception ("Name list is a list of names for the language. The first one is the default." )
@@ -1171,6 +1172,7 @@ def __str__(self):
11711172def list_languages ():
11721173 """
11731174 List out a flattened list of languages, de-duplicated by ISO2 language ID.
1175+
11741176 TODO: alternatively list out every language
11751177 :return:
11761178 """
@@ -1191,15 +1193,26 @@ def list_languages():
11911193
11921194
11931195def add_language (lg : Language , override = False ):
1196+ """
1197+ The language map for ISO 2-alpha and 3-alpha codes should be protected from language IDs that are dialect or locale codes.
1198+
1199+ "en" ==> en-au, en-gb, en-us, etc.? This is ambiguous
1200+ The reverse is true -- "en-gb" is at least "en" or "eng" english
1201+
1202+ :param lg:
1203+ :param override:
1204+ :return:
1205+ """
11941206 if not lg :
11951207 return
11961208
11971209 codes = []
11981210 if lg .code :
11991211 codes .append (lg .code .lower ())
1200-
12011212 if lg .code_iso3 :
12021213 codes .append (lg .code_iso3 .lower ())
1214+ if lg .locale :
1215+ codes .append (lg .locale .lower ())
12031216
12041217 if lg .names :
12051218 for nm in lg .names :
@@ -1211,7 +1224,13 @@ def add_language(lg: Language, override=False):
12111224 override = True
12121225
12131226 for k in set (codes ):
1214- if k in language_map and not override :
1227+ exists = k in language_map
1228+
1229+ # coding rule: 2 or 3 char alpha codes for ISO or Biblio code books are not overridden.
1230+ if len (k ) <= 3 and exists :
1231+ continue
1232+
1233+ if exists and not override :
12151234 raise Exception (f"Forcibly remap language code? { k } " )
12161235
12171236 language_map [k ] = lg
@@ -1331,31 +1350,34 @@ def load_languages():
13311350 if iso3 in IGNORE_LANGUAGES :
13321351 continue
13331352
1334- L = Language (lang [0 ], lang [2 ], lang_names )
1353+ iso2 = lang [2 ]
1354+ L = Language (iso3 , iso2 , lang_names )
13351355 add_language (L )
13361356 if bib3 :
1337- L = Language (bib3 , lang [ 2 ] , lang_names )
1357+ L = Language (bib3 , iso2 , lang_names )
13381358 add_language (L , override = True )
13391359
13401360 # Some odd additions -- Bibliographic vs. Terminologic codes may vary.
13411361 # FRE vs. FRA is valid for French, for example.
13421362 #
13431363 for lg in [Language ("fra" , "fr" , ["French" ]),
13441364
1345- Language ("zh-cn " , "zh" , ["Chinese" ]),
1365+ Language ("zho " , "zh" , ["Chinese" ], locale = "zh-cn" ),
13461366
13471367 Language (None , "zt" , ["Traditionl Chinese" ]),
1348- Language ("zh-tw" , "zt" , ["Traditionl Chinese/Taiwain" ]),
1368+ Language (None , "zt" , ["Traditionl Chinese/Taiwain" ], locale = "zh-tw" ),
13491369
1350- Language ("fa-AF " , "dr" , ["Dari" , "Afghan Persian" ]),
1370+ Language ("prs " , "dr" , ["Dari" , "Afghan Persian" ], locale = "fa-AF" ),
13511371 Language ("prs" , "dr" , ["Dari" , "Afghan Persian" ]),
13521372
13531373 Language ("eng" , "en" , ["English" ]),
1354- Language ("en-gb" , "en" , ["English" ]),
1355- Language ("en-us" , "en" , ["English" ]),
1356- Language ("en-uk" , "en" , ["English" ]),
1357- Language ("en-ca" , "en" , ["English" ]),
1358- Language ("en-au" , "en" , ["English" ])]:
1374+
1375+ Language ("eng" , "en" , ["English/British" ], locale = "en-gb" ),
1376+ Language ("eng" , "en" , ["English/USA" ], locale = "en-us" ),
1377+ Language ("eng" , "en" , ["English/United Kingdom" ], locale = "en-uk" ),
1378+ Language ("eng" , "en" , ["English/Canadian" ], locale = "en-ca" ),
1379+ Language ("eng" , "en" , ["English/Australian" ], locale = "en-au" )]:
1380+
13591381 add_language (lg , override = True )
13601382
13611383 __language_map_init = True
0 commit comments