Skip to content

Commit ae41792

Browse files
authored
sdk: fix langtag constraints in LangStringSet (#478)
Previously, the LangStringSet checked that added language tags matched the format xx or xx-XX. This did not follow the documented behavior, which requires all language tags conforming to IETF BCP 47 to be accepted. This change replaces the previous constraint check with a regular-expression check that follows the IETF BCP 47 standard. Fixes #157
1 parent 702d74e commit ae41792

2 files changed

Lines changed: 49 additions & 16 deletions

File tree

sdk/basyx/aas/model/base.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -301,11 +301,36 @@ def __init__(self, dict_: Dict[str, str]):
301301

302302
@classmethod
303303
def _check_language_tag_constraints(cls, ltag: str):
304-
split = ltag.split("-", 1)
305-
lang_code = split[0]
306-
if len(lang_code) != 2 or not lang_code.isalpha() or not lang_code.islower():
307-
raise ValueError(f"The language code of the language tag must consist of exactly two lower-case letters! "
308-
f"Given language tag and language code: '{ltag}', '{lang_code}'")
304+
alphanum = "[a-zA-Z0-9]"
305+
singleton = "[0-9A-WY-Za-wy-z]"
306+
extension = f"{singleton}(-({alphanum}){{2,8}})+"
307+
extlang = "[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}"
308+
irregular = (
309+
"(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|"
310+
"i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|"
311+
"i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
312+
)
313+
regular = (
314+
"(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|"
315+
"zh-min|zh-min-nan|zh-xiang)"
316+
)
317+
grandfathered = f"({irregular}|{regular})"
318+
language = f"([a-zA-Z]{{2,3}}(-{extlang})?|[a-zA-Z]{{4}}|[a-zA-Z]{{5,8}})"
319+
script = "[a-zA-Z]{4}"
320+
region = "([a-zA-Z]{2}|[0-9]{3})"
321+
variant = f"(({alphanum}){{5,8}}|[0-9]({alphanum}){{3}})"
322+
privateuse = f"[xX](-({alphanum}){{1,8}})+"
323+
langtag = (
324+
f"{language}(-{script})?(-{region})?(-{variant})*(-{extension})*(-"
325+
f"{privateuse})?"
326+
)
327+
language_tag = f"({langtag}|{privateuse}|{grandfathered})"
328+
329+
pattern = f"^{language_tag}$"
330+
331+
if re.match(pattern, ltag) is None:
332+
raise ValueError(f"The language tag must follow the format defined in BCP 47. "
333+
f"Given language tag: {ltag}")
309334

310335
def __getitem__(self, item: str) -> str:
311336
return self._dict[item]

sdk/test/model/test_base.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,20 +1230,28 @@ def hook(itm: int, _list: List[int]) -> None:
12301230

12311231
class LangStringSetTest(unittest.TestCase):
12321232
def test_language_tag_constraints(self) -> None:
1233+
with self.assertRaises(ValueError):
1234+
model.LangStringSet({"": "bar"})
1235+
12331236
with self.assertRaises(ValueError) as cm:
1234-
model.LangStringSet({"foo": "bar"})
1235-
self.assertEqual("The language code of the language tag must consist of exactly two lower-case letters! "
1236-
"Given language tag and language code: 'foo', 'foo'", str(cm.exception))
1237+
model.LangStringSet({"x": "bar"})
1238+
self.assertEqual(f"The language tag must follow the format defined in BCP 47. "
1239+
f"Given language tag: x", cm.exception.args[0])
12371240

1238-
lss = model.LangStringSet({"fo-OO": "bar"})
12391241
with self.assertRaises(ValueError) as cm:
1240-
lss["foo"] = "bar"
1241-
self.assertEqual("The language code of the language tag must consist of exactly two lower-case letters! "
1242-
"Given language tag and language code: 'foo', 'foo'", str(cm.exception))
1243-
self.assertNotIn("foo", lss)
1244-
self.assertNotIn("fo", lss)
1245-
lss["fo"] = "bar"
1246-
self.assertIn("fo", lss)
1242+
model.LangStringSet({"foo-oo1": "bar"})
1243+
self.assertEqual(f"The language tag must follow the format defined in BCP 47. "
1244+
f"Given language tag: foo-oo1", cm.exception.args[0])
1245+
1246+
lss = model.LangStringSet({"fo-OO": "bar"})
1247+
self.assertIn("fo-OO", lss)
1248+
with self.assertRaises(ValueError):
1249+
lss["foo-oo1"] = "bar"
1250+
self.assertNotIn("foo-oo1", lss)
1251+
1252+
self.assertNotIn("foo-ASDF-OO", lss)
1253+
lss["foo-ASDF-OO"] = "bar"
1254+
self.assertIn("foo-ASDF-OO", lss)
12471255

12481256
def test_empty(self) -> None:
12491257
lss = model.LangStringSet({"fo": "bar", "fo-OO": "baz"})

0 commit comments

Comments
 (0)