@classmethod
def _check_language_tag_constraints(cls, ltag: str):
    """
    Check that a language tag is well-formed according to the ``Language-Tag`` grammar of BCP 47.

    The tag is accepted if, as a whole, it is one of:

    * a regular language tag: language, then optional script, region, variants, extensions and private-use part
      (e.g. ``en``, ``de-CH-1996``, ``zh-Hant-TW``),
    * a pure private-use tag (e.g. ``x-custom``),
    * one of the grandfathered tags listed below (e.g. ``i-klingon``).

    Only the syntax is checked; subtags are not looked up in any registry. The grandfathered tags are compared
    exactly as spelled here, all other subtags accept upper- and lower-case letters.

    :param ltag: The language tag to check
    :raises ValueError: If ``ltag`` is not a well-formed language tag
    """
    alphanum = r"[a-zA-Z0-9]"
    # Any single alphanumeric character except "x"/"X", which is reserved for private use
    singleton = r"[0-9A-WY-Za-wy-z]"
    extension = rf"{singleton}(-{alphanum}{{2,8}})+"
    extlang = r"[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}"
    irregular = (
        r"(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|"
        r"i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|"
        r"i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
    )
    regular = (
        r"(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|"
        r"zh-min|zh-min-nan|zh-xiang)"
    )
    grandfathered = rf"({irregular}|{regular})"
    language = rf"([a-zA-Z]{{2,3}}(-{extlang})?|[a-zA-Z]{{4}}|[a-zA-Z]{{5,8}})"
    script = r"[a-zA-Z]{4}"
    region = r"([a-zA-Z]{2}|[0-9]{3})"
    variant = rf"({alphanum}{{5,8}}|[0-9]{alphanum}{{3}})"
    privateuse = rf"[xX](-{alphanum}{{1,8}})+"
    langtag = (
        rf"{language}(-{script})?(-{region})?(-{variant})*(-{extension})*"
        rf"(-{privateuse})?"
    )
    language_tag = rf"({langtag}|{privateuse}|{grandfathered})"

    # fullmatch() instead of match() with "^...$": "$" also matches right before a trailing newline, which would
    # let a tag like "en\n" slip through.
    if re.fullmatch(language_tag, ltag) is None:
        raise ValueError(f"The language tag must follow the format defined in BCP 47. "
                         f"Given language tag: {ltag}")