diff --git a/src/openedx_content/applets/collections/models.py b/src/openedx_content/applets/collections/models.py index a1c0fa4a4..ecea3005c 100644 --- a/src/openedx_content/applets/collections/models.py +++ b/src/openedx_content/applets/collections/models.py @@ -109,7 +109,8 @@ class Collection(models.Model): # collection's opaque key: # e.g. "lib-collection:{org_code}:{library_code}:{collection_code}" # is the opaque key for a library collection. - collection_code = code_field() + # TODO: Consider supporting unicode https://github.com/openedx/openedx-platform/issues/38413 + collection_code = code_field(unicode=False) title = case_insensitive_char_field( null=False, @@ -179,7 +180,7 @@ class Meta: ], name="oel_coll_uniq_lp_key", ), - code_field_check("collection_code", name="oel_coll_collection_code_regex"), + code_field_check("collection_code", name="oel_coll_collection_code_regex", unicode=False), ] indexes = [ models.Index( diff --git a/src/openedx_content/applets/components/models.py b/src/openedx_content/applets/components/models.py index 1852aae6f..5dd0dee7c 100644 --- a/src/openedx_content/applets/components/models.py +++ b/src/openedx_content/applets/components/models.py @@ -180,7 +180,7 @@ def pk(self): # component_code is an identifier that is local to the learning_package and # component_type. The publishable.entity_ref is derived from component_type # and component_code. - component_code = code_field() + component_code = code_field(unicode=True) class Meta: constraints = [ @@ -198,7 +198,7 @@ class Meta: ], name="oel_component_uniq_lc_ct_lk", ), - code_field_check("component_code", name="oel_component_code_regex"), + code_field_check("component_code", name="oel_component_code_regex", unicode=True), ] indexes = [ # Global Component-Type/Component-Code Index: diff --git a/src/openedx_content/applets/containers/models.py b/src/openedx_content/applets/containers/models.py index 8f9de8d23..c234b04b5 100644 --- a/src/openedx_content/applets/containers/models.py +++ b/src/openedx_content/applets/containers/models.py @@ -189,7 +189,7 @@ class Container(PublishableEntityMixin): # container_code is an identifier that is local to the learning_package. # Unlike component_code, it is unique across all container types within # the same LearningPackage. - container_code = code_field() + container_code = code_field(unicode=True) @property def id(self) -> ID: @@ -212,7 +212,7 @@ class Meta: fields=["learning_package", "container_code"], name="oel_container_uniq_lp_cc", ), - code_field_check("container_code", name="oel_container_code_regex"), + code_field_check("container_code", name="oel_container_code_regex", unicode=True), ] @classmethod diff --git a/src/openedx_content/migrations/0009_rename_component_local_key_to_component_code.py b/src/openedx_content/migrations/0009_rename_component_local_key_to_component_code.py index 3879c55b6..06246eedb 100644 --- a/src/openedx_content/migrations/0009_rename_component_local_key_to_component_code.py +++ b/src/openedx_content/migrations/0009_rename_component_local_key_to_component_code.py @@ -59,7 +59,14 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name='component', constraint=models.CheckConstraint( - condition=django.db.models.lookups.Regex(models.F('component_code'), '^[a-zA-Z0-9_.-]+\\Z'), + # The original version of this migration had an ascii-only regex constraint, + # matching the django-level RegexValidator defined above. However, + # that constraint caused an IntegrityError on some dev sites with non-ascii component + # codes in libraries, which technically we allow. So, we've loosened this constraint + # just to ensure that the migration applies cleanly. Migration 0013 will re-create + # the constraint and validator to be unicode-friendly, regardless of whether 0009 + # was applied with the ascii-only or unicode-friendly constraint. + condition=django.db.models.lookups.Regex(models.F('component_code'), '^[\\w.-]+\\Z'), name='oel_component_code_regex', violation_error_message='Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.', ), diff --git a/src/openedx_content/migrations/0010_add_container_code.py b/src/openedx_content/migrations/0010_add_container_code.py index 4dc83cd6a..afd49df93 100644 --- a/src/openedx_content/migrations/0010_add_container_code.py +++ b/src/openedx_content/migrations/0010_add_container_code.py @@ -89,7 +89,14 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name='container', constraint=models.CheckConstraint( - condition=django.db.models.lookups.Regex(models.F('container_code'), '^[a-zA-Z0-9_.-]+\\Z'), + # The original version of this migration had an ascii-only regex constraint, + # matching the django-level RegexValidator defined above. However, + # that constraint caused an IntegrityError on some dev sites with non-ascii component + # codes in libraries, which technically we allow. So, we've loosened this constraint + # just to ensure that the migration applies cleanly. Migration 0013 will re-create + # the constraint and validator to be unicode-friendly, regardless of whether 0010 + # was applied with the ascii-only or unicode-friendly constraint. + condition=django.db.models.lookups.Regex(models.F('container_code'), '^[\\w.-]+\\Z'), name='oel_container_code_regex', violation_error_message='Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.', ), diff --git a/src/openedx_content/migrations/0013_unicode_container_component_codes.py b/src/openedx_content/migrations/0013_unicode_container_component_codes.py new file mode 100644 index 000000000..e944ce2fd --- /dev/null +++ b/src/openedx_content/migrations/0013_unicode_container_component_codes.py @@ -0,0 +1,61 @@ +# Generated by Django 5.2.13 on 2026-04-23 14:15 + +import re + +import django.core.validators +import django.db.models.lookups +from django.conf import settings +from django.db import migrations, models + +import openedx_django_lib.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('openedx_content', '0012_rename_componentversionmedia_key_to_path'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + # For the vast majority of sites, re-creating these constraints is redundant with + # 0009-0010. But, a handful of developers had applied 0009 and 0010 when they contained + # ascii-only constraints. Re-creating the constraints here ensure that everyone's on + # the same page with identical, unicode-friendly constraints. + migrations.RemoveConstraint( + model_name='component', + name='oel_component_code_regex', + ), + migrations.RemoveConstraint( + model_name='container', + name='oel_container_code_regex', + ), + migrations.AlterField( + model_name='collection', + name='collection_code', + field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[a-zA-Z0-9_.-]+\\Z'), 'Enter a valid "code name" consisting of latin letters (A-Z, a-z), numbers, underscores, hyphens, or periods.', 'invalid')]), + ), + migrations.AlterField( + model_name='component', + name='component_code', + field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[\\w.-]+\\Z'), 'Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.', 'invalid')]), + ), + migrations.AlterField( + model_name='container', + name='container_code', + field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[\\w.-]+\\Z'), 'Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.', 'invalid')]), + ), + migrations.AddConstraint( + model_name='component', + constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('component_code'), '^[\\w.-]+\\Z'), name='oel_component_code_regex', violation_error_message='Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.'), + ), + migrations.AddConstraint( + model_name='container', + constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('container_code'), '^[\\w.-]+\\Z'), name='oel_container_code_regex', violation_error_message='Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.'), + ), + migrations.AlterConstraint( + model_name='collection', + name='oel_coll_collection_code_regex', + constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('collection_code'), '^[a-zA-Z0-9_.-]+\\Z'), name='oel_coll_collection_code_regex', violation_error_message='Enter a valid "code name" consisting of latin letters (A-Z, a-z), numbers, underscores, hyphens, or periods.'), + ), + ] diff --git a/src/openedx_core/__init__.py b/src/openedx_core/__init__.py index 415ef3d66..100ec7520 100644 --- a/src/openedx_core/__init__.py +++ b/src/openedx_core/__init__.py @@ -6,4 +6,4 @@ """ # The version for the entire repository -__version__ = "0.43.0" +__version__ = "0.44.0" diff --git a/src/openedx_django_lib/fields.py b/src/openedx_django_lib/fields.py index 20f7a0125..c6f1b1ee3 100644 --- a/src/openedx_django_lib/fields.py +++ b/src/openedx_django_lib/fields.py @@ -119,15 +119,21 @@ def immutable_uuid_field() -> models.UUIDField: # Alphanumeric, hyphens, underscores, periods -CODE_REGEX = re.compile(r"^[a-zA-Z0-9_.-]+\Z") +CODE_REGEX_ASCII = re.compile(r"^[a-zA-Z0-9_.-]+\Z") +# Anything which passes isalnum(), plus underscores, hyphens, and periods +CODE_REGEX_UNICODE = re.compile(r"^[\w.-]+\Z", flags=re.UNICODE) -_CODE_VIOLATION_MSG = _( - 'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.' +_CODE_VIOLATION_MSG_ASCII = _( + 'Enter a valid "code name" consisting of latin letters (A-Z, a-z), numbers, underscores, hyphens, or periods.' +) + +_CODE_VIOLATION_MSG_UNICODE = _( + 'Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.' ) -def code_field(**kwargs) -> MultiCollationCharField: +def code_field(unicode: bool, **kwargs) -> MultiCollationCharField: """ Field to hold a 'code', i.e. a slug-like local identifier. @@ -139,9 +145,8 @@ def code_field(**kwargs) -> MultiCollationCharField: blank=False, validators=[ RegexValidator( - CODE_REGEX, - # Translators: "letters" means latin letters: a-z and A-Z. - _CODE_VIOLATION_MSG, + CODE_REGEX_UNICODE if unicode else CODE_REGEX_ASCII, + _CODE_VIOLATION_MSG_UNICODE if unicode else _CODE_VIOLATION_MSG_ASCII, "invalid", ), ], @@ -149,9 +154,9 @@ def code_field(**kwargs) -> MultiCollationCharField: ) -def code_field_check(field_name: str, *, name: str) -> models.CheckConstraint: +def code_field_check(field_name: str, *, name: str, unicode: bool) -> models.CheckConstraint: """ - Return a ``CheckConstraint`` that enforces :data:`CODE_REGEX` at the DB level. + Return a ``CheckConstraint`` that enforces :data:`CODE_REGEX_UNICODE` or :data:`CODE_REGEX_ASCII` at the DB level. Django validators (used by :func:`code_field`) are not called on ``.save()`` or ``.update()``. Adding this constraint ensures the regex is also enforced @@ -162,13 +167,22 @@ def code_field_check(field_name: str, *, name: str) -> models.CheckConstraint: class Meta: constraints = [ - code_field_check("my_code_field", name="myapp_mymodel_my_code_field_regex"), + code_field_check( + "my_code_field", + name="myapp_mymodel_my_code_field_regex", + unicode=True/False, # Make sure this matches the code_field! + ), ] """ return models.CheckConstraint( - condition=Regex(models.F(field_name), CODE_REGEX.pattern), + condition=Regex( + models.F(field_name), + (CODE_REGEX_UNICODE if unicode else CODE_REGEX_ASCII).pattern, + ), name=name, - violation_error_message=_CODE_VIOLATION_MSG, + violation_error_message=( + _CODE_VIOLATION_MSG_UNICODE if unicode else _CODE_VIOLATION_MSG_ASCII + ), ) diff --git a/tests/openedx_content/applets/collections/test_api.py b/tests/openedx_content/applets/collections/test_api.py index 9eb78224e..76d234dc2 100644 --- a/tests/openedx_content/applets/collections/test_api.py +++ b/tests/openedx_content/applets/collections/test_api.py @@ -229,6 +229,8 @@ def test_create_collection_invalid_code(self): "has@symbol", "has/slash", "has#hash", + "café", # non-ascii letters not allowed for collection_code + "柏倉隆史", ] for code in invalid_codes: with self.subTest(code=code): diff --git a/tests/openedx_content/applets/components/test_api.py b/tests/openedx_content/applets/components/test_api.py index b6960a226..785a142e3 100644 --- a/tests/openedx_content/applets/components/test_api.py +++ b/tests/openedx_content/applets/components/test_api.py @@ -6,6 +6,7 @@ from django.contrib.auth import get_user_model from django.contrib.auth.models import User as UserType # pylint: disable=imported-auth-user from django.core.exceptions import ObjectDoesNotExist +from django.db import IntegrityError from django.test import TestCase from openedx_content.applets.collections import api as collection_api @@ -385,6 +386,37 @@ def test_exists_by_code(self): component_code='not_my_component', ) + def test_unicode_code(self): + """component_code supports non-ascii letters.""" + unicode_code = "柏倉隆史" + component = components_api.create_component( + self.learning_package.id, + component_type=self.problem_type, + component_code=unicode_code, + created=self.now, + created_by=None, + ) + assert component.component_code == unicode_code + assert components_api.get_component_by_code( + self.learning_package.id, + namespace='xblock.v1', + type_name='problem', + component_code=unicode_code, + ).id == component.id + + def test_create_container_fails_with_invalid_chars(self): + """component_code does NOT support whitespace, most symbols, emoji""" + for invalid_code in ["a b", "a,b", "a:b", "a☃b"]: + with self.subTest(invalid_code=invalid_code): + with self.assertRaisesRegex(IntegrityError, r'.*oel_component_code_regex.*'): + components_api.create_component( + self.learning_package.id, + component_type=self.problem_type, + component_code=invalid_code, + created=self.now, + created_by=None, + ) + class CreateNewVersionsTestCase(ComponentTestCase): """ diff --git a/tests/openedx_content/applets/containers/test_api.py b/tests/openedx_content/applets/containers/test_api.py index 01fa81664..0e16b0d9f 100644 --- a/tests/openedx_content/applets/containers/test_api.py +++ b/tests/openedx_content/applets/containers/test_api.py @@ -349,6 +349,21 @@ def test_create_generic_empty_container(lp: LearningPackage, admin_user) -> None containers_api.get_container_children_count(container, published=True) +def test_unicode_code(lp: LearningPackage) -> None: + """container_code supports non-ascii letters.""" + unicode_code = "柏倉隆史" + container, _ = containers_api.create_container_and_version( + lp.id, + container_code=unicode_code, + title="Unicode Container", + container_cls=TestContainer, + created=now, + created_by=None, + ) + assert container.container_code == unicode_code + assert containers_api.get_container_by_code(lp.id, unicode_code).id == container.id + + def test_create_container_queries(lp: LearningPackage, child_entity1: TestEntity, django_assert_num_queries) -> None: """Test how many database queries are required to create a container.""" base_args: dict[str, Any] = {