Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/openedx_content/applets/collections/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ class Collection(models.Model):
# collection's opaque key:
# e.g. "lib-collection:{org_code}:{library_code}:{collection_code}"
# is the opaque key for a library collection.
collection_code = code_field()
# TODO: Consider supporting unicode https://github.com/openedx/openedx-platform/issues/38413
collection_code = code_field(unicode=False)

title = case_insensitive_char_field(
null=False,
Expand Down Expand Up @@ -179,7 +180,7 @@ class Meta:
],
name="oel_coll_uniq_lp_key",
),
code_field_check("collection_code", name="oel_coll_collection_code_regex"),
code_field_check("collection_code", name="oel_coll_collection_code_regex", unicode=False),
]
indexes = [
models.Index(
Expand Down
4 changes: 2 additions & 2 deletions src/openedx_content/applets/components/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def pk(self):
# component_code is an identifier that is local to the learning_package and
# component_type. The publishable.entity_ref is derived from component_type
# and component_code.
component_code = code_field()
component_code = code_field(unicode=True)

class Meta:
constraints = [
Expand All @@ -198,7 +198,7 @@ class Meta:
],
name="oel_component_uniq_lc_ct_lk",
),
code_field_check("component_code", name="oel_component_code_regex"),
code_field_check("component_code", name="oel_component_code_regex", unicode=True),
]
indexes = [
# Global Component-Type/Component-Code Index:
Expand Down
4 changes: 2 additions & 2 deletions src/openedx_content/applets/containers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class Container(PublishableEntityMixin):
# container_code is an identifier that is local to the learning_package.
# Unlike component_code, it is unique across all container types within
# the same LearningPackage.
container_code = code_field()
container_code = code_field(unicode=True)

@property
def id(self) -> ID:
Expand All @@ -212,7 +212,7 @@ class Meta:
fields=["learning_package", "container_code"],
name="oel_container_uniq_lp_cc",
),
code_field_check("container_code", name="oel_container_code_regex"),
code_field_check("container_code", name="oel_container_code_regex", unicode=True),
]

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,14 @@ class Migration(migrations.Migration):
migrations.AddConstraint(
model_name='component',
constraint=models.CheckConstraint(
condition=django.db.models.lookups.Regex(models.F('component_code'), '^[a-zA-Z0-9_.-]+\\Z'),
# The original version of this migration had an ascii-only regex constraint,
# matching the django-level RegexValidator defined above. However,
# that constraint caused an IntegrityError on some dev sites with non-ascii component
# codes in libraries, which technically we allow. So, we've loosened this constraint
# just to ensure that the migration applies cleanly. Migration 0013 will re-create
# the constraint and validator to be unicode-friendly, regardless of whether 0009
# was applied with the ascii-only or unicode-friendly constraint.
condition=django.db.models.lookups.Regex(models.F('component_code'), '^[\\w.-]+\\Z'),
name='oel_component_code_regex',
violation_error_message='Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.',
),
Expand Down
9 changes: 8 additions & 1 deletion src/openedx_content/migrations/0010_add_container_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,14 @@ class Migration(migrations.Migration):
migrations.AddConstraint(
model_name='container',
constraint=models.CheckConstraint(
condition=django.db.models.lookups.Regex(models.F('container_code'), '^[a-zA-Z0-9_.-]+\\Z'),
# The original version of this migration had an ascii-only regex constraint,
# matching the django-level RegexValidator defined above. However,
# that constraint caused an IntegrityError on some dev sites with non-ascii component
# codes in libraries, which technically we allow. So, we've loosened this constraint
# just to ensure that the migration applies cleanly. Migration 0013 will re-create
# the constraint and validator to be unicode-friendly, regardless of whether 0010
# was applied with the ascii-only or unicode-friendly constraint.
condition=django.db.models.lookups.Regex(models.F('container_code'), '^[\\w.-]+\\Z'),
name='oel_container_code_regex',
violation_error_message='Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.',
),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Generated by Django 5.2.13 on 2026-04-23 14:15

import re

import django.core.validators
import django.db.models.lookups
from django.conf import settings
from django.db import migrations, models

import openedx_django_lib.fields


class Migration(migrations.Migration):

dependencies = [
('openedx_content', '0012_rename_componentversionmedia_key_to_path'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]

operations = [
# For the vast majority of sites, re-creating these constraints is redundant with
# 0009-0010. But, a handful of developers had applied 0009 and 0010 when they contained
# ascii-only constraints. Re-creating the constraints here ensure that everyone's on
# the same page with identical, unicode-friendly constraints.
migrations.RemoveConstraint(
model_name='component',
name='oel_component_code_regex',
),
migrations.RemoveConstraint(
model_name='container',
name='oel_container_code_regex',
),
migrations.AlterField(
model_name='collection',
name='collection_code',
field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[a-zA-Z0-9_.-]+\\Z'), 'Enter a valid "code name" consisting of latin letters (A-Z, a-z), numbers, underscores, hyphens, or periods.', 'invalid')]),
),
migrations.AlterField(
model_name='component',
name='component_code',
field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[\\w.-]+\\Z'), 'Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.', 'invalid')]),
),
migrations.AlterField(
model_name='container',
name='container_code',
field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[\\w.-]+\\Z'), 'Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.', 'invalid')]),
),
migrations.AddConstraint(
model_name='component',
constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('component_code'), '^[\\w.-]+\\Z'), name='oel_component_code_regex', violation_error_message='Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.'),
),
migrations.AddConstraint(
model_name='container',
constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('container_code'), '^[\\w.-]+\\Z'), name='oel_container_code_regex', violation_error_message='Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.'),
),
Comment on lines +52 to +55
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This updated constraint is already added in migration 0009, so why does it need to get re-created in this migration?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to spell this out in the PR description. It's for those of us developers who already installed 0.43.0 into platform and applied 0009 and 0010, getting the ascii-only constraints.

I guess I could omit this, and instead just send out a slack blast to the handful of devs who might be in this situation, telling them to back-migrate to 0008. I think I'd also yank 0.43 from PyPI in that case.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bradenmacdonald let me know if you'd prefer this :

I guess I could omit this, and instead just send out a slack blast to the handful of devs who might be in this situation, telling them to back-migrate to 0008. I think I'd also yank 0.43 from PyPI in that case.

Copy link
Copy Markdown
Contributor

@bradenmacdonald bradenmacdonald Apr 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, sorry I didn't notice the PR description. Since your platform PR was not merged I expect you can count the number of affected developers on one hand. But it is a nice idea to resolve it automatically; if you're keeping it, maybe mention that in a comment in the migration itself too? Either way is good with me!

Copy link
Copy Markdown
Member Author

@kdmccormick kdmccormick Apr 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great, I added comments!

migrations.AlterConstraint(
model_name='collection',
name='oel_coll_collection_code_regex',
constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('collection_code'), '^[a-zA-Z0-9_.-]+\\Z'), name='oel_coll_collection_code_regex', violation_error_message='Enter a valid "code name" consisting of latin letters (A-Z, a-z), numbers, underscores, hyphens, or periods.'),
),
]
2 changes: 1 addition & 1 deletion src/openedx_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
"""

# The version for the entire repository
__version__ = "0.43.0"
__version__ = "0.44.0"
38 changes: 26 additions & 12 deletions src/openedx_django_lib/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,21 @@ def immutable_uuid_field() -> models.UUIDField:


# Alphanumeric, hyphens, underscores, periods
CODE_REGEX = re.compile(r"^[a-zA-Z0-9_.-]+\Z")
CODE_REGEX_ASCII = re.compile(r"^[a-zA-Z0-9_.-]+\Z")

# Anything which passes isalnum(), plus underscores, hyphens, and periods
CODE_REGEX_UNICODE = re.compile(r"^[\w.-]+\Z", flags=re.UNICODE)

_CODE_VIOLATION_MSG = _(
'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.'
_CODE_VIOLATION_MSG_ASCII = _(
'Enter a valid "code name" consisting of latin letters (A-Z, a-z), numbers, underscores, hyphens, or periods.'
)

_CODE_VIOLATION_MSG_UNICODE = _(
'Enter a valid "code name" consisting of any letters, numbers, underscores, hyphens, or periods.'
)


def code_field(**kwargs) -> MultiCollationCharField:
def code_field(unicode: bool, **kwargs) -> MultiCollationCharField:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that this is mandatory.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same. I'd rather force developers to think about which kind of code they're defining.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we can make it default to true after doing openedx/openedx-platform#38413

"""
Field to hold a 'code', i.e. a slug-like local identifier.

Expand All @@ -139,19 +145,18 @@ def code_field(**kwargs) -> MultiCollationCharField:
blank=False,
validators=[
RegexValidator(
CODE_REGEX,
# Translators: "letters" means latin letters: a-z and A-Z.
_CODE_VIOLATION_MSG,
CODE_REGEX_UNICODE if unicode else CODE_REGEX_ASCII,
_CODE_VIOLATION_MSG_UNICODE if unicode else _CODE_VIOLATION_MSG_ASCII,
"invalid",
),
],
**kwargs,
)


def code_field_check(field_name: str, *, name: str) -> models.CheckConstraint:
def code_field_check(field_name: str, *, name: str, unicode: bool) -> models.CheckConstraint:
"""
Return a ``CheckConstraint`` that enforces :data:`CODE_REGEX` at the DB level.
Return a ``CheckConstraint`` that enforces :data:`CODE_REGEX_UNICODE` or :data:`CODE_REGEX_ASCII` at the DB level.

Django validators (used by :func:`code_field`) are not called on ``.save()``
or ``.update()``. Adding this constraint ensures the regex is also enforced
Expand All @@ -162,13 +167,22 @@ def code_field_check(field_name: str, *, name: str) -> models.CheckConstraint:

class Meta:
constraints = [
code_field_check("my_code_field", name="myapp_mymodel_my_code_field_regex"),
code_field_check(
"my_code_field",
name="myapp_mymodel_my_code_field_regex",
unicode=True/False, # Make sure this matches the code_field!
),
]
"""
return models.CheckConstraint(
condition=Regex(models.F(field_name), CODE_REGEX.pattern),
condition=Regex(
models.F(field_name),
(CODE_REGEX_UNICODE if unicode else CODE_REGEX_ASCII).pattern,
),
name=name,
violation_error_message=_CODE_VIOLATION_MSG,
violation_error_message=(
_CODE_VIOLATION_MSG_UNICODE if unicode else _CODE_VIOLATION_MSG_ASCII
),
)


Expand Down
2 changes: 2 additions & 0 deletions tests/openedx_content/applets/collections/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ def test_create_collection_invalid_code(self):
"has@symbol",
"has/slash",
"has#hash",
"café", # non-ascii letters not allowed for collection_code
"柏倉隆史",
]
for code in invalid_codes:
with self.subTest(code=code):
Expand Down
32 changes: 32 additions & 0 deletions tests/openedx_content/applets/components/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User as UserType # pylint: disable=imported-auth-user
from django.core.exceptions import ObjectDoesNotExist
from django.db import IntegrityError
from django.test import TestCase

from openedx_content.applets.collections import api as collection_api
Expand Down Expand Up @@ -385,6 +386,37 @@ def test_exists_by_code(self):
component_code='not_my_component',
)

def test_unicode_code(self):
"""component_code supports non-ascii letters."""
unicode_code = "柏倉隆史"
component = components_api.create_component(
self.learning_package.id,
component_type=self.problem_type,
component_code=unicode_code,
created=self.now,
created_by=None,
)
assert component.component_code == unicode_code
assert components_api.get_component_by_code(
self.learning_package.id,
namespace='xblock.v1',
type_name='problem',
component_code=unicode_code,
).id == component.id

def test_create_container_fails_with_invalid_chars(self):
"""component_code does NOT support whitespace, most symbols, emoji"""
for invalid_code in ["a b", "a,b", "a:b", "a☃b"]:
with self.subTest(invalid_code=invalid_code):
with self.assertRaisesRegex(IntegrityError, r'.*oel_component_code_regex.*'):
components_api.create_component(
self.learning_package.id,
component_type=self.problem_type,
component_code=invalid_code,
created=self.now,
created_by=None,
)


class CreateNewVersionsTestCase(ComponentTestCase):
"""
Expand Down
15 changes: 15 additions & 0 deletions tests/openedx_content/applets/containers/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,21 @@ def test_create_generic_empty_container(lp: LearningPackage, admin_user) -> None
containers_api.get_container_children_count(container, published=True)


def test_unicode_code(lp: LearningPackage) -> None:
"""container_code supports non-ascii letters."""
unicode_code = "柏倉隆史"
container, _ = containers_api.create_container_and_version(
lp.id,
container_code=unicode_code,
title="Unicode Container",
container_cls=TestContainer,
created=now,
created_by=None,
)
assert container.container_code == unicode_code
assert containers_api.get_container_by_code(lp.id, unicode_code).id == container.id


def test_create_container_queries(lp: LearningPackage, child_entity1: TestEntity, django_assert_num_queries) -> None:
"""Test how many database queries are required to create a container."""
base_args: dict[str, Any] = {
Expand Down
Loading