Skip to content

Commit 0b01361

Browse files
kdmccormickclaude
andcommitted
feat!: Package and Entity keys are now opaque refs
The `{LearningPackage,PublishableEntity}.key` fields were always meant to be externally-supplied refs whose format does not matter. However, they were effectively being used as parseable psuedo-keys for identifying containers and components, with more assumptions to their structure being baked in over time, both within and outside openedx-core. This commit renames them to `{package,entity}_ref`, and removes all parsing of them from the API and from the internals (except for where required for backcompat with Ulmo ZIP backups). Instead, the pairings of (component_type,component_code) and (container_type,container_code) are used to identify components and containers; their respective entity_refs are derived from those *by conventional default* but also are customizable by callers. BREAKING CHANGE: Renamed key_field(...) -> ref_field(...) in openedx_django_lib. BREAKING CHANGE: Renamed PublishableEntity.key -> PublishableEntityKey.entity_ref BREAKING CHANGE: Renamed PublishableEntityMixin.key -> PublishableEntityMixin.entity_ref BREAKING CHANGE: In create_publishable_entity(...), renamed param key -> entity_ref BREAKING CHANGE: Renamed get_publishable_entity_by_key(...) -> get_publishable_entity_by_ref(...), and renamed param key -> entity_ref BREAKING CHANGE: In get_entity_collections(...), renamed param entity_key -> entity_ref BREAKING CHANGE: In create_component(...)/create_container(...): * Positional key argument removed * Optional entity_ref argument added * Defaults to defaults to "{namespace}:{type_name}:{component_code}" if unspecified BREAKING CHANGE: Function get_or_create_component_type_by_entity_key() removed. BREAKING CHANGE: Rename get_container_children_entities_keys(...) -> get_container_children_entity_refs(...) BREAKING CHANGE: Renamed get_container_by_key(...) -> get_container_by_ref(..) and renamed param key -> entity_ref. BREAKING CHANGE: Renamed get_container_children_entities_keys(...) -> get_container_children_entity_refs(...) BRAEKING CHANGE: Renamed LearningPackage.key -> LearningPackage.package_ref. BREAKING CHANGE: In create_learning_package(...), renamed param key -> package_ref BREAKING CHANGE: In update_learning_package(...), renamed param key -> package_ref BREAKING CHANGE: Renamed get_learning_package_by_key(...) -> get_learning_package_by_ref(...), and renamed param key -> package_ref BREAKING CHANGE: In learning_package_exists(...), renamed param key -> package_ref BREAKING CHANGE: In look_up_component_version_media(...): * Renamed param learning_package_key -> learning_package_ref * Renamed param component_key -> entity_ref BREAKING CHANGE: In add_assets_to_component management command: * Renamed argument learning_package_key -> learning_package_ref * Renamed argument component_key -> entity_ref BREAKING CHANGE In load_learning_package: * Renamed param key -> package_ref * In return dict, within sub-dict ["lp_restored_data"]: * Renamed ["key"] -> ["package_ref"] * Renamed ["archive_lp_key"] -> ["archive_package_ref"] * Renamed ["archive_org_key"] -> ["archive_org_code"] * Renamed ["archive_slug"] -> ["archive_package_code"] * If archive_package_ref cannot be parsed into "{_prefix}:{org_code}:{package_code}", then archvie_org_code and archive_package_code will both be None. Callers should handle this case. (Previously, a ValueError would be raised by backup-restore code.) Backup/Restore format changes (non-breaking): * In [learning_package], key field is renamed -> package_ref. * In [entity], key field is renamed -> entity_ref. * For compatibility with Ulmo instances, both key and {package,entity}_ref will be written in all new archives. * For compatibility with Ulmo archives, both key and {package,entity}_ref will be read, with priority given to the latter. Part of: #322 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 266d7f3 commit 0b01361

37 files changed

Lines changed: 645 additions & 441 deletions

olx_importer/management/commands/load_components.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,33 +61,33 @@ def init_known_types(self):
6161

6262
def add_arguments(self, parser):
6363
parser.add_argument("course_data_path", type=pathlib.Path)
64-
parser.add_argument("learning_package_key", type=str)
64+
parser.add_argument("learning_package_ref", type=str)
6565

66-
def handle(self, course_data_path, learning_package_key, **options):
66+
def handle(self, course_data_path, learning_package_ref, **options):
6767
self.course_data_path = course_data_path
68-
self.learning_package_key = learning_package_key
69-
self.load_course_data(learning_package_key)
68+
self.learning_package_ref = learning_package_ref
69+
self.load_course_data(learning_package_ref)
7070

7171
def get_course_title(self):
7272
course_type_dir = self.course_data_path / "course"
7373
course_xml_file = next(course_type_dir.glob("*.xml"))
7474
course_root = ET.parse(course_xml_file).getroot()
7575
return course_root.attrib.get("display_name", "Unknown Course")
7676

77-
def load_course_data(self, learning_package_key):
77+
def load_course_data(self, learning_package_ref):
7878
print(f"Importing course from: {self.course_data_path}")
7979
now = datetime.now(timezone.utc)
8080
title = self.get_course_title()
8181

82-
if content_api.learning_package_exists(learning_package_key):
82+
if content_api.learning_package_exists(learning_package_ref):
8383
raise CommandError(
84-
f"{learning_package_key} already exists. "
84+
f"{learning_package_ref} already exists. "
8585
"This command currently only supports initial import."
8686
)
8787

8888
with transaction.atomic():
8989
self.learning_package = content_api.create_learning_package(
90-
learning_package_key, title, created=now,
90+
learning_package_ref, title, created=now,
9191
)
9292
for block_type in SUPPORTED_TYPES:
9393
self.import_block_type(block_type, now) #, publish_log_entry)

src/openedx_content/applets/backup_restore/api.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,28 @@
55

66
from django.contrib.auth.models import User as UserType # pylint: disable=imported-auth-user
77

8-
from ..publishing.api import get_learning_package_by_key
8+
from ..publishing.api import get_learning_package_by_ref
99
from .zipper import LearningPackageUnzipper, LearningPackageZipper
1010

1111

12-
def create_zip_file(lp_key: str, path: str, user: UserType | None = None, origin_server: str | None = None) -> None:
12+
def create_zip_file(
13+
package_ref: str, path: str, user: UserType | None = None, origin_server: str | None = None
14+
) -> None:
1315
"""
1416
Creates a dump zip file for the given learning package key at the given path.
1517
The zip file contains a TOML representation of the learning package and its contents.
1618
17-
Can throw a NotFoundError at get_learning_package_by_key
19+
Can throw a NotFoundError at get_learning_package_by_ref
1820
"""
19-
learning_package = get_learning_package_by_key(lp_key)
21+
learning_package = get_learning_package_by_ref(package_ref)
2022
LearningPackageZipper(learning_package, user, origin_server).create_zip(path)
2123

2224

23-
def load_learning_package(path: str, key: str | None = None, user: UserType | None = None) -> dict:
25+
def load_learning_package(path: str, package_ref: str | None = None, user: UserType | None = None) -> dict:
2426
"""
2527
Loads a learning package from a zip file at the given path.
2628
Restores the learning package and its contents to the database.
2729
Returns a dictionary with the status of the operation and any errors encountered.
2830
"""
2931
with zipfile.ZipFile(path, "r") as zipf:
30-
return LearningPackageUnzipper(zipf, key, user).load()
32+
return LearningPackageUnzipper(zipf, package_ref, user).load()

src/openedx_content/applets/backup_restore/serializers.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,30 @@ class LearningPackageSerializer(serializers.Serializer): # pylint: disable=abst
1212
"""
1313
Serializer for learning packages.
1414
15+
Archives created in Verawood or later write ``package_ref``. Archives
16+
created in Ulmo write ``key``. Both are accepted; ``package_ref`` takes
17+
precedence.
18+
1519
Note:
16-
The `key` field is serialized, but it is generally not trustworthy for restoration.
17-
During restore, a new key may be generated or overridden.
20+
The ref/key field is serialized but is generally not trustworthy for
21+
restoration. During restore, a new ref may be generated or overridden.
1822
"""
23+
1924
title = serializers.CharField(required=True)
20-
key = serializers.CharField(required=True)
25+
package_ref = serializers.CharField(required=False)
26+
key = serializers.CharField(required=False)
2127
description = serializers.CharField(required=True, allow_blank=True)
2228
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
2329

30+
def validate(self, attrs):
31+
package_ref = attrs.pop("package_ref", None)
32+
legacy_key = attrs.pop("key", None)
33+
ref = package_ref or legacy_key
34+
if not ref:
35+
raise serializers.ValidationError("Either 'package_ref' or 'key' is required.")
36+
attrs["package_ref"] = ref # Normalise to 'package_ref' for create_learning_package.
37+
return attrs
38+
2439

2540
class LearningPackageMetadataSerializer(serializers.Serializer): # pylint: disable=abstract-method
2641
"""
@@ -40,18 +55,33 @@ class LearningPackageMetadataSerializer(serializers.Serializer): # pylint: disa
4055
class EntitySerializer(serializers.Serializer): # pylint: disable=abstract-method
4156
"""
4257
Serializer for publishable entities.
58+
59+
Archives created in Verawood or later write ``entity_ref``. Archives
60+
created in Ulmo use ``key``. Both are accepted; ``entity_ref`` takes
61+
precedence.
4362
"""
63+
4464
can_stand_alone = serializers.BooleanField(required=True)
45-
key = serializers.CharField(required=True)
65+
entity_ref = serializers.CharField(required=False)
66+
key = serializers.CharField(required=False)
4667
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
4768

69+
def validate(self, attrs):
70+
entity_ref = attrs.pop("entity_ref", None)
71+
legacy_key = attrs.pop("key", None)
72+
ref = entity_ref or legacy_key
73+
if not ref:
74+
raise serializers.ValidationError("Either 'entity_ref' or 'key' is required.")
75+
attrs["entity_ref"] = ref
76+
return attrs
77+
4878

4979
class EntityVersionSerializer(serializers.Serializer): # pylint: disable=abstract-method
5080
"""
5181
Serializer for publishable entity versions.
5282
"""
5383
title = serializers.CharField(required=True)
54-
entity_key = serializers.CharField(required=True)
84+
entity_ref = serializers.CharField(required=True)
5585
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
5686
version_num = serializers.IntegerField(required=True)
5787

@@ -78,6 +108,7 @@ def validate(self, attrs):
78108
``"{namespace}:{type_name}:{component_code}"``, so we fall back to
79109
parsing that for backwards compatibility.
80110
"""
111+
super().validate(attrs)
81112
component_section = attrs.pop("component", None)
82113
if component_section:
83114
# Verawood+ format: component_type and component_code are explicit.
@@ -92,14 +123,21 @@ def validate(self, attrs):
92123
) from exc
93124
component_type_obj = components_api.get_or_create_component_type(namespace, type_name)
94125
else:
95-
# Ulmo (legacy) format: parse the entity key.
96-
entity_key = attrs["key"]
126+
# Ulmo (legacy) format: parse the entity_ref (which ws normalized
127+
# from "key" in super.validate()) assuming the format:
128+
# (namespace, type_name, component_code). This parsing is
129+
# intentionally only here — entity_ref must not be parsed anywhere
130+
# else in the codebase. Verawood+ archives may not follow this
131+
# convention.
132+
entity_ref = attrs["entity_ref"]
97133
try:
98-
component_type_obj, component_code = (
99-
components_api.get_or_create_component_type_by_entity_key(entity_key)
100-
)
134+
namespace, type_name, component_code = entity_ref.split(":", 2)
101135
except ValueError as exc:
102-
raise serializers.ValidationError({"key": str(exc)})
136+
raise serializers.ValidationError(
137+
{"key": f"Invalid entity key format: {entity_ref!r}. "
138+
"Expected '{namespace}:{type_name}:{component_code}'."}
139+
) from exc
140+
component_type_obj = components_api.get_or_create_component_type(namespace, type_name)
103141
attrs["component_type"] = component_type_obj
104142
attrs["component_code"] = component_code
105143
return attrs
@@ -147,12 +185,13 @@ def validate(self, attrs):
147185
``container_code`` field inside [entity.container]. Archives created
148186
in Ulmo do not, so we fall back to using the entity key.
149187
"""
188+
super().validate(attrs)
150189
container = attrs.pop("container")
151190
# It is safe to do this after validate_container
152191
container_type = next(k for k in container if k in ("section", "subsection", "unit"))
153192
attrs["container_type"] = container_type
154-
# Verawood+: container_code is explicit. Ulmo: fall back to entity key.
155-
attrs["container_code"] = container.get("container_code") or attrs["key"]
193+
# Verawood+: container_code is explicit. Ulmo: fall back to entity_ref.
194+
attrs["container_code"] = container.get("container_code") or attrs["entity_ref"]
156195
return attrs
157196

158197

src/openedx_content/applets/backup_restore/toml.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ def toml_learning_package(
4343
# Learning package main info
4444
section = tomlkit.table()
4545
section.add("title", learning_package.title)
46-
section.add("key", learning_package.key)
46+
# Write package_ref (Verawood+) and key (Ulmo back-compat).
47+
section.add("package_ref", learning_package.package_ref)
48+
section.add("key", learning_package.package_ref)
4749
section.add("description", learning_package.description)
4850
section.add("created", learning_package.created)
4951
section.add("updated", learning_package.updated)
@@ -89,8 +91,10 @@ def _get_toml_publishable_entity_table(
8991
"""
9092
entity_table = tomlkit.table()
9193
entity_table.add("can_stand_alone", entity.can_stand_alone)
92-
# Add key since the toml filename doesn't show the real key
93-
entity_table.add("key", entity.key)
94+
# Write entity_ref (Verawood+) and key (Ulmo back-compat) so that older
95+
# restore code can still read archives produced after this rename.
96+
entity_table.add("entity_ref", entity.entity_ref)
97+
entity_table.add("key", entity.entity_ref)
9498
entity_table.add("created", entity.created)
9599

96100
if not include_versions:
@@ -204,13 +208,13 @@ def toml_publishable_entity_version(version: PublishableEntityVersion) -> tomlki
204208
if hasattr(version, 'containerversion'):
205209
# If the version has a container version, add its children
206210
container_table = tomlkit.table()
207-
children = containers_api.get_container_children_entities_keys(version.containerversion)
211+
children = containers_api.get_container_children_entity_refs(version.containerversion)
208212
container_table.add("children", children)
209213
version_table.add("container", container_table)
210214
return version_table
211215

212216

213-
def toml_collection(collection: Collection, entity_keys: list[str]) -> str:
217+
def toml_collection(collection: Collection, entity_refs: list[str]) -> str:
214218
"""
215219
Create a TOML representation of a collection.
216220
@@ -228,7 +232,7 @@ def toml_collection(collection: Collection, entity_keys: list[str]) -> str:
228232
doc = tomlkit.document()
229233

230234
entities_array = tomlkit.array()
231-
entities_array.extend(entity_keys)
235+
entities_array.extend(entity_refs)
232236
entities_array.multiline(True)
233237

234238
collection_table = tomlkit.table()

0 commit comments

Comments
 (0)