Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
39ce524
temp: ignore .claude directory
kdmccormick Apr 1, 2026
c0a0fbc
feat!: Collection.key -> Collection.collection_code
kdmccormick Mar 30, 2026
200f68b
fix(squash): restore Content->Media docstring renames from 7cf1539
kdmccormick Apr 16, 2026
1b5b8cd
fix(squash): keep archive format using 'key' for collections, not 'co…
kdmccormick Apr 16, 2026
9f08503
fix(squash): add code_field_check() helper for DB-level regex constraint
kdmccormick Apr 16, 2026
5e472a2
fix(squash): squash migrations 0007-0010 into single migration
kdmccormick Apr 16, 2026
b7b9a4a
fix(squash): regex style; remove inaccurate migration comment
kdmccormick Apr 16, 2026
d55764c
fix(squash): renumber migration 0007->0008 after upstream/main rebase
kdmccormick Apr 16, 2026
ed97b23
feat!: Component.local_key -> Component.component_code
kdmccormick Apr 15, 2026
fd57876
fix(squash): renumber migrations for linear history after upstream re…
kdmccormick Apr 16, 2026
f4d32b8
fix(squash): squash component migrations into single migration
kdmccormick Apr 16, 2026
69fd649
feat!: Add Container.container_code field
kdmccormick Apr 1, 2026
926cc81
fix(squash): renumber migrations for linear history after upstream re…
kdmccormick Apr 16, 2026
0b32ccf
fix(squash): renumber migration after component squash
kdmccormick Apr 16, 2026
5283857
fixup(squash): fix tests
kdmccormick Apr 16, 2026
b519e96
feat!: Package and Entity `keys` are now opaque `refs`
kdmccormick Apr 1, 2026
530adbc
fix(squash): squash ref migrations into single migration
kdmccormick Apr 16, 2026
4986446
fix(squash): tests
kdmccormick Apr 16, 2026
b9d11db
fix(squash): replace SeparateDatabaseAndState+RunSQL with plain Alter…
kdmccormick Apr 16, 2026
09bbd04
fix(squash): derive entity_ref from code fields, don't accept it as a…
kdmccormick Apr 16, 2026
9fb3f04
feat!: ComponentVersionMedia.key -> ComponentVersionMedia.path
kdmccormick Apr 2, 2026
234f41f
fix(squash): squash media migrations into single migration
kdmccormick Apr 16, 2026
5c28c6f
fix(squash): replace SeparateDatabaseAndState+RunSQL with plain Alter…
kdmccormick Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.claude
*.py[cod]
__pycache__
.pytest_cache
Expand Down
22 changes: 11 additions & 11 deletions olx_importer/management/commands/load_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,33 +61,33 @@ def init_known_types(self):

def add_arguments(self, parser):
parser.add_argument("course_data_path", type=pathlib.Path)
parser.add_argument("learning_package_key", type=str)
parser.add_argument("learning_package_ref", type=str)

def handle(self, course_data_path, learning_package_key, **options):
def handle(self, course_data_path, learning_package_ref, **options):
self.course_data_path = course_data_path
self.learning_package_key = learning_package_key
self.load_course_data(learning_package_key)
self.learning_package_ref = learning_package_ref
self.load_course_data(learning_package_ref)

def get_course_title(self):
course_type_dir = self.course_data_path / "course"
course_xml_file = next(course_type_dir.glob("*.xml"))
course_root = ET.parse(course_xml_file).getroot()
return course_root.attrib.get("display_name", "Unknown Course")

def load_course_data(self, learning_package_key):
def load_course_data(self, learning_package_ref):
print(f"Importing course from: {self.course_data_path}")
now = datetime.now(timezone.utc)
title = self.get_course_title()

if content_api.learning_package_exists(learning_package_key):
if content_api.learning_package_exists(learning_package_ref):
raise CommandError(
f"{learning_package_key} already exists. "
f"{learning_package_ref} already exists. "
"This command currently only supports initial import."
)

with transaction.atomic():
self.learning_package = content_api.create_learning_package(
learning_package_key, title, created=now,
learning_package_ref, title, created=now,
)
for block_type in SUPPORTED_TYPES:
self.import_block_type(block_type, now) #, publish_log_entry)
Expand Down Expand Up @@ -140,7 +140,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry):

for xml_file_path in block_data_path.glob("*.xml"):
components_found += 1
local_key = xml_file_path.stem
component_code = xml_file_path.stem

# Do some basic parsing of the content to see if it's even well
# constructed enough to add (or whether we should skip/error on it).
Expand All @@ -155,7 +155,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry):
_component, component_version = content_api.create_component_and_version(
self.learning_package.id,
component_type=block_type,
local_key=local_key,
component_code=component_code,
title=display_name,
created=now,
created_by=None,
Expand All @@ -173,7 +173,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry):
content_api.create_component_version_media(
component_version,
text_content.pk,
key="block.xml",
path="block.xml",
)

# Cycle through static assets references and add those as well...
Expand Down
14 changes: 8 additions & 6 deletions src/openedx_content/applets/backup_restore/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,28 @@

from django.contrib.auth.models import User as UserType # pylint: disable=imported-auth-user

from ..publishing.api import get_learning_package_by_key
from ..publishing.api import get_learning_package_by_ref
from .zipper import LearningPackageUnzipper, LearningPackageZipper


def create_zip_file(lp_key: str, path: str, user: UserType | None = None, origin_server: str | None = None) -> None:
def create_zip_file(
package_ref: str, path: str, user: UserType | None = None, origin_server: str | None = None
) -> None:
"""
Creates a dump zip file for the learning package identified by the given ref at the given path.
The zip file contains a TOML representation of the learning package and its contents.

Can throw a NotFoundError at get_learning_package_by_key
Can throw a NotFoundError at get_learning_package_by_ref
"""
learning_package = get_learning_package_by_key(lp_key)
learning_package = get_learning_package_by_ref(package_ref)
LearningPackageZipper(learning_package, user, origin_server).create_zip(path)


def load_learning_package(path: str, key: str | None = None, user: UserType | None = None) -> dict:
def load_learning_package(path: str, package_ref: str | None = None, user: UserType | None = None) -> dict:
"""
Loads a learning package from a zip file at the given path.
Restores the learning package and its contents to the database.
Returns a dictionary with the status of the operation and any errors encountered.
"""
with zipfile.ZipFile(path, "r") as zipf:
return LearningPackageUnzipper(zipf, key, user).load()
return LearningPackageUnzipper(zipf, package_ref, user).load()
135 changes: 107 additions & 28 deletions src/openedx_content/applets/backup_restore/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,30 @@ class LearningPackageSerializer(serializers.Serializer): # pylint: disable=abst
"""
Serializer for learning packages.

Archives created in Verawood or later write ``package_ref`` (and also
``key``, for Ulmo back-compat). Archives created in Ulmo write only
``key``. Both are accepted; ``package_ref`` takes precedence.

Note:
The `key` field is serialized, but it is generally not trustworthy for restoration.
During restore, a new key may be generated or overridden.
The ref/key field is serialized but is generally not trustworthy for
restoration. During restore, a new ref may be generated or overridden.
"""

title = serializers.CharField(required=True)
key = serializers.CharField(required=True)
package_ref = serializers.CharField(required=False)
key = serializers.CharField(required=False)
description = serializers.CharField(required=True, allow_blank=True)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)

def validate(self, attrs):
package_ref = attrs.pop("package_ref", None)
legacy_key = attrs.pop("key", None)
ref = package_ref or legacy_key
if not ref:
raise serializers.ValidationError("Either 'package_ref' or 'key' is required.")
attrs["package_ref"] = ref # Normalise to 'package_ref' for create_learning_package.
return attrs


class LearningPackageMetadataSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Expand All @@ -40,40 +55,91 @@ class LearningPackageMetadataSerializer(serializers.Serializer): # pylint: disa
class EntitySerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for publishable entities.

Archives created in Verawood or later write ``entity_ref`` (and also
``key``, for Ulmo back-compat). Archives created in Ulmo write only
``key``. Both are accepted; ``entity_ref`` takes precedence.
"""

can_stand_alone = serializers.BooleanField(required=True)
key = serializers.CharField(required=True)
entity_ref = serializers.CharField(required=False)
key = serializers.CharField(required=False)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)

def validate(self, attrs):
entity_ref = attrs.pop("entity_ref", None)
legacy_key = attrs.pop("key", None)
ref = entity_ref or legacy_key
if not ref:
raise serializers.ValidationError("Either 'entity_ref' or 'key' is required.")
attrs["entity_ref"] = ref
return attrs


class EntityVersionSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for publishable entity versions.
"""
title = serializers.CharField(required=True)
entity_key = serializers.CharField(required=True)
entity_ref = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
version_num = serializers.IntegerField(required=True)


class ComponentSerializer(EntitySerializer): # pylint: disable=abstract-method
"""
Serializer for components.
Contains logic to convert entity_key to component_type and local_key.

Extracts component_type and component_code from the [entity.component]
section if present (archives created in Verawood or later). Falls back to
parsing the entity key for archives created in Ulmo.
"""

component = serializers.DictField(required=False)

def validate(self, attrs):
"""
Custom validation logic:
parse the entity_key into (component_type, local_key).
Custom validation logic: resolve component_type and component_code.

Archives created in Verawood or later supply an [entity.component]
section with ``component_type`` (e.g. "xblock.v1:problem") and
``component_code`` (e.g. "my_example"). Archives created in Ulmo only
have the entity ``key`` in the format
``"{namespace}:{type_name}:{component_code}"``, so we fall back to
parsing that for backwards compatibility.
"""
entity_key = attrs["key"]
try:
component_type_obj, local_key = components_api.get_or_create_component_type_by_entity_key(entity_key)
attrs["component_type"] = component_type_obj
attrs["local_key"] = local_key
except ValueError as exc:
raise serializers.ValidationError({"key": str(exc)})
super().validate(attrs)
component_section = attrs.pop("component", None)
if component_section:
# Verawood+ format: component_type and component_code are explicit.
component_type_str = component_section.get("component_type", "")
component_code = component_section.get("component_code", "")
try:
namespace, type_name = component_type_str.split(":", 1)
except ValueError as exc:
raise serializers.ValidationError(
{"component": f"Invalid component_type format: {component_type_str!r}. "
"Expected '{namespace}:{type_name}'."}
) from exc
component_type_obj = components_api.get_or_create_component_type(namespace, type_name)
else:
# Ulmo (legacy) format: parse the entity_ref (which was normalized
# from "key" in super().validate()) assuming the format:
# "{namespace}:{type_name}:{component_code}". This parsing is
# intentionally only here — entity_ref must not be parsed anywhere
# else in the codebase. Verawood+ archives may not follow this
# convention.
entity_ref = attrs["entity_ref"]
try:
namespace, type_name, component_code = entity_ref.split(":", 2)
except ValueError as exc:
raise serializers.ValidationError(
{"key": f"Invalid entity key format: {entity_ref!r}. "
"Expected '{namespace}:{type_name}:{component_code}'."}
) from exc
component_type_obj = components_api.get_or_create_component_type(namespace, type_name)
attrs["component_type"] = component_type_obj
attrs["component_code"] = component_code
return attrs


Expand All @@ -86,35 +152,46 @@ class ComponentVersionSerializer(EntityVersionSerializer): # pylint: disable=ab
class ContainerSerializer(EntitySerializer): # pylint: disable=abstract-method
"""
Serializer for containers.

Extracts container_code from the [entity.container] section.
Archives created in Verawood or later include an explicit
``container_code`` field. Archives created in Ulmo do not, so we
fall back to using the entity key as the container_code.
"""

container = serializers.DictField(required=True)

def validate_container(self, value):
"""
Custom validation logic for the container field.
Ensures that the container dict has exactly one key which is one of
"section", "subsection", or "unit" values.
Ensures that the container dict has exactly one type key ("section",
"subsection", or "unit"), optionally alongside "container_code".
"""
errors = []
if not isinstance(value, dict) or len(value) != 1:
errors.append("Container must be a dict with exactly one key.")
if len(value) == 1: # Only check the key if there is exactly one
container_type = list(value.keys())[0]
if container_type not in ("section", "subsection", "unit"):
errors.append(f"Invalid container value: {container_type}")
type_keys = [k for k in value if k in ("section", "subsection", "unit")]
if len(type_keys) != 1:
errors.append(
"Container must have exactly one type key: 'section', 'subsection', or 'unit'."
)
if errors:
raise serializers.ValidationError(errors)
return value

def validate(self, attrs):
"""
Custom validation logic:
parse the container dict to extract the container type.
Custom validation logic: extract container_type and container_code.

Archives created in Verawood or later supply an explicit
``container_code`` field inside [entity.container]. Archives created
in Ulmo do not, so we fall back to using the entity key.
"""
container = attrs["container"]
container_type = list(container.keys())[0] # It is safe to do this after validate_container
super().validate(attrs)
container = attrs.pop("container")
# It is safe to do this after validate_container
container_type = next(k for k in container if k in ("section", "subsection", "unit"))
attrs["container_type"] = container_type
attrs.pop("container") # Remove the container field after processing
# Verawood+: container_code is explicit. Ulmo: fall back to entity_ref.
attrs["container_code"] = container.get("container_code") or attrs["entity_ref"]
return attrs


Expand Down Expand Up @@ -156,6 +233,8 @@ class CollectionSerializer(serializers.Serializer): # pylint: disable=abstract-
Serializer for collections.
"""
title = serializers.CharField(required=True)
# Note: the model field is now Collection.collection_code, but the archive
# format still uses "key". A future v2 format may align the name.
key = serializers.CharField(required=True)
description = serializers.CharField(required=True, allow_blank=True)
entities = serializers.ListField(
Expand Down
35 changes: 27 additions & 8 deletions src/openedx_content/applets/backup_restore/toml.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def toml_learning_package(
# Learning package main info
section = tomlkit.table()
section.add("title", learning_package.title)
section.add("key", learning_package.key)
# Write package_ref (Verawood+) and key (Ulmo back-compat).
section.add("package_ref", learning_package.package_ref)
section.add("key", learning_package.package_ref)
section.add("description", learning_package.description)
section.add("created", learning_package.created)
section.add("updated", learning_package.updated)
Expand Down Expand Up @@ -89,8 +91,10 @@ def _get_toml_publishable_entity_table(
"""
entity_table = tomlkit.table()
entity_table.add("can_stand_alone", entity.can_stand_alone)
# Add key since the toml filename doesn't show the real key
entity_table.add("key", entity.key)
# Write entity_ref (Verawood+) and key (Ulmo back-compat) so that older
# restore code can still read archives produced after this rename.
entity_table.add("entity_ref", entity.entity_ref)
entity_table.add("key", entity.entity_ref)
entity_table.add("created", entity.created)

if not include_versions:
Expand All @@ -108,12 +112,25 @@ def _get_toml_publishable_entity_table(
published_table.add(tomlkit.comment("unpublished: no published_version_num"))
entity_table.add("published", published_table)

if hasattr(entity, "component"):
component = entity.component
component_table = tomlkit.table()
# Write component_type and component_code explicitly so that restore
# (Verawood and later) does not need to parse the entity key.
component_table.add("component_type", str(component.component_type))
component_table.add("component_code", component.component_code)
entity_table.add("component", component_table)

if hasattr(entity, "container"):
container = entity.container
container_table = tomlkit.table()
# Write container_code explicitly so that restore (Verawood and later)
# does not need to parse the entity key.
container_table.add("container_code", container.container_code)
container_types = ["section", "subsection", "unit"]

for container_type in container_types:
if hasattr(entity.container, container_type):
if hasattr(container, container_type):
container_table.add(container_type, tomlkit.table())
break # stop after the first match

Expand Down Expand Up @@ -191,13 +208,13 @@ def toml_publishable_entity_version(version: PublishableEntityVersion) -> tomlki
if hasattr(version, 'containerversion'):
# If the version has a container version, add its children
container_table = tomlkit.table()
children = containers_api.get_container_children_entities_keys(version.containerversion)
children = containers_api.get_container_children_entity_refs(version.containerversion)
container_table.add("children", children)
version_table.add("container", container_table)
return version_table


def toml_collection(collection: Collection, entity_keys: list[str]) -> str:
def toml_collection(collection: Collection, entity_refs: list[str]) -> str:
"""
Create a TOML representation of a collection.

Expand All @@ -215,12 +232,14 @@ def toml_collection(collection: Collection, entity_keys: list[str]) -> str:
doc = tomlkit.document()

entities_array = tomlkit.array()
entities_array.extend(entity_keys)
entities_array.extend(entity_refs)
entities_array.multiline(True)

collection_table = tomlkit.table()
collection_table.add("title", collection.title)
collection_table.add("key", collection.key)
# Note: the model field is now Collection.collection_code, but the archive
# format still uses "key". A future v2 format may align the name.
collection_table.add("key", collection.collection_code)
collection_table.add("description", collection.description)
collection_table.add("created", collection.created)
collection_table.add("entities", entities_array)
Expand Down
Loading
Loading