From adbc6c4cf12af74c38444dfc73a4039f79c3b041 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Mon, 27 Apr 2026 13:05:01 -0400 Subject: [PATCH 1/2] feat!: remove create_component_version_media Media associations must now be specified at the time when new ComponentVersions are created: * create_component_version * create_component_and_version * create_next_component_version ComponentVersions are intended to be immutable, and having create_component_version_media meant that we were encouraging a pattern where people would create a component version first, and then make modifications to it after the fact. This bumps the version to 0.47.0. --- src/openedx_content/applets/components/api.py | 177 ++++++++++-------- src/openedx_content/applets/media/models.py | 12 ++ .../migrations/0014_typed_media_id.py | 28 +++ src/openedx_core/__init__.py | 2 +- .../applets/backup_restore/test_backup.py | 36 ++-- .../applets/components/test_api.py | 76 ++++++-- .../applets/components/test_assets.py | 36 ++-- 7 files changed, 224 insertions(+), 143 deletions(-) create mode 100644 src/openedx_content/migrations/0014_typed_media_id.py diff --git a/src/openedx_content/applets/components/api.py b/src/openedx_content/applets/components/api.py index 4c9f136a1..051506a49 100644 --- a/src/openedx_content/applets/components/api.py +++ b/src/openedx_content/applets/components/api.py @@ -15,6 +15,7 @@ import mimetypes from datetime import datetime from enum import StrEnum, auto +from functools import cache from logging import getLogger from pathlib import Path from uuid import UUID @@ -24,6 +25,7 @@ from django.http.response import HttpResponse, HttpResponseNotFound from ..media import api as media_api +from ..media.models import Media from ..publishing import api as publishing_api from .models import Component, ComponentType, ComponentVersion, ComponentVersionMedia, LearningPackage @@ -45,7 +47,6 @@ "component_exists_by_code", "get_collection_components", "get_components", - "create_component_version_media", "look_up_component_version_media", "AssetError", "get_redirect_response_for_component_asset", @@ -115,6 +116,8 @@ def create_component_version( title: str, created: datetime, created_by: int | None, + *, + media: dict[str, Media.ID | Media | bytes] | None = None, ) -> ComponentVersion: """ Create a new ComponentVersion @@ -131,13 +134,16 @@ def create_component_version( publishable_entity_version=publishable_entity_version, component_id=component_id, ) + if media: + _set_component_version_media(component_version, media, created=created) + return component_version def create_next_component_version( component_id: Component.ID, /, - media_to_replace: dict[str, int | None | bytes], + media_to_replace: dict[str, Media.ID | Media | bytes | None], created: datetime, title: str | None = None, created_by: int | None = None, @@ -191,8 +197,6 @@ def create_next_component_version( Why not use create_component_version? The main reason is that we want to reuse the logic to create a static file component from a dictionary. - TODO: Have to add learning_downloadable info to this when it comes time to - support static asset download. """ # This needs to grab the highest version_num for this Publishable Entity. # This will often be the Draft version, but not always. For instance, if @@ -225,50 +229,31 @@ def create_next_component_version( publishable_entity_version=publishable_entity_version, component_id=component_id, ) - # First copy the new stuff over... - for key, media_pk_or_bytes in media_to_replace.items(): - # If the media_pk is None, it means we want to remove the - # media represented by our key from the next version. Otherwise, - # we add our key->media_pk mapping to the next version. - if media_pk_or_bytes is not None: - if isinstance(media_pk_or_bytes, bytes): - file_path, file_media = key, media_pk_or_bytes - media_type_str, _encoding = mimetypes.guess_type(file_path) - # We use "application/octet-stream" as a generic fallback media type, per - # RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046 - media_type_str = media_type_str or "application/octet-stream" - media_type = media_api.get_or_create_media_type(media_type_str) - media = media_api.get_or_create_file_media( - component.learning_package.id, - media_type.id, - data=file_media, - created=created, - ) - media_pk = media.pk - else: - media_pk = media_pk_or_bytes - ComponentVersionMedia.objects.create( - media_id=media_pk, - component_version=component_version, - path=key, - ) - if ignore_previous_media: - return component_version - - # Now copy any old associations that existed, as long as they aren't - # in conflict with the new stuff or marked for deletion. - last_version_media_mapping = ComponentVersionMedia.objects \ - .filter(component_version=last_version) - for cvrc in last_version_media_mapping: - if cvrc.path not in media_to_replace: - ComponentVersionMedia.objects.create( - media_id=cvrc.media_id, - component_version=component_version, - path=cvrc.path, + if ignore_previous_media or last_version is None: + paths_to_media = { + path: media + for path, media in media_to_replace.items() + if media is not None # Ignore deletion entries in this case. + } + else: + # Most of the time, we're adding our media changes as a delta on top + # of the last version's media. + previous_media = { + cvm.path: cvm.media_id + for cvm in ComponentVersionMedia.objects.filter( + component_version=last_version ) + } + paths_to_media = { + path: media + for path, media in (previous_media | media_to_replace).items() + if media is not None # "media is None" means "delete this" + } - return component_version + _set_component_version_media(component_version, paths_to_media, created) + + return component_version def create_component_and_version( # pylint: disable=too-many-positional-arguments @@ -281,6 +266,7 @@ def create_component_and_version( # pylint: disable=too-many-positional-argumen created_by: int | None = None, *, can_stand_alone: bool = True, + media: dict[str, Media.ID | Media | bytes] | None = None, ) -> tuple[Component, ComponentVersion]: """ Create a Component and associated ComponentVersion atomically. @@ -300,8 +286,76 @@ def create_component_and_version( # pylint: disable=too-many-positional-argumen title=title, created=created, created_by=created_by, + media=media or {}, ) - return (component, component_version) + + return (component, component_version) + + +def _set_component_version_media( + version: ComponentVersion, + paths_to_media_values: dict[str, Media.ID | Media | bytes], + created: datetime, +): + """ + Internal helper to set the Media for this ComponentVersion. + + Only call this when we're first initializing a ComponentVersion. + + Media can be specified as ``bytes`` for testing convenience, but you will + almost always want to create a Media object first in actual app code, + because that will give you better control over the MIME type and storage + specifics (file vs. database). + + Note that unlike create_next_component_version(), we don't accept `None` as + a media value here. This function does not carry over any Media associations + from past ComponentVersions, so our "None means Delete" convention doesn't + apply here. + """ + @cache # want to avoid repeated lookups, e.g. a component with ten images + def cached_media_type(media_type_str): + return media_api.get_or_create_media_type(media_type_str) + + # We allow a range of values to be in paths_to_media_values, but we want to + # normalize to media_ids for our bulk insert later. + paths_to_media_ids: dict[str, Media.ID] = {} + + for path, media_value in paths_to_media_values.items(): + match media_value: + case int(): # MediaID + media_id = media_value + case Media(): + media_id = media_value.id + case bytes(): + media_type_str, _encoding = mimetypes.guess_type(path) + # We use "application/octet-stream" as a generic fallback media type, per + # RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046 + media_type_str = media_type_str or "application/octet-stream" + media_type = cached_media_type(media_type_str) + media = media_api.get_or_create_file_media( + version.component.learning_package.id, + media_type.id, + data=media_value, + created=created, + ) + media_id = media.id + case _: + raise ValueError(f"Invalid object for paths_to_media Media: {media_value!r}") + + # Don't allow whitespace, absolute paths, or Windows-style paths + normalized_path = path.strip().replace('\\', '/').lstrip('/') + paths_to_media_ids[normalized_path] = media_id + + ComponentVersionMedia.objects.bulk_create( + [ + ComponentVersionMedia( + component_version=version, + path=normalized_path, + media_id=media_id, + ) + for normalized_path, media_id in paths_to_media_ids.items() + ] + ) def get_component(component_id: Component.ID, /) -> Component: @@ -462,37 +516,6 @@ def look_up_component_version_media( ).get(queries) -def create_component_version_media( - component_version_id: int, - media_id: int, - /, - path: str, -) -> ComponentVersionMedia: - """ - Add a Media to the given ComponentVersion - - We don't allow paths that would be absolute, e.g. ones that start with - '/'. Storing these causes headaches with building relative paths and because - of mismatches with things that expect a leading slash and those that don't. - So for safety and consistency, we strip off leading slashes and emit a - warning when we do. - """ - if path.startswith('/'): - logger.warning( - "Absolute paths are not supported: " - f"removed leading '/' from ComponentVersion {component_version_id} " - f"media path: {repr(path)} (media_id: {media_id})" - ) - path = path.lstrip('/') - - cvrc, _created = ComponentVersionMedia.objects.get_or_create( - component_version_id=component_version_id, - media_id=media_id, - path=path, - ) - return cvrc - - class AssetError(StrEnum): """Error codes related to fetching ComponentVersion assets.""" ASSET_PATH_NOT_FOUND_FOR_COMPONENT_VERSION = auto() diff --git a/src/openedx_content/applets/media/models.py b/src/openedx_content/applets/media/models.py index 755405f77..30836358d 100644 --- a/src/openedx_content/applets/media/models.py +++ b/src/openedx_content/applets/media/models.py @@ -7,6 +7,7 @@ from functools import cache, cached_property from logging import getLogger +from typing import NewType from django.conf import settings from django.core.exceptions import ImproperlyConfigured, ValidationError @@ -18,6 +19,7 @@ from openedx_django_lib.fields import ( MultiCollationTextField, + TypedBigAutoField, case_insensitive_char_field, hash_field, manual_date_time_field, @@ -240,6 +242,16 @@ class Media(models.Model): # could be as much as 200K of data if we had nothing but emojis. MAX_TEXT_LENGTH = 50_000 + # Custom type for our primary key, to make it more type-safe when using in + # API calls. + MediaID = NewType("MediaID", int) + type ID = MediaID + + class IDField(TypedBigAutoField[ID]): # Boilerplate for fully-typed ID field. + pass + + id = IDField(primary_key=True) + objects: models.Manager[Media] = WithRelationsManager('media_type') learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) diff --git a/src/openedx_content/migrations/0014_typed_media_id.py b/src/openedx_content/migrations/0014_typed_media_id.py new file mode 100644 index 000000000..5bb900831 --- /dev/null +++ b/src/openedx_content/migrations/0014_typed_media_id.py @@ -0,0 +1,28 @@ +# Generated by Django 5.2.12 on 2026-04-28 14:40 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + """ + Media.id was previously auto-created by Django (hence auto_created=True, + verbose_name='ID' in the original migration). It is now explicitly declared + on the model. The database column is unchanged (still BIGINT AUTO_INCREMENT + PRIMARY KEY), so database_operations is empty. + """ + dependencies = [ + ('openedx_content', '0013_unicode_container_component_codes'), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AlterField( + model_name='media', + name='id', + field=models.BigAutoField(primary_key=True, serialize=False), + ), + ], + database_operations=[], + ) + ] diff --git a/src/openedx_core/__init__.py b/src/openedx_core/__init__.py index 863a0e058..a835233dc 100644 --- a/src/openedx_core/__init__.py +++ b/src/openedx_core/__init__.py @@ -6,4 +6,4 @@ """ # The version for the entire repository -__version__ = "0.46.0" +__version__ = "0.47.0" diff --git a/tests/openedx_content/applets/backup_restore/test_backup.py b/tests/openedx_content/applets/backup_restore/test_backup.py index 5dddce65a..1c9e5cf8b 100644 --- a/tests/openedx_content/applets/backup_restore/test_backup.py +++ b/tests/openedx_content/applets/backup_restore/test_backup.py @@ -93,23 +93,19 @@ def setUpTestData(cls): published_at=cls.now, ) - new_problem_version = api.create_next_component_version( - cls.published_component.id, - title="My published problem draft v2", - media_to_replace={}, - created=cls.now, - ) - new_txt_media = api.get_or_create_text_media( cls.learning_package.id, text_media_type.id, text="This is some data", created=cls.now, ) - api.create_component_version_media( - new_problem_version.pk, - new_txt_media.pk, - path="hello.txt", + api.create_next_component_version( + cls.published_component.id, + title="My published problem draft v2", + media_to_replace={ + 'hello.txt': new_txt_media + }, + created=cls.now, ) # Create a Draft component, one in each learning package @@ -122,23 +118,19 @@ def setUpTestData(cls): created_by=cls.user.id, ) - new_html_version = api.create_next_component_version( - cls.draft_component.id, - title="My draft html v2", - media_to_replace={}, - created=cls.now, - ) - cls.html_asset_media = api.get_or_create_file_media( cls.learning_package.id, html_media_type.id, data=b"hello world!", created=cls.now, ) - api.create_component_version_media( - new_html_version.pk, - cls.html_asset_media.id, - path="static/other/subdirectory/hello.html", + api.create_next_component_version( + cls.draft_component.id, + title="My draft html v2", + media_to_replace={ + "static/other/subdirectory/hello.html": cls.html_asset_media + }, + created=cls.now, ) components = api.get_publishable_entities(cls.learning_package) diff --git a/tests/openedx_content/applets/components/test_api.py b/tests/openedx_content/applets/components/test_api.py index 785a142e3..2ae052eba 100644 --- a/tests/openedx_content/applets/components/test_api.py +++ b/tests/openedx_content/applets/components/test_api.py @@ -438,24 +438,23 @@ def setUpTestData(cls) -> None: cls.text_media_type = media_api.get_or_create_media_type("text/plain") def test_add(self): - new_version = components_api.create_component_version( - self.problem.id, - version_num=1, - title="My Title", - created=self.now, - created_by=None, - ) new_media = media_api.get_or_create_text_media( self.learning_package.id, self.text_media_type.id, text="This is some data", created=self.now, ) - components_api.create_component_version_media( - new_version.pk, - new_media.pk, - path="my/path/to/hello.txt", + components_api.create_component_version( + self.problem.id, + version_num=1, + title="My Title", + created=self.now, + created_by=None, + media={ + "my/path/to/hello.txt": new_media + } ) + # re-fetch from the database to check to see if we wrote it correctly new_version = components_api.get_component(self.problem.id) \ .versions \ @@ -466,20 +465,59 @@ def test_add(self): ) # Write the same content again, but to an absolute path (should auto- - # strip) the leading '/'s. - components_api.create_component_version_media( - new_version.pk, - new_media.pk, - path="//nested/path/hello.txt", + # strip the leading '/'s) and Windows style separators (should convert + # to "/"). + new_version = components_api.create_next_component_version( + self.problem.id, + media_to_replace={ + "//nested\\path\\hello.txt": new_media + }, + created=self.now, ) - new_version = components_api.get_component(self.problem.id) \ - .versions \ - .get(publishable_entity_version__version_num=1) assert ( new_media == new_version.media.get(componentversionmedia__path="nested/path/hello.txt") ) + def test_create_with_media_or_id(self): + """Test that we can create Media associations with Media or Media.ID.""" + python_src_media = media_api.get_or_create_file_media( + self.learning_package.id, + self.text_media_type.id, + data=b"print('hello world!')", + created=self.now, + ) + _problem, version = components_api.create_component_and_version( + self.learning_package.id, + component_type=self.problem_type, + component_code="media_problem_1", + title="Testing Media Associations", + created=self.now, + media={ + 'hello.py': python_src_media, + 'hello2.py': python_src_media.id, + 'hello.txt': b"Bytes are tested in test_bytes_content()" + } + ) + assert ( + python_src_media == + version.media.get(componentversionmedia__path="hello.py") == + version.media.get(componentversionmedia__path="hello2.py") + ) + + # But we don't accept None as a media value + with self.assertRaises(ValueError): + components_api.create_component_and_version( + self.learning_package.id, + component_type=self.problem_type, + component_code="media_problem_2", + title="This shouldn't work", + created=self.now, + media={ + "block.xml": None + } + ) + def test_bytes_content(self): bytes_media = b'raw content' diff --git a/tests/openedx_content/applets/components/test_assets.py b/tests/openedx_content/applets/components/test_assets.py index d580e86b2..27a37abee 100644 --- a/tests/openedx_content/applets/components/test_assets.py +++ b/tests/openedx_content/applets/components/test_assets.py @@ -56,15 +56,6 @@ def setUpTestData(cls) -> None: package_ref="ComponentTestCase-test-key", title="Components Test Case Learning Package", ) - cls.component, cls.component_version = components_api.create_component_and_version( - cls.learning_package.id, - component_type=cls.problem_type, - component_code="my_problem", - title="My Problem", - created=cls.now, - created_by=None, - ) - # ProblemBlock content that is stored as text Content, not a file. cls.problem_media = media_api.get_or_create_text_media( cls.learning_package.id, @@ -72,11 +63,6 @@ def setUpTestData(cls) -> None: text="(pretend problem OLX is here)", created=cls.now, ) - components_api.create_component_version_media( - cls.component_version.pk, - cls.problem_media.id, - path="block.xml", - ) # Python source file, stored as a file. This is hypothetical, as we # don't actually support bundling grader files like this today. @@ -86,12 +72,6 @@ def setUpTestData(cls) -> None: data=b"print('hello world!')", created=cls.now, ) - components_api.create_component_version_media( - cls.component_version.pk, - cls.python_source_asset.id, - path="src/grader.py", - ) - # An HTML file that is student downloadable cls.html_asset_media = media_api.get_or_create_file_media( cls.learning_package.id, @@ -99,10 +79,18 @@ def setUpTestData(cls) -> None: data=b"hello world!", created=cls.now, ) - components_api.create_component_version_media( - cls.component_version.pk, - cls.html_asset_media.id, - path="static/hello.html", + cls.component, cls.component_version = components_api.create_component_and_version( + cls.learning_package.id, + component_type=cls.problem_type, + component_code="my_problem", + title="My Problem", + created=cls.now, + created_by=None, + media={ + "block.xml": cls.problem_media, + "src/grader.py": cls.python_source_asset, + "static/hello.html": cls.html_asset_media + } ) def test_no_component_version(self): From cdab85db38e38b114898513e02a89b3a9c74041e Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 28 Apr 2026 20:18:23 -0400 Subject: [PATCH 2/2] temp: comments and making path checking stricter --- src/openedx_content/applets/components/api.py | 44 +++++++++++++--- .../applets/components/test_api.py | 52 ++++++++++++++----- 2 files changed, 76 insertions(+), 20 deletions(-) diff --git a/src/openedx_content/applets/components/api.py b/src/openedx_content/applets/components/api.py index 051506a49..2a08fb159 100644 --- a/src/openedx_content/applets/components/api.py +++ b/src/openedx_content/applets/components/api.py @@ -121,6 +121,17 @@ def create_component_version( ) -> ComponentVersion: """ Create a new ComponentVersion + + The ``media`` parameter is a dict of file paths to Media-like things (a + Media.ID, Media model object, or simple bytes). This is the Media that we + want to associate with the new ComponentVersion. This will typically include + a "block.xml" for the XBlock OLX definition, and possibly some static files + like "static/diagram.png". + + Media can be specified as ``bytes`` for testing convenience, but you will + almost always want to create a Media object first in actual app code, + because that will give you better control over the MIME type and storage + specifics (file vs. database). """ with atomic(): publishable_entity_version = publishing_api.create_publishable_entity_version( @@ -175,8 +186,14 @@ def create_next_component_version( API or send the media bytes as part of ``media_to_replace`` values. The ``media_to_replace`` dict is a mapping of strings representing the - local path/key for a file, to ``Media.id`` or media bytes values. Using - `None` for a value in this dict means to delete that key in the next version. + local path/key for a file, to ``Media.id``, ``Media`` object, or media bytes + values. Passing media as ``bytes`` is useful for testing purposes, but you + will almost always want to create a Media object first in actual app code, + because that will give you better control over the resulting Media's MIME + type and storage specifics (file vs. database). + + Using `None` for a value in this dict means to delete that key in the next + version. Make sure to wrap the function call on a atomic statement: ``with transaction.atomic():`` @@ -316,16 +333,33 @@ def _set_component_version_media( def cached_media_type(media_type_str): return media_api.get_or_create_media_type(media_type_str) + def valid_path(path): + """No absolute paths, whitespace, or backslashes (Windows separators)""" + return path == path.strip().lstrip('/') and '\\' not in path + # We allow a range of values to be in paths_to_media_values, but we want to # normalize to media_ids for our bulk insert later. paths_to_media_ids: dict[str, Media.ID] = {} + cv_learning_package_id = version.component.learning_package_id + for path, media_value in paths_to_media_values.items(): + if not valid_path(path): + raise ValueError(f"{path!r} is an invalid media path ({version!r})") + match media_value: case int(): # MediaID media_id = media_value case Media(): media_id = media_value.id + if media_value.learning_package_id != cv_learning_package_id: + raise ValueError( + f"Media LearningPackage does not match Component: " + f"Tried to create ComponentVersion {version!r} " + f"(Learning Package ID {cv_learning_package_id!r}) " + f"with Media {media_value!r} " + f"(Learning Package ID {media_value.learning_package_id!r})" + ) case bytes(): media_type_str, _encoding = mimetypes.guess_type(path) # We use "application/octet-stream" as a generic fallback media type, per @@ -333,7 +367,7 @@ def cached_media_type(media_type_str): media_type_str = media_type_str or "application/octet-stream" media_type = cached_media_type(media_type_str) media = media_api.get_or_create_file_media( - version.component.learning_package.id, + cv_learning_package_id, media_type.id, data=media_value, created=created, @@ -342,9 +376,7 @@ def cached_media_type(media_type_str): case _: raise ValueError(f"Invalid object for paths_to_media Media: {media_value!r}") - # Don't allow whitespace, absolute paths, or Windows-style paths - normalized_path = path.strip().replace('\\', '/').lstrip('/') - paths_to_media_ids[normalized_path] = media_id + paths_to_media_ids[path] = media_id ComponentVersionMedia.objects.bulk_create( [ diff --git a/tests/openedx_content/applets/components/test_api.py b/tests/openedx_content/applets/components/test_api.py index 2ae052eba..b56eb67af 100644 --- a/tests/openedx_content/applets/components/test_api.py +++ b/tests/openedx_content/applets/components/test_api.py @@ -464,20 +464,22 @@ def test_add(self): new_version.media.get(componentversionmedia__path="my/path/to/hello.txt") ) - # Write the same content again, but to an absolute path (should auto- - # strip the leading '/'s) and Windows style separators (should convert - # to "/"). - new_version = components_api.create_next_component_version( - self.problem.id, - media_to_replace={ - "//nested\\path\\hello.txt": new_media - }, - created=self.now, - ) - assert ( - new_media == - new_version.media.get(componentversionmedia__path="nested/path/hello.txt") - ) + # Invalid paths raise ValueError + invalid_paths = [ + "/absolute/paths/not-allowed.txt", + " no/whitespace.txt", + "no/whitespace.txt ", + "no\\windows\\style\\seprators", + ] + for invalid_path in invalid_paths: + with self.assertRaises(ValueError): + new_version = components_api.create_next_component_version( + self.problem.id, + media_to_replace={ + invalid_path: new_media + }, + created=self.now, + ) def test_create_with_media_or_id(self): """Test that we can create Media associations with Media or Media.ID.""" @@ -518,6 +520,28 @@ def test_create_with_media_or_id(self): } ) + # We also don't allow a different LearningPackage's Media to be assigned + different_lp = publishing_api.create_learning_package( + "different_lp", title="Media LP" + ) + outside_media = media_api.get_or_create_file_media( + different_lp.id, + self.text_media_type.id, + data=b'Hello outside media!', + created=self.now, + ) + with self.assertRaises(ValueError): + components_api.create_component_and_version( + self.learning_package.id, + component_type=self.problem_type, + component_code="media_problem_3", + title="This also shouldn't work", + created=self.now, + media={ + "invalid-outside-media.txt": outside_media, + } + ) + def test_bytes_content(self): bytes_media = b'raw content'