Skip to content

Commit 0abc619

Browse files
authored
feat!: remove create_component_version_media (#565)
Media associations must now be specified at the time when new ComponentVersions are created: * create_component_version * create_component_and_version * create_next_component_version ComponentVersions are intended to be immutable, and having create_component_version_media meant that we were encouraging a pattern where people would create a component version first, and then make modifications to it after the fact. Paths are now more strictly checked for whitespace and to make sure they are not absolute paths. This bumps the version to 0.47.0. * temp: comments and making path checking stricter
1 parent 6777915 commit 0abc619

7 files changed

Lines changed: 285 additions & 148 deletions

File tree

src/openedx_content/applets/components/api.py

Lines changed: 134 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import mimetypes
1616
from datetime import datetime
1717
from enum import StrEnum, auto
18+
from functools import cache
1819
from logging import getLogger
1920
from pathlib import Path
2021
from uuid import UUID
@@ -24,6 +25,7 @@
2425
from django.http.response import HttpResponse, HttpResponseNotFound
2526

2627
from ..media import api as media_api
28+
from ..media.models import Media
2729
from ..publishing import api as publishing_api
2830
from .models import Component, ComponentType, ComponentVersion, ComponentVersionMedia, LearningPackage
2931

@@ -45,7 +47,6 @@
4547
"component_exists_by_code",
4648
"get_collection_components",
4749
"get_components",
48-
"create_component_version_media",
4950
"look_up_component_version_media",
5051
"AssetError",
5152
"get_redirect_response_for_component_asset",
@@ -115,9 +116,22 @@ def create_component_version(
115116
title: str,
116117
created: datetime,
117118
created_by: int | None,
119+
*,
120+
media: dict[str, Media.ID | Media | bytes] | None = None,
118121
) -> ComponentVersion:
119122
"""
120123
Create a new ComponentVersion
124+
125+
The ``media`` parameter is a dict of file paths to Media-like things (a
126+
Media.ID, Media model object, or simple bytes). This is the Media that we
127+
want to associate with the new ComponentVersion. This will typically include
128+
a "block.xml" for the XBlock OLX definition, and possibly some static files
129+
like "static/diagram.png".
130+
131+
Media can be specified as ``bytes`` for testing convenience, but you will
132+
almost always want to create a Media object first in actual app code,
133+
because that will give you better control over the MIME type and storage
134+
specifics (file vs. database).
121135
"""
122136
with atomic():
123137
publishable_entity_version = publishing_api.create_publishable_entity_version(
@@ -131,13 +145,16 @@ def create_component_version(
131145
publishable_entity_version=publishable_entity_version,
132146
component_id=component_id,
133147
)
148+
if media:
149+
_set_component_version_media(component_version, media, created=created)
150+
134151
return component_version
135152

136153

137154
def create_next_component_version(
138155
component_id: Component.ID,
139156
/,
140-
media_to_replace: dict[str, int | None | bytes],
157+
media_to_replace: dict[str, Media.ID | Media | bytes | None],
141158
created: datetime,
142159
title: str | None = None,
143160
created_by: int | None = None,
@@ -169,8 +186,14 @@ def create_next_component_version(
169186
API or send the media bytes as part of ``media_to_replace`` values.
170187
171188
The ``media_to_replace`` dict is a mapping of strings representing the
172-
local path/key for a file, to ``Media.id`` or media bytes values. Using
173-
`None` for a value in this dict means to delete that key in the next version.
189+
local path/key for a file, to ``Media.id``, ``Media`` object, or media bytes
190+
values. Passing media as ``bytes`` is useful for testing purposes, but you
191+
will almost always want to create a Media object first in actual app code,
192+
because that will give you better control over the resulting Media's MIME
193+
type and storage specifics (file vs. database).
194+
195+
Using `None` for a value in this dict means to delete that key in the next
196+
version.
174197
175198
Make sure to wrap the function call on a atomic statement:
176199
``with transaction.atomic():``
@@ -191,8 +214,6 @@ def create_next_component_version(
191214
Why not use create_component_version?
192215
The main reason is that we want to reuse the logic to create a static file component from a dictionary.
193216
194-
TODO: Have to add learning_downloadable info to this when it comes time to
195-
support static asset download.
196217
"""
197218
# This needs to grab the highest version_num for this Publishable Entity.
198219
# This will often be the Draft version, but not always. For instance, if
@@ -225,50 +246,31 @@ def create_next_component_version(
225246
publishable_entity_version=publishable_entity_version,
226247
component_id=component_id,
227248
)
228-
# First copy the new stuff over...
229-
for key, media_pk_or_bytes in media_to_replace.items():
230-
# If the media_pk is None, it means we want to remove the
231-
# media represented by our key from the next version. Otherwise,
232-
# we add our key->media_pk mapping to the next version.
233-
if media_pk_or_bytes is not None:
234-
if isinstance(media_pk_or_bytes, bytes):
235-
file_path, file_media = key, media_pk_or_bytes
236-
media_type_str, _encoding = mimetypes.guess_type(file_path)
237-
# We use "application/octet-stream" as a generic fallback media type, per
238-
# RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046
239-
media_type_str = media_type_str or "application/octet-stream"
240-
media_type = media_api.get_or_create_media_type(media_type_str)
241-
media = media_api.get_or_create_file_media(
242-
component.learning_package.id,
243-
media_type.id,
244-
data=file_media,
245-
created=created,
246-
)
247-
media_pk = media.pk
248-
else:
249-
media_pk = media_pk_or_bytes
250-
ComponentVersionMedia.objects.create(
251-
media_id=media_pk,
252-
component_version=component_version,
253-
path=key,
254-
)
255249

256-
if ignore_previous_media:
257-
return component_version
258-
259-
# Now copy any old associations that existed, as long as they aren't
260-
# in conflict with the new stuff or marked for deletion.
261-
last_version_media_mapping = ComponentVersionMedia.objects \
262-
.filter(component_version=last_version)
263-
for cvrc in last_version_media_mapping:
264-
if cvrc.path not in media_to_replace:
265-
ComponentVersionMedia.objects.create(
266-
media_id=cvrc.media_id,
267-
component_version=component_version,
268-
path=cvrc.path,
250+
if ignore_previous_media or last_version is None:
251+
paths_to_media = {
252+
path: media
253+
for path, media in media_to_replace.items()
254+
if media is not None # Ignore deletion entries in this case.
255+
}
256+
else:
257+
# Most of the time, we're adding our media changes as a delta on top
258+
# of the last version's media.
259+
previous_media = {
260+
cvm.path: cvm.media_id
261+
for cvm in ComponentVersionMedia.objects.filter(
262+
component_version=last_version
269263
)
264+
}
265+
paths_to_media = {
266+
path: media
267+
for path, media in (previous_media | media_to_replace).items()
268+
if media is not None # "media is None" means "delete this"
269+
}
270+
271+
_set_component_version_media(component_version, paths_to_media, created)
270272

271-
return component_version
273+
return component_version
272274

273275

274276
def create_component_and_version( # pylint: disable=too-many-positional-arguments
@@ -281,6 +283,7 @@ def create_component_and_version( # pylint: disable=too-many-positional-argumen
281283
created_by: int | None = None,
282284
*,
283285
can_stand_alone: bool = True,
286+
media: dict[str, Media.ID | Media | bytes] | None = None,
284287
) -> tuple[Component, ComponentVersion]:
285288
"""
286289
Create a Component and associated ComponentVersion atomically.
@@ -300,8 +303,91 @@ def create_component_and_version( # pylint: disable=too-many-positional-argumen
300303
title=title,
301304
created=created,
302305
created_by=created_by,
306+
media=media or {},
303307
)
304-
return (component, component_version)
308+
309+
return (component, component_version)
310+
311+
312+
def _set_component_version_media(
313+
version: ComponentVersion,
314+
paths_to_media_values: dict[str, Media.ID | Media | bytes],
315+
created: datetime,
316+
):
317+
"""
318+
Internal helper to set the Media for this ComponentVersion.
319+
320+
Only call this when we're first initializing a ComponentVersion.
321+
322+
Media can be specified as ``bytes`` for testing convenience, but you will
323+
almost always want to create a Media object first in actual app code,
324+
because that will give you better control over the MIME type and storage
325+
specifics (file vs. database).
326+
327+
Note that unlike create_next_component_version(), we don't accept `None` as
328+
a media value here. This function does not carry over any Media associations
329+
from past ComponentVersions, so our "None means Delete" convention doesn't
330+
apply here.
331+
"""
332+
@cache # want to avoid repeated lookups, e.g. a component with ten images
333+
def cached_media_type(media_type_str):
334+
return media_api.get_or_create_media_type(media_type_str)
335+
336+
def valid_path(path):
337+
"""No absolute paths, whitespace, or backslashes (Windows separators)"""
338+
return path == path.strip().lstrip('/') and '\\' not in path
339+
340+
# We allow a range of values to be in paths_to_media_values, but we want to
341+
# normalize to media_ids for our bulk insert later.
342+
paths_to_media_ids: dict[str, Media.ID] = {}
343+
344+
cv_learning_package_id = version.component.learning_package_id
345+
346+
for path, media_value in paths_to_media_values.items():
347+
if not valid_path(path):
348+
raise ValueError(f"{path!r} is an invalid media path ({version!r})")
349+
350+
match media_value:
351+
case int(): # MediaID
352+
media_id = media_value
353+
case Media():
354+
media_id = media_value.id
355+
if media_value.learning_package_id != cv_learning_package_id:
356+
raise ValueError(
357+
f"Media LearningPackage does not match Component: "
358+
f"Tried to create ComponentVersion {version!r} "
359+
f"(Learning Package ID {cv_learning_package_id!r}) "
360+
f"with Media {media_value!r} "
361+
f"(Learning Package ID {media_value.learning_package_id!r})"
362+
)
363+
case bytes():
364+
media_type_str, _encoding = mimetypes.guess_type(path)
365+
# We use "application/octet-stream" as a generic fallback media type, per
366+
# RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046
367+
media_type_str = media_type_str or "application/octet-stream"
368+
media_type = cached_media_type(media_type_str)
369+
media = media_api.get_or_create_file_media(
370+
cv_learning_package_id,
371+
media_type.id,
372+
data=media_value,
373+
created=created,
374+
)
375+
media_id = media.id
376+
case _:
377+
raise ValueError(f"Invalid object for paths_to_media Media: {media_value!r}")
378+
379+
paths_to_media_ids[path] = media_id
380+
381+
ComponentVersionMedia.objects.bulk_create(
382+
[
383+
ComponentVersionMedia(
384+
component_version=version,
385+
path=normalized_path,
386+
media_id=media_id,
387+
)
388+
for normalized_path, media_id in paths_to_media_ids.items()
389+
]
390+
)
305391

306392

307393
def get_component(component_id: Component.ID, /) -> Component:
@@ -462,37 +548,6 @@ def look_up_component_version_media(
462548
).get(queries)
463549

464550

465-
def create_component_version_media(
466-
component_version_id: int,
467-
media_id: int,
468-
/,
469-
path: str,
470-
) -> ComponentVersionMedia:
471-
"""
472-
Add a Media to the given ComponentVersion
473-
474-
We don't allow paths that would be absolute, e.g. ones that start with
475-
'/'. Storing these causes headaches with building relative paths and because
476-
of mismatches with things that expect a leading slash and those that don't.
477-
So for safety and consistency, we strip off leading slashes and emit a
478-
warning when we do.
479-
"""
480-
if path.startswith('/'):
481-
logger.warning(
482-
"Absolute paths are not supported: "
483-
f"removed leading '/' from ComponentVersion {component_version_id} "
484-
f"media path: {repr(path)} (media_id: {media_id})"
485-
)
486-
path = path.lstrip('/')
487-
488-
cvrc, _created = ComponentVersionMedia.objects.get_or_create(
489-
component_version_id=component_version_id,
490-
media_id=media_id,
491-
path=path,
492-
)
493-
return cvrc
494-
495-
496551
class AssetError(StrEnum):
497552
"""Error codes related to fetching ComponentVersion assets."""
498553
ASSET_PATH_NOT_FOUND_FOR_COMPONENT_VERSION = auto()

src/openedx_content/applets/media/models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from functools import cache, cached_property
99
from logging import getLogger
10+
from typing import NewType
1011

1112
from django.conf import settings
1213
from django.core.exceptions import ImproperlyConfigured, ValidationError
@@ -18,6 +19,7 @@
1819

1920
from openedx_django_lib.fields import (
2021
MultiCollationTextField,
22+
TypedBigAutoField,
2123
case_insensitive_char_field,
2224
hash_field,
2325
manual_date_time_field,
@@ -240,6 +242,16 @@ class Media(models.Model):
240242
# could be as much as 200K of data if we had nothing but emojis.
241243
MAX_TEXT_LENGTH = 50_000
242244

245+
# Custom type for our primary key, to make it more type-safe when using in
246+
# API calls.
247+
MediaID = NewType("MediaID", int)
248+
type ID = MediaID
249+
250+
class IDField(TypedBigAutoField[ID]): # Boilerplate for fully-typed ID field.
251+
pass
252+
253+
id = IDField(primary_key=True)
254+
243255
objects: models.Manager[Media] = WithRelationsManager('media_type')
244256

245257
learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Generated by Django 5.2.12 on 2026-04-28 14:40
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
"""
8+
Media.id was previously auto-created by Django (hence auto_created=True,
9+
verbose_name='ID' in the original migration). It is now explicitly declared
10+
on the model. The database column is unchanged (still BIGINT AUTO_INCREMENT
11+
PRIMARY KEY), so database_operations is empty.
12+
"""
13+
dependencies = [
14+
('openedx_content', '0013_unicode_container_component_codes'),
15+
]
16+
17+
operations = [
18+
migrations.SeparateDatabaseAndState(
19+
state_operations=[
20+
migrations.AlterField(
21+
model_name='media',
22+
name='id',
23+
field=models.BigAutoField(primary_key=True, serialize=False),
24+
),
25+
],
26+
database_operations=[],
27+
)
28+
]

src/openedx_core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
"""
77

88
# The version for the entire repository
9-
__version__ = "0.46.0"
9+
__version__ = "0.47.0"

0 commit comments

Comments
 (0)