Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 134 additions & 79 deletions src/openedx_content/applets/components/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import mimetypes
from datetime import datetime
from enum import StrEnum, auto
from functools import cache
from logging import getLogger
from pathlib import Path
from uuid import UUID
Expand All @@ -24,6 +25,7 @@
from django.http.response import HttpResponse, HttpResponseNotFound

from ..media import api as media_api
from ..media.models import Media
from ..publishing import api as publishing_api
from .models import Component, ComponentType, ComponentVersion, ComponentVersionMedia, LearningPackage

Expand All @@ -45,7 +47,6 @@
"component_exists_by_code",
"get_collection_components",
"get_components",
"create_component_version_media",
"look_up_component_version_media",
"AssetError",
"get_redirect_response_for_component_asset",
Expand Down Expand Up @@ -115,9 +116,22 @@ def create_component_version(
title: str,
created: datetime,
created_by: int | None,
*,
media: dict[str, Media.ID | Media | bytes] | None = None,
) -> ComponentVersion:
"""
Create a new ComponentVersion

The ``media`` parameter is a dict of file paths to Media-like things (a
Media.ID, Media model object, or simple bytes). This is the Media that we
want to associate with the new ComponentVersion. This will typically include
a "block.xml" for the XBlock OLX definition, and possibly some static files
like "static/diagram.png".

Media can be specified as ``bytes`` for testing convenience, but you will
almost always want to create a Media object first in actual app code,
because that will give you better control over the MIME type and storage
specifics (file vs. database).
"""
with atomic():
publishable_entity_version = publishing_api.create_publishable_entity_version(
Expand All @@ -131,13 +145,16 @@ def create_component_version(
publishable_entity_version=publishable_entity_version,
component_id=component_id,
)
if media:
_set_component_version_media(component_version, media, created=created)

return component_version


def create_next_component_version(
component_id: Component.ID,
/,
media_to_replace: dict[str, int | None | bytes],
media_to_replace: dict[str, Media.ID | Media | bytes | None],
created: datetime,
title: str | None = None,
created_by: int | None = None,
Expand Down Expand Up @@ -169,8 +186,14 @@ def create_next_component_version(
API or send the media bytes as part of ``media_to_replace`` values.

The ``media_to_replace`` dict is a mapping of strings representing the
local path/key for a file, to ``Media.id`` or media bytes values. Using
`None` for a value in this dict means to delete that key in the next version.
local path/key for a file, to ``Media.id``, ``Media`` object, or media bytes
values. Passing media as ``bytes`` is useful for testing purposes, but you
will almost always want to create a Media object first in actual app code,
because that will give you better control over the resulting Media's MIME
type and storage specifics (file vs. database).

Using `None` for a value in this dict means to delete that key in the next
version.

Make sure to wrap the function call on a atomic statement:
``with transaction.atomic():``
Expand All @@ -191,8 +214,6 @@ def create_next_component_version(
Why not use create_component_version?
The main reason is that we want to reuse the logic to create a static file component from a dictionary.

TODO: Have to add learning_downloadable info to this when it comes time to
support static asset download.
"""
# This needs to grab the highest version_num for this Publishable Entity.
# This will often be the Draft version, but not always. For instance, if
Expand Down Expand Up @@ -225,50 +246,31 @@ def create_next_component_version(
publishable_entity_version=publishable_entity_version,
component_id=component_id,
)
# First copy the new stuff over...
for key, media_pk_or_bytes in media_to_replace.items():
# If the media_pk is None, it means we want to remove the
# media represented by our key from the next version. Otherwise,
# we add our key->media_pk mapping to the next version.
if media_pk_or_bytes is not None:
if isinstance(media_pk_or_bytes, bytes):
file_path, file_media = key, media_pk_or_bytes
media_type_str, _encoding = mimetypes.guess_type(file_path)
# We use "application/octet-stream" as a generic fallback media type, per
# RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046
media_type_str = media_type_str or "application/octet-stream"
media_type = media_api.get_or_create_media_type(media_type_str)
media = media_api.get_or_create_file_media(
component.learning_package.id,
media_type.id,
data=file_media,
created=created,
)
media_pk = media.pk
else:
media_pk = media_pk_or_bytes
ComponentVersionMedia.objects.create(
media_id=media_pk,
component_version=component_version,
path=key,
)

if ignore_previous_media:
return component_version

# Now copy any old associations that existed, as long as they aren't
# in conflict with the new stuff or marked for deletion.
last_version_media_mapping = ComponentVersionMedia.objects \
.filter(component_version=last_version)
for cvrc in last_version_media_mapping:
if cvrc.path not in media_to_replace:
ComponentVersionMedia.objects.create(
media_id=cvrc.media_id,
component_version=component_version,
path=cvrc.path,
if ignore_previous_media or last_version is None:
paths_to_media = {
path: media
for path, media in media_to_replace.items()
if media is not None # Ignore deletion entries in this case.
}
else:
# Most of the time, we're adding our media changes as a delta on top
# of the last version's media.
previous_media = {
cvm.path: cvm.media_id
for cvm in ComponentVersionMedia.objects.filter(
component_version=last_version
)
}
paths_to_media = {
path: media
for path, media in (previous_media | media_to_replace).items()
if media is not None # "media is None" means "delete this"
}

_set_component_version_media(component_version, paths_to_media, created)

return component_version
return component_version


def create_component_and_version( # pylint: disable=too-many-positional-arguments
Expand All @@ -281,6 +283,7 @@ def create_component_and_version( # pylint: disable=too-many-positional-argumen
created_by: int | None = None,
*,
can_stand_alone: bool = True,
media: dict[str, Media.ID | Media | bytes] | None = None,
) -> tuple[Component, ComponentVersion]:
"""
Create a Component and associated ComponentVersion atomically.
Expand All @@ -300,8 +303,91 @@ def create_component_and_version( # pylint: disable=too-many-positional-argumen
title=title,
created=created,
created_by=created_by,
media=media or {},
)
return (component, component_version)

return (component, component_version)


def _set_component_version_media(
version: ComponentVersion,
paths_to_media_values: dict[str, Media.ID | Media | bytes],
created: datetime,
):
"""
Internal helper to set the Media for this ComponentVersion.

Only call this when we're first initializing a ComponentVersion.

Media can be specified as ``bytes`` for testing convenience, but you will
almost always want to create a Media object first in actual app code,
because that will give you better control over the MIME type and storage
specifics (file vs. database).
Comment thread
bradenmacdonald marked this conversation as resolved.

Note that unlike create_next_component_version(), we don't accept `None` as
a media value here. This function does not carry over any Media associations
from past ComponentVersions, so our "None means Delete" convention doesn't
apply here.
"""
@cache # want to avoid repeated lookups, e.g. a component with ten images
def cached_media_type(media_type_str):
return media_api.get_or_create_media_type(media_type_str)

def valid_path(path):
"""No absolute paths, whitespace, or backslashes (Windows separators)"""
return path == path.strip().lstrip('/') and '\\' not in path

# We allow a range of values to be in paths_to_media_values, but we want to
# normalize to media_ids for our bulk insert later.
paths_to_media_ids: dict[str, Media.ID] = {}

cv_learning_package_id = version.component.learning_package_id

for path, media_value in paths_to_media_values.items():
if not valid_path(path):
raise ValueError(f"{path!r} is an invalid media path ({version!r})")

match media_value:
case int(): # MediaID
media_id = media_value
case Media():
media_id = media_value.id
Comment thread
bradenmacdonald marked this conversation as resolved.
if media_value.learning_package_id != cv_learning_package_id:
raise ValueError(
f"Media LearningPackage does not match Component: "
f"Tried to create ComponentVersion {version!r} "
f"(Learning Package ID {cv_learning_package_id!r}) "
f"with Media {media_value!r} "
f"(Learning Package ID {media_value.learning_package_id!r})"
)
case bytes():
media_type_str, _encoding = mimetypes.guess_type(path)
# We use "application/octet-stream" as a generic fallback media type, per
# RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046
media_type_str = media_type_str or "application/octet-stream"
media_type = cached_media_type(media_type_str)
media = media_api.get_or_create_file_media(
cv_learning_package_id,
media_type.id,
data=media_value,
created=created,
)
media_id = media.id
case _:
raise ValueError(f"Invalid object for paths_to_media Media: {media_value!r}")

paths_to_media_ids[path] = media_id

ComponentVersionMedia.objects.bulk_create(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, using bulk_create here will improve performance for anything with multiple pieces of media. Not sure we have much of that yet though.

[
ComponentVersionMedia(
component_version=version,
path=normalized_path,
media_id=media_id,
)
for normalized_path, media_id in paths_to_media_ids.items()
]
)


def get_component(component_id: Component.ID, /) -> Component:
Expand Down Expand Up @@ -462,37 +548,6 @@ def look_up_component_version_media(
).get(queries)


def create_component_version_media(
component_version_id: int,
media_id: int,
/,
path: str,
) -> ComponentVersionMedia:
"""
Add a Media to the given ComponentVersion

We don't allow paths that would be absolute, e.g. ones that start with
'/'. Storing these causes headaches with building relative paths and because
of mismatches with things that expect a leading slash and those that don't.
So for safety and consistency, we strip off leading slashes and emit a
warning when we do.
"""
if path.startswith('/'):
logger.warning(
"Absolute paths are not supported: "
f"removed leading '/' from ComponentVersion {component_version_id} "
f"media path: {repr(path)} (media_id: {media_id})"
)
path = path.lstrip('/')

cvrc, _created = ComponentVersionMedia.objects.get_or_create(
component_version_id=component_version_id,
media_id=media_id,
path=path,
)
return cvrc


class AssetError(StrEnum):
"""Error codes related to fetching ComponentVersion assets."""
ASSET_PATH_NOT_FOUND_FOR_COMPONENT_VERSION = auto()
Expand Down
12 changes: 12 additions & 0 deletions src/openedx_content/applets/media/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from functools import cache, cached_property
from logging import getLogger
from typing import NewType

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured, ValidationError
Expand All @@ -18,6 +19,7 @@

from openedx_django_lib.fields import (
MultiCollationTextField,
TypedBigAutoField,
case_insensitive_char_field,
hash_field,
manual_date_time_field,
Expand Down Expand Up @@ -240,6 +242,16 @@ class Media(models.Model):
# could be as much as 200K of data if we had nothing but emojis.
MAX_TEXT_LENGTH = 50_000

# Custom type for our primary key, to make it more type-safe when using in
# API calls.
MediaID = NewType("MediaID", int)
type ID = MediaID

class IDField(TypedBigAutoField[ID]): # Boilerplate for fully-typed ID field.
pass

id = IDField(primary_key=True)
Comment thread
ormsbee marked this conversation as resolved.

objects: models.Manager[Media] = WithRelationsManager('media_type')

learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE)
Expand Down
28 changes: 28 additions & 0 deletions src/openedx_content/migrations/0014_typed_media_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 5.2.12 on 2026-04-28 14:40

from django.db import migrations, models


class Migration(migrations.Migration):
"""
Media.id was previously auto-created by Django (hence auto_created=True,
verbose_name='ID' in the original migration). It is now explicitly declared
on the model. The database column is unchanged (still BIGINT AUTO_INCREMENT
PRIMARY KEY), so database_operations is empty.
"""
dependencies = [
('openedx_content', '0013_unicode_container_component_codes'),
]

operations = [
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='media',
name='id',
field=models.BigAutoField(primary_key=True, serialize=False),
),
],
database_operations=[],
)
]
2 changes: 1 addition & 1 deletion src/openedx_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
"""

# The version for the entire repository
__version__ = "0.46.0"
__version__ = "0.47.0"
Loading