Skip to content

Commit c22cc1b

Browse files
feat: emit events when content is modified or published (#543)
Co-Authored-By: Claude <noreply@anthropic.com>
1 parent c026a49 commit c22cc1b

20 files changed

Lines changed: 1872 additions & 44 deletions

File tree

src/openedx_content/api.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
"""
1010

1111
# These wildcard imports are okay because these api modules declare __all__.
12-
# pylint: disable=wildcard-import
12+
# pylint: disable=wildcard-import,unused-import
13+
14+
# Signals are kept in a separate namespace, for two reasons:
15+
# (1) so they can easily be imported/used as `api.signals` (e.g. `from openedx_content import api`, use `api.signals.x`)
16+
# (2) to avoid confusion between event data structures and other API symbols with similar names (e.g.
17+
# `DraftChangeLogEventData` vs `DraftChangeLogRecord` is clearer if the former is `signals.DraftChangeLogEventData`)
18+
from . import signals
19+
# The rest of the public API (other than models):
1320
from .applets.backup_restore.api import *
1421
from .applets.collections.api import *
1522
from .applets.components.api import *

src/openedx_content/applets/collections/api.py

Lines changed: 84 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
"""
22
Collections API (warning: UNSTABLE, in progress API)
33
"""
4+
45
from __future__ import annotations
56

67
from datetime import datetime, timezone
8+
from functools import partial
79

810
from django.core.exceptions import ValidationError
911
from django.db.models import QuerySet
12+
from django.db.transaction import on_commit
1013

1114
from ..publishing import api as publishing_api
1215
from ..publishing.models import PublishableEntity
16+
from . import signals
1317
from .models import Collection, CollectionPublishableEntity, LearningPackage
1418

1519
# The public API that will be re-exported by openedx_content.api
@@ -32,6 +36,39 @@
3236
]
3337

3438

39+
def _queue_change_event(
40+
collection: Collection,
41+
*,
42+
created: bool = False,
43+
metadata_modified: bool = False,
44+
deleted: bool = False,
45+
entities_added: list[PublishableEntity.ID] | None = None,
46+
entities_removed: list[PublishableEntity.ID] | None = None,
47+
user_id: int | None = None,
48+
) -> None:
49+
"""Helper for emitting the event when a collection has changed."""
50+
51+
learning_package_id = collection.learning_package.id
52+
learning_package_title = collection.learning_package.title
53+
54+
# Send out an event immediately after this database transaction commits.
55+
on_commit(partial(
56+
signals.COLLECTION_CHANGED.send_event,
57+
time=collection.modified,
58+
learning_package=signals.LearningPackageEventData(id=learning_package_id, title=learning_package_title),
59+
changed_by=signals.UserAttributionEventData(user_id=user_id),
60+
change=signals.CollectionChangeData(
61+
collection_id=collection.id,
62+
collection_code=collection.collection_code,
63+
created=created,
64+
metadata_modified=metadata_modified,
65+
deleted=deleted,
66+
entities_added=entities_added or [],
67+
entities_removed=entities_removed or [],
68+
),
69+
))
70+
71+
3572
def create_collection(
3673
learning_package_id: LearningPackage.ID,
3774
collection_code: str,
@@ -54,6 +91,8 @@ def create_collection(
5491
)
5592
collection.full_clean()
5693
collection.save()
94+
if enabled:
95+
_queue_change_event(collection, created=True, user_id=created_by)
5796
return collection
5897

5998

@@ -87,6 +126,7 @@ def update_collection(
87126
collection.description = description
88127

89128
collection.save()
129+
_queue_change_event(collection, metadata_modified=True)
90130
return collection
91131

92132

@@ -103,12 +143,20 @@ def delete_collection(
103143
Soft-deleted collections can be re-enabled using restore_collection.
104144
"""
105145
collection = get_collection(learning_package_id, collection_code)
146+
entities_removed = list(collection.entities.order_by("id").values_list("id", flat=True))
147+
was_already_soft_deleted = not collection.enabled
106148

107149
if hard_delete:
150+
collection.modified = datetime.now(tz=timezone.utc) # For the event timestamp; won't get saved to the DB
151+
if not was_already_soft_deleted: # Send the deleted event unless this was already soft deleted.
152+
_queue_change_event(collection, deleted=True, entities_removed=entities_removed)
153+
# Delete after enqueing the event:
108154
collection.delete()
109-
else:
155+
elif not was_already_soft_deleted:
156+
# Soft delete:
110157
collection.enabled = False
111158
collection.save()
159+
_queue_change_event(collection, deleted=True, entities_removed=entities_removed)
112160
return collection
113161

114162

@@ -120,9 +168,11 @@ def restore_collection(
120168
Undo a "soft delete" by re-enabling a Collection.
121169
"""
122170
collection = get_collection(learning_package_id, collection_code)
171+
entities_added = list(collection.entities.order_by("id").values_list("id", flat=True))
123172

124173
collection.enabled = True
125174
collection.save()
175+
_queue_change_event(collection, created=True, entities_added=entities_added)
126176
return collection
127177

128178

@@ -152,12 +202,12 @@ def add_to_collection(
152202
)
153203

154204
collection = get_collection(learning_package_id, collection_code)
155-
collection.entities.add(
156-
*entities_qset.all(),
157-
through_defaults={"created_by_id": created_by},
158-
)
205+
existing_ids = set(collection.entities.values_list("id", flat=True))
206+
ids_to_add = entities_qset.values_list("id", flat=True)
207+
collection.entities.add(*ids_to_add, through_defaults={"created_by_id": created_by})
159208
collection.modified = datetime.now(tz=timezone.utc)
160209
collection.save()
210+
_queue_change_event(collection, entities_added=sorted(list(set(ids_to_add) - existing_ids)), user_id=created_by)
161211

162212
return collection
163213

@@ -178,9 +228,12 @@ def remove_from_collection(
178228
"""
179229
collection = get_collection(learning_package_id, collection_code)
180230

181-
collection.entities.remove(*entities_qset.all())
231+
ids_to_remove = list(entities_qset.values_list("id", flat=True))
232+
entities_removed = sorted(list(collection.entities.filter(id__in=ids_to_remove).values_list("id", flat=True)))
233+
collection.entities.remove(*ids_to_remove)
182234
collection.modified = datetime.now(tz=timezone.utc)
183235
collection.save()
236+
_queue_change_event(collection, entities_removed=entities_removed)
184237

185238
return collection
186239

@@ -222,7 +275,7 @@ def get_collections(learning_package_id: LearningPackage.ID, enabled: bool | Non
222275
qs = Collection.objects.filter(learning_package_id=learning_package_id)
223276
if enabled is not None:
224277
qs = qs.filter(enabled=enabled)
225-
return qs.select_related("learning_package").order_by('pk')
278+
return qs.select_related("learning_package").order_by("pk")
226279

227280

228281
def set_collections(
@@ -245,25 +298,34 @@ def set_collections(
245298
raise ValidationError(
246299
"Collection entities must be from the same learning package as the collection.",
247300
)
248-
current_relations = CollectionPublishableEntity.objects.filter(
249-
entity=publishable_entity
250-
).select_related('collection')
251-
# Clear other collections for given entity and add only new collections from collection_qset
252-
removed_collections = set(
253-
r.collection for r in current_relations.exclude(collection__in=collection_qset)
301+
current_relations = CollectionPublishableEntity.objects.filter(entity=publishable_entity).select_related(
302+
"collection"
254303
)
255-
new_collections = set(collection_qset.exclude(
256-
id__in=current_relations.values_list('collection', flat=True)
257-
))
304+
# Clear other collections for given entity and add only new collections from collection_qset
305+
removed_collections = set(r.collection for r in current_relations.exclude(collection__in=collection_qset))
306+
new_collections = set(collection_qset.exclude(id__in=current_relations.values_list("collection", flat=True)))
258307
# Triggers a m2m_changed signal
259308
publishable_entity.collections.set(
260309
objs=collection_qset,
261310
through_defaults={"created_by_id": created_by},
262311
)
263-
# Update modified date via update to avoid triggering post_save signal for all collections, which can be very slow.
264-
affected_collection = removed_collections | new_collections
265-
Collection.objects.filter(
266-
id__in=[collection.id for collection in affected_collection]
267-
).update(modified=datetime.now(tz=timezone.utc))
312+
# Update modified date:
313+
affected_collections = removed_collections | new_collections
314+
Collection.objects.filter(id__in=[collection.id for collection in affected_collections]).update(
315+
modified=datetime.now(tz=timezone.utc)
316+
)
268317

269-
return affected_collection
318+
# Emit one event per affected collection. Re-fetch with select_related so _queue_change_event
319+
# can read collection.learning_package without extra queries; the re-fetch also picks up the
320+
# updated modified timestamp from the bulk update above.
321+
removed_ids = {c.id for c in removed_collections}
322+
for collection in Collection.objects.filter(id__in=[c.id for c in affected_collections]).select_related(
323+
"learning_package"
324+
):
325+
# TODO: test performance of this and potentially send these async if > 1 affected collection.
326+
if collection.id in removed_ids:
327+
_queue_change_event(collection, entities_removed=[publishable_entity.id], user_id=created_by)
328+
else:
329+
_queue_change_event(collection, entities_added=[publishable_entity.id], user_id=created_by)
330+
331+
return affected_collections
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Signal handlers for collections-related updates."""
2+
3+
from functools import partial
4+
5+
from django.db import transaction
6+
from django.dispatch import receiver
7+
8+
from ..publishing.signals import ENTITIES_DRAFT_CHANGED, DraftChangeLogEventData, UserAttributionEventData
9+
from .tasks import emit_collections_changed_for_entity_changes_task
10+
11+
12+
@receiver(ENTITIES_DRAFT_CHANGED)
13+
def on_entities_changed(
14+
change_log: DraftChangeLogEventData,
15+
changed_by: UserAttributionEventData,
16+
**kwargs,
17+
):
18+
"""
19+
When entity drafts are deleted or restored, notify affected collections.
20+
21+
Dispatches a task to emit COLLECTION_CHANGED for any
22+
collections that contain the changed entities.
23+
"""
24+
removed_entity_ids = [record.entity_id for record in change_log.changes if record.new_version_id is None]
25+
# old_version_id=None covers both brand-new entities and restored soft-deletes; we can't distinguish
26+
# them here without a DB query. The task is a no-op for new entities (not yet in any collection).
27+
# TODO: if ChangeLogRecordData gains a 'restored' flag, filter to only restored entities here.
28+
# (Newly-created entities cannot be part of collections yet, so we only care about entities that
29+
# were previously in collections, then deleted and then restored.)
30+
added_entity_ids = [
31+
record.entity_id
32+
for record in change_log.changes
33+
if record.old_version_id is None and record.new_version_id is not None
34+
]
35+
36+
if not removed_entity_ids and not added_entity_ids:
37+
return
38+
39+
transaction.on_commit(
40+
partial(
41+
emit_collections_changed_for_entity_changes_task.delay,
42+
removed_entity_ids=removed_entity_ids,
43+
added_entity_ids=added_entity_ids,
44+
user_id=changed_by.user_id,
45+
)
46+
)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""
2+
Low-level events/signals emitted by openedx_content
3+
"""
4+
5+
from attrs import define, field
6+
from openedx_events.tooling import OpenEdxPublicSignal # type: ignore[import-untyped]
7+
8+
from ..publishing.models.publishable_entity import PublishableEntity
9+
from ..publishing.signals import LearningPackageEventData, UserAttributionEventData
10+
11+
# Public API available via openedx_content.api
12+
__all__ = [
13+
# All event data structures should end with "...Data":
14+
"CollectionChangeData",
15+
# All events:
16+
"COLLECTION_CHANGED",
17+
]
18+
19+
20+
@define
21+
class CollectionChangeData:
22+
"""Summary of changes to a collection, for event purposes"""
23+
24+
collection_id: int
25+
collection_code: str
26+
created: bool = False
27+
"""The collection is newly-created, or un-deleted. Some entities may be added simultaneously."""
28+
metadata_modified: bool = False
29+
"""The collection's title/description has changed. Does not indicate whether or not entities were added/removed."""
30+
deleted: bool = False
31+
"""
32+
The collection has been deleted. When this is true, the entities_removed list will have all entity IDs.
33+
Does not distinguish between "soft" and "hard" deletion.
34+
"""
35+
entities_added: list[PublishableEntity.ID] = field(factory=list)
36+
entities_removed: list[PublishableEntity.ID] = field(factory=list)
37+
38+
39+
COLLECTION_CHANGED = OpenEdxPublicSignal(
40+
event_type="org.openedx.content.collections.collection_changed.v1",
41+
data={
42+
"learning_package": LearningPackageEventData,
43+
"changed_by": UserAttributionEventData,
44+
"change": CollectionChangeData,
45+
},
46+
)
47+
"""
48+
A ``Collection`` has been created, modified, or deleted, or its entities have
49+
changed.
50+
51+
This is a low-level batch event. It does not have any course or library context
52+
information available. It does not distinguish between Containers, Components,
53+
or other entity types.
54+
55+
💾 This event is only emitted after any transaction has been committed.
56+
57+
⏳ This **batch** event is emitted **synchronously**. Handlers that do anything
58+
per-entity or that is possibly slow should dispatch an asynchronous task for
59+
processing the event.
60+
"""
61+
62+
# Note: at present, the openedx_tagging code (in this repo) emits a
63+
# CONTENT_OBJECT_ASSOCIATIONS_CHANGED event whenever an entity's tags change.
64+
# But we do NOT emit the same event when an entity's collections change; rather
65+
# we expect code in the platform to listen for COLLECTION_CHANGED and then
66+
# re-emit '...ASSOCIATIONS_CHANGED' as needed.
67+
# The reason we don't emit the '...ASSOCIATIONS_CHANGED' event here
68+
# is simple: we know the entity IDs but not their opaque keys, and all of the
69+
# code that listens for that event expects the entity's opaque keys.
70+
# The tagging code can do it here because the `object_id` in the tagging models
71+
# _is_ the opaque key ("lb:..."), but the collections code is too low-level to
72+
# know about opaque keys of the entities. We don't even know which learning
73+
# context (which content library) a given entity is in.

0 commit comments

Comments
 (0)