Skip to content

Commit c48445b

Browse files
authored
feat: new history log API functions [FC-0123] (#501)
Implements Python API functions to get and build the history log of a PublishableEntity:
- get_entity_draft_history(): returns the DraftChangeLogRecords for a PublishableEntity since its last publication.
- get_entity_version_contributors(): returns the users who authored changes between two published versions of an entity.
- get_entity_publish_history(): returns all PublishLogRecords for a PublishableEntity.
- get_entity_publish_history_entries(): returns the DraftChangeLogRecords associated with a specific PublishLog.
- get_descendant_component_entity_ids(): traverses a container hierarchy and returns the IDs of all descendant component entities (used to scope queries when fetching history for container children).
Increments the version to 0.44.0.
1 parent beffc1f commit c48445b

3 files changed

Lines changed: 984 additions & 5 deletions

File tree

src/openedx_content/applets/containers/api.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
"get_containers_with_entity",
7070
"get_container_children_count",
7171
"get_container_children_entity_refs",
72+
"get_descendant_component_entity_ids",
7273
]
7374

7475

@@ -889,3 +890,49 @@ def get_container_children_entity_refs(container_version: ContainerVersion) -> l
889890
.values_list("entity__entity_ref", flat=True)
890891
.order_by("order_num")
891892
)
893+
894+
895+
def get_descendant_component_entity_ids(container: Container) -> list[int]:
    """
    [ 🛑 UNSTABLE ]
    Collect the entity IDs of every leaf (non-Container) entity nested under ``container``.

    Nested containers are walked but never reported themselves; only entries
    whose version has no associated ContainerVersion end up in the result.

    Only draft state is followed (``published=False``). Which children are
    visible for a given container is decided by ``get_entities_in_container``
    (the original notes say it leaves out soft-deleted children).

    Args:
        container: The root Container whose hierarchy is walked.

    Returns:
        A list of entity primary keys in no particular order. Each ID appears
        once, even when the entity is a child of several containers, because
        the IDs are accumulated in a set.

    Notes:
    - A container for which ``get_entities_in_container`` raises
      ``ContainerVersion.DoesNotExist`` is skipped and contributes nothing.
    - Containers are tracked by primary key so each one is expanded at most
      once; this also guards against a cyclic hierarchy.
    """
    leaf_entity_ids: set[int] = set()
    seen_container_pks: set[int] = {container.pk}
    pending: list[Container] = [container]

    while pending:
        parent = pending.pop()
        try:
            entries = get_entities_in_container(
                parent,
                published=False,
                select_related_version="containerversion__container",
            )
        except ContainerVersion.DoesNotExist:
            # Nothing to descend into for this container.
            continue

        for entry in entries:
            try:
                nested = entry.entity_version.containerversion.container
            except ContainerVersion.DoesNotExist:
                # The version is not a container version, so this is a leaf.
                leaf_entity_ids.add(entry.entity.pk)
                continue

            if nested.pk in seen_container_pks:
                continue
            seen_container_pks.add(nested.pk)
            pending.append(nested)

    return list(leaf_entity_ids)

src/openedx_content/applets/publishing/api.py

Lines changed: 274 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
from datetime import datetime, timezone
1212
from typing import ContextManager, Optional, cast
1313

14+
from django.contrib.auth import get_user_model
1415
from django.core.exceptions import ObjectDoesNotExist
15-
from django.db.models import F, Prefetch, Q, QuerySet
16+
from django.db.models import F, OuterRef, Prefetch, Q, QuerySet, Subquery
1617
from django.db.transaction import atomic
1718

1819
from openedx_django_lib.fields import create_hash_digest
@@ -60,6 +61,10 @@
6061
"publish_from_drafts",
6162
"get_draft_version",
6263
"get_published_version",
64+
"get_entity_draft_history",
65+
"get_entity_publish_history",
66+
"get_entity_publish_history_entries",
67+
"get_entity_version_contributors",
6368
"set_draft_version",
6469
"soft_delete_draft",
6570
"reset_drafts_to_published",
@@ -566,6 +571,274 @@ def get_published_version(
566571
return published.version
567572

568573

574+
def get_entity_draft_history(
    publishable_entity_or_id: PublishableEntity | int, /
) -> QuerySet[DraftChangeLogRecord]:
    """
    [ 🛑 UNSTABLE ]
    Return the not-yet-published DraftChangeLogRecords of a PublishableEntity, newest first.

    Args:
        publishable_entity_or_id: A PublishableEntity instance or its integer
            primary key (positional-only).

    Returns:
        A lazy queryset ordered by descending ``draft_change_log.changed_at``,
        with the change log's ``changed_by`` user and both the old and new
        versions joined in through ``select_related``.

    Behaviour:
    - No Published row for the entity: all of the entity's
      DraftChangeLogRecords are returned (none if it has no records).
    - A Published row exists: only records changed strictly after the publish
      timestamp are returned, so an entity with no later changes yields an
      empty queryset.
    - If a record made after that publish sets the draft to the currently
      published version (the trace reset_drafts_to_published() is expected to
      leave), the newest such record becomes the cut-off instead. That record
      and everything older are then excluded.
    - The Published row's version may be None (per the original notes, a
      published soft-delete); its publish timestamp is still the cut-off.
    """
    if isinstance(publishable_entity_or_id, int):
        entity_id = PublishableEntity.PublishableEntityID(publishable_entity_or_id)
    else:
        entity_id = publishable_entity_or_id.id

    history = (
        DraftChangeLogRecord.objects
        .filter(entity_id=entity_id)
        .select_related("draft_change_log__changed_by", "old_version", "new_version")
        .order_by("-draft_change_log__changed_at")
    )

    try:
        published = (
            Published.objects
            .select_related("publish_log_record__publish_log")
            .get(entity_id=entity_id)
        )
    except Published.DoesNotExist:
        # Never published: the whole draft history is still pending.
        return history

    published_at = published.publish_log_record.publish_log.published_at

    # Timestamp of the newest post-publish record that put the draft back on
    # the published version, or None when there is no such record.
    last_reset_at = (
        DraftChangeLogRecord.objects
        .filter(
            entity_id=entity_id,
            new_version_id=published.version_id,
            draft_change_log__changed_at__gt=published_at,
        )
        .order_by("-draft_change_log__changed_at")
        .values_list("draft_change_log__changed_at", flat=True)
        .first()
    )

    cutoff = last_reset_at or published_at
    return history.filter(draft_change_log__changed_at__gt=cutoff)
640+
641+
642+
def get_entity_publish_history(
    publishable_entity_or_id: PublishableEntity | int, /
) -> QuerySet[PublishLogRecord]:
    """
    [ 🛑 UNSTABLE ]
    Return every PublishLogRecord of a PublishableEntity, newest publish first.

    Args:
        publishable_entity_or_id: A PublishableEntity instance or its integer
            primary key (positional-only).

    Returns:
        A lazy queryset ordered by descending ``publish_log.published_at``,
        with the publish log's ``published_by`` user and both the old and new
        versions joined in through ``select_related``. It is empty when the
        entity has no PublishLogRecords.

    Notes:
    - No filtering on version is applied, so records whose ``new_version`` is
      None (per the original notes, a published soft-delete) are included.
    - Only PublishLogRecords are queried; draft versions created between two
      publishes do not appear here.
    """
    if isinstance(publishable_entity_or_id, int):
        entity_id = PublishableEntity.PublishableEntityID(publishable_entity_or_id)
    else:
        entity_id = publishable_entity_or_id.id

    records_for_entity = PublishLogRecord.objects.filter(entity_id=entity_id)
    with_related = records_for_entity.select_related(
        "publish_log__published_by",
        "old_version",
        "new_version",
    )
    return with_related.order_by("-publish_log__published_at")
673+
674+
675+
def get_entity_publish_history_entries(
    publishable_entity_or_id: PublishableEntity | int,
    /,
    publish_log_uuid: str,
) -> QuerySet[DraftChangeLogRecord]:
    """
    [ 🛑 UNSTABLE ]
    Return the DraftChangeLogRecords that fed into one specific publish of an entity.

    The records are selected by time window rather than by version number:
    everything changed after the entity's previous publish (exclusive) and up
    to the requested publish (inclusive), newest first.

    Args:
        publishable_entity_or_id: A PublishableEntity instance or its integer
            primary key (positional-only).
        publish_log_uuid: UUID of the PublishLog whose entries are wanted.

    Returns:
        A lazy queryset ordered by descending ``draft_change_log.changed_at``,
        with the change log's ``changed_by`` user and both the old and new
        versions joined in through ``select_related``.

    Raises:
        PublishLogRecord.DoesNotExist: If the entity has no PublishLogRecord
            in the PublishLog identified by ``publish_log_uuid``.

    Notes:
    - When the entity has no earlier publish, the window has no lower bound.
    - Records whose ``new_version`` is None are kept when they fall inside
      the window; nothing filters them out.
    - If a record inside the window sets the draft to the version of the
      previous publish (None when there was no previous publish), the newest
      such record is treated as a reset, and it and everything older are
      excluded. The original comments attribute such records to
      reset_drafts_to_published().
    """
    if isinstance(publishable_entity_or_id, int):
        entity_id = PublishableEntity.PublishableEntityID(publishable_entity_or_id)
    else:
        entity_id = publishable_entity_or_id.id

    # The publish being inspected; .get() raises DoesNotExist when absent.
    target_record = (
        PublishLogRecord.objects
        .select_related("publish_log")
        .get(entity_id=entity_id, publish_log__uuid=publish_log_uuid)
    )
    window_end = target_record.publish_log.published_at

    # The entity's most recent publish strictly before the target one.
    previous_record = (
        PublishLogRecord.objects
        .filter(entity_id=entity_id, publish_log__published_at__lt=window_end)
        .select_related("publish_log")
        .order_by("-publish_log__published_at")
        .first()
    )
    if previous_record is None:
        window_start = None
        baseline_version_id = None
    else:
        window_start = previous_record.publish_log.published_at
        baseline_version_id = previous_record.new_version_id

    # Draft changes inside (window_start, window_end] for this entity.
    in_window = DraftChangeLogRecord.objects.filter(
        entity_id=entity_id,
        draft_change_log__changed_at__lte=window_end,
    )
    if window_start is not None:
        in_window = in_window.filter(draft_change_log__changed_at__gt=window_start)

    # Newest in-window record that returned the draft to the baseline version.
    last_reset_at = (
        in_window
        .filter(new_version_id=baseline_version_id)
        .order_by("-draft_change_log__changed_at")
        .values_list("draft_change_log__changed_at", flat=True)
        .first()
    )

    entries = (
        in_window
        .select_related("draft_change_log__changed_by", "old_version", "new_version")
        .order_by("-draft_change_log__changed_at")
    )
    if last_reset_at is None:
        return entries
    return entries.filter(draft_change_log__changed_at__gt=last_reset_at)
770+
771+
772+
def get_entity_version_contributors(
    publishable_entity_or_id: PublishableEntity | int,
    /,
    old_version_num: int,
    new_version_num: int | None,
) -> QuerySet:
    """
    [ 🛑 UNSTABLE ]
    Return the users who made draft changes to an entity within a version range.

    Args:
        publishable_entity_or_id: A PublishableEntity instance or its integer
            primary key (positional-only).
        old_version_num: Exclusive lower bound on the ``version_num`` of a
            record's ``new_version``.
        new_version_num: Inclusive upper bound on that ``version_num``, or
            None to leave the range open-ended and also count soft-delete
            records.

    Returns:
        A queryset over the active user model, annotated with
        ``last_contributed`` (the ``changed_at`` of the user's most recent
        matching record) and ordered by that annotation, newest first.

    Matching rules:
    - ``new_version_num`` given: records whose new version number lies in
      ``(old_version_num, new_version_num]``.
    - ``new_version_num`` is None: records whose new version number is above
      ``old_version_num``, plus records with no new version whose old version
      number is at least ``old_version_num``.
    - Records whose change log has no ``changed_by`` user never contribute.
    - Each user is returned once, however many matching records they have.
    """
    if isinstance(publishable_entity_or_id, int):
        entity_id = PublishableEntity.PublishableEntityID(publishable_entity_or_id)
    else:
        entity_id = publishable_entity_or_id.id

    if new_version_num is None:
        # Open-ended range: later edits, or the soft-delete record itself.
        edits_after_old = Q(new_version__version_num__gt=old_version_num)
        soft_delete_record = Q(
            new_version__isnull=True,
            old_version__version_num__gte=old_version_num,
        )
        version_filter = edits_after_old | soft_delete_record
    else:
        version_filter = Q(
            new_version__version_num__gt=old_version_num,
            new_version__version_num__lte=new_version_num,
        )

    # Every record of this entity that falls in the requested range.
    matching_records = (
        DraftChangeLogRecord.objects
        .filter(entity_id=entity_id)
        .filter(version_filter)
    )

    contributor_ids = (
        matching_records
        .exclude(draft_change_log__changed_by=None)
        .values_list("draft_change_log__changed_by", flat=True)
        .distinct()
    )

    # pk__in does not carry over any ordering from the inner query, so each
    # user is annotated with the timestamp of their latest matching record
    # through a correlated subquery, and the result is sorted on that.
    latest_change_at = (
        matching_records
        .filter(draft_change_log__changed_by=OuterRef("pk"))
        .order_by("-draft_change_log__changed_at")
        .values("draft_change_log__changed_at")[:1]
    )

    users = get_user_model().objects.filter(pk__in=contributor_ids)
    users = users.annotate(last_contributed=Subquery(latest_change_at))
    return users.order_by("-last_contributed")
840+
841+
569842
def set_draft_version(
570843
draft_or_id: Draft | PublishableEntity.ID,
571844
publishable_entity_version_pk: int | None,

0 commit comments

Comments
 (0)