Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0473c0e
feat: #253 new branch to reduce noise; adding BE logic to implement c…
tbain Mar 18, 2026
2c0b959
feat: #253 Fixing API tests with regard to count logic changes
tbain Mar 23, 2026
8846805
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Mar 25, 2026
b01964b
feat: #253 Resolving merge conflict with upstream main branch
tbain Mar 26, 2026
a23afe8
feat: #253 Fixing pylint issues
tbain Mar 26, 2026
3df68ab
feat: #253 Fixing pycodestyle issue
tbain Mar 26, 2026
435808c
feat: #253 Fixing pycodestyle issue
tbain Mar 26, 2026
457313b
feat: #253 Addressing first round Code review comments
tbain Mar 27, 2026
a14c56e
feat: #253 fixing count depth issue and updating appropriate unit tests
tbain Mar 27, 2026
2055a07
feat: #253 fixing spelling errors in comments
tbain Mar 27, 2026
939f18c
feat: #253 Fixing code review comments; fix incorrect unit test & fil…
tbain Mar 30, 2026
5762c33
feat: #253 adjusting comments per code review feedback
tbain Apr 1, 2026
79be83a
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 1, 2026
c2f79d2
feat: #253 fixing unit tests to work with upstream updates
tbain Apr 1, 2026
c017e8a
feat: #253 Changing usage_count to being in-mem/python based instead …
tbain Apr 3, 2026
7d42793
feat: #253 Fixing code quality pipeline issues
tbain Apr 3, 2026
1e47967
feat: #253 Moving usage_count logic out to API level, cleaning up/add…
tbain Apr 8, 2026
a87c877
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 8, 2026
bbc3638
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 13, 2026
155d03b
feat: #253 Addressing code review comments
tbain Apr 13, 2026
9e03f6b
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 13, 2026
184a469
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 13, 2026
0880c20
feat: #253 Addressing code review comments
tbain Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/openedx_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
"""

# The version for the entire repository
__version__ = "0.39.1"
__version__ = "0.39.2"
51 changes: 48 additions & 3 deletions src/openedx_tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"""
from __future__ import annotations

from typing import Any
from collections import defaultdict
from typing import Any, Counter

from django.db import models, transaction
from django.db.models import F, QuerySet, Value
Expand Down Expand Up @@ -116,7 +117,6 @@ def search_tags(
taxonomy: Taxonomy,
search_term: str,
exclude_object_id: str | None = None,
include_counts: bool = False,
) -> TagDataQuerySet:
"""
Returns a list of all tags that contains `search_term` of the given
Expand All @@ -138,7 +138,6 @@ def search_tags(
qs = taxonomy.cast().get_filtered_tags(
search_term=search_term,
excluded_values=excluded_values,
include_counts=include_counts,
)
return qs

Expand Down Expand Up @@ -525,3 +524,49 @@ def unmark_copied_tags(object_id: str) -> None:
Update copied object tags on the given object to mark them as "not copied".
"""
ObjectTag.objects.filter(object_id=object_id).update(is_copied=False)


def add_usage_counts(taxonomy: Taxonomy, tag_data: TagDataQuerySet) -> TagDataQuerySet:
    """
    Add a ``usage_count`` entry to every row of ``tag_data``.

    The count is not a simple raw count of each tag's usage. A tag can be
    directly applied to an object, which can be a course, library, module,
    or something else.

    A tag is also considered indirectly applied to an object when any of its
    descendants is applied to that object. So, if the tags "Chemistry" and
    "Physics" are applied once each to different objects, their parent tag
    "Natural Science" is considered applied to 2 objects.

    Deduplication: a tag is counted at most once per object. If "Chemistry"
    and "Physics" are both applied to the same course, their parent tag
    "Natural Science" is only counted once for that course.

    For performance reasons the counts are computed in Python and written
    into the row dicts in-memory, rather than being annotated onto the
    QuerySet, which would require a very expensive annotation.

    Args:
        taxonomy: The taxonomy whose object tags are counted.
        tag_data: Iterable of tag rows (dicts with at least a "value" key).
            Each row is mutated in place.

    Returns:
        The same ``tag_data`` object that was passed in, with
        ``row["usage_count"]`` set on every row (0 for unused tags).
    """
    # NOTE(review): counts are derived from "tag__lineage" only. Object tags
    # without a linked tag (presumably the case for free-text taxonomies)
    # contribute nothing here -- confirm that is the intended behavior.
    object_tags = taxonomy.objecttag_set.values_list("object_id", "tag__lineage")
    tag_counts: Counter[str] = Counter()
    seen_per_object: defaultdict[str, set[str]] = defaultdict(set)

    for object_id, tag_lineage in object_tags:
        if not tag_lineage:
            continue
        # A lineage looks like "Root\tParent\t...\tThisValue\t"; the trailing
        # tab produces an empty segment, which is not a tag value, so drop it.
        # Using a set also guards against counting a value twice per lineage.
        lineage_values = {value for value in tag_lineage.split("\t") if value}
        # Only count values not yet seen for this object (de-duplication).
        already_seen = seen_per_object[object_id]
        new_values = lineage_values - already_seen
        tag_counts.update(new_values)
        already_seen |= new_values

    # In-memory 'annotation'; this is faster than using annotate() on the QuerySet.
    # NOTE(review): if tag_data is a lazily-evaluated QuerySet, the counts live
    # only in the rows produced by this iteration; chaining further queryset
    # operations afterwards would presumably lose them -- verify against callers.
    for row in tag_data:
        row["usage_count"] = tag_counts.get(row["value"], 0)

    return tag_data
38 changes: 5 additions & 33 deletions src/openedx_tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,6 @@ def get_filtered_tags( # pylint: disable=too-many-positional-arguments
depth: int | None = None,
parent_tag_value: str | None = None,
search_term: str | None = None,
include_counts: bool = False,
excluded_values: list[str] | None = None,
) -> TagDataQuerySet:
"""
Expand All @@ -451,7 +450,7 @@ def get_filtered_tags( # pylint: disable=too-many-positional-arguments
if self.allow_free_text:
if parent_tag_value is not None:
raise ValueError("Cannot specify a parent tag ID for free text taxonomies")
result = self._get_filtered_tags_free_text(search_term=search_term, include_counts=include_counts)
result = self._get_filtered_tags_free_text(search_term=search_term)
if excluded_values:
return result.exclude(value__in=excluded_values)
else:
Expand All @@ -460,7 +459,6 @@ def get_filtered_tags( # pylint: disable=too-many-positional-arguments
result = self._get_filtered_tags_one_level(
parent_tag_value=parent_tag_value,
search_term=search_term,
include_counts=include_counts,
)
if excluded_values:
return result.exclude(value__in=excluded_values)
Expand All @@ -470,7 +468,6 @@ def get_filtered_tags( # pylint: disable=too-many-positional-arguments
return self._get_filtered_tags_deep(
parent_tag_value=parent_tag_value,
search_term=search_term,
include_counts=include_counts,
excluded_values=excluded_values,
)
else:
Expand All @@ -479,7 +476,6 @@ def get_filtered_tags( # pylint: disable=too-many-positional-arguments
def _get_filtered_tags_free_text(
self,
search_term: str | None,
include_counts: bool,
) -> TagDataQuerySet:
"""
Implementation of get_filtered_tags() for free text taxonomies.
Expand All @@ -499,16 +495,13 @@ def _get_filtered_tags_free_text(
_id=Value(None, output_field=models.CharField()),
)
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
if include_counts:
return qs.annotate(usage_count=models.Count("value"))
else:
return qs.distinct() # type: ignore[return-value]

return qs.distinct() # type: ignore[return-value]

def _get_filtered_tags_one_level(
self,
parent_tag_value: str | None,
search_term: str | None,
include_counts: bool,
) -> TagDataQuerySet:
"""
Implementation of get_filtered_tags() for closed taxonomies, where
Expand All @@ -531,24 +524,13 @@ def _get_filtered_tags_one_level(
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("value")
if include_counts:
# We need to include the count of how many times this tag is used to tag objects.
# You'd think we could just use:
# qs = qs.annotate(usage_count=models.Count("objecttag__pk"))
# but that adds another join which starts creating a cross product and the children and usage_count become
# intertwined and multiplied with each other. So we use a subquery.
obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
# We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
count=models.Func(F('id'), function='Count')
)
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count')))

return qs # type: ignore[return-value]

def _get_filtered_tags_deep(
self,
parent_tag_value: str | None,
search_term: str | None,
include_counts: bool,
excluded_values: list[str] | None,
) -> TagDataQuerySet:
"""
Expand Down Expand Up @@ -615,17 +597,7 @@ def _get_filtered_tags_deep(
# lineage is a case-insensitive column storing "Root\tParent\t...\tThisValue\t", so
# ordering by it gives the tree sort order that we want.
qs = qs.order_by("lineage")
if include_counts:
# Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
obj_tags = (
ObjectTag.objects.filter(tag_id=models.OuterRef("pk"))
.order_by()
.annotate(
# We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
count=models.Func(F("id"), function="Count")
)
)
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values("count")))

return qs # type: ignore[return-value]

def add_tag(
Expand Down
17 changes: 15 additions & 2 deletions src/openedx_tagging/rest_api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ...api import (
TagDoesNotExist,
add_tag_to_taxonomy,
add_usage_counts,
create_taxonomy,
delete_tags_from_taxonomy,
get_object_tag_counts,
Expand Down Expand Up @@ -844,21 +845,33 @@ def get_queryset(self) -> TagDataQuerySet:
parent_tag_value=parent_tag_value,
search_term=search_term,
depth=depth,
include_counts=include_counts,
)
if depth == 1:
# We're already returning just a single level. It will be paginated normally.
if include_counts:
results_with_counts = add_usage_counts(self.get_taxonomy(), results)
return results_with_counts

return results
elif full_depth_threshold and len(results) < full_depth_threshold:
# We can load and display all the tags in this (sub)tree at once:
self.pagination_class = DisabledTagsPagination
if include_counts:
results_with_counts = add_usage_counts(self.get_taxonomy(), results)
return results_with_counts

return results
else:
# We had to do a deep query, but we will only return one level of results.
# This is because the user did not request a deep response (via full_depth_threshold) or the result was too
# large (larger than the threshold).
# It will be paginated normally.
return results.filter(parent_value=parent_tag_value)
filtered_results = results.filter(parent_value=parent_tag_value)
if include_counts:
results_with_counts = add_usage_counts(self.get_taxonomy(), results)
return results_with_counts

return filtered_results

def post(self, request, *args, **kwargs):
"""
Expand Down
84 changes: 42 additions & 42 deletions tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,53 +752,53 @@ def get_object_tags():

@ddt.data(
("ChA", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "cha" but a child does
" Archaebacteria (used: 1, children: 0)",
"Archaea (children: 2)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 1)", # does not contain "cha" but a child does
" Archaebacteria (children: 0)",
]),
("ar", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)",
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
" Arthropoda (used: 1, children: 0)",
" Cnidaria (used: 0, children: 0)",
"Archaea (children: 2)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 1)", # does not contain "ar" but a child does
" Archaebacteria (children: 0)",
"Eukaryota (children: 1)",
" Animalia (children: 2)", # does not contain "ar" but a child does
" Arthropoda (children: 0)",
" Cnidaria (children: 0)",
]),
("aE", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ae" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)", # does not contain "ae" but a child does
" Plantae (used: 1, children: 0)",
"Archaea (children: 2)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 1)", # does not contain "ae" but a child does
" Archaebacteria (children: 0)",
"Eukaryota (children: 1)", # does not contain "ae" but a child does
" Plantae (children: 0)",
]),
("a", [
"Archaea (used: 1, children: 3)",
" DPANN (used: 0, children: 0)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4)",
" Animalia (used: 1, children: 7)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 1)",
" Mammalia (used: 0, children: 0)",
" Cnidaria (used: 0, children: 0)",
" Ctenophora (used: 0, children: 0)",
" Gastrotrich (used: 1, children: 0)",
" Placozoa (used: 1, children: 0)",
" Porifera (used: 0, children: 0)",
" Monera (used: 1, children: 0)",
" Plantae (used: 1, children: 0)",
" Protista (used: 0, children: 0)",
"Archaea (children: 3)",
" DPANN (children: 0)",
" Euryarchaeida (children: 0)",
" Proteoarchaeota (children: 0)",
"Bacteria (children: 2)",
" Archaebacteria (children: 0)",
" Eubacteria (children: 0)",
"Eukaryota (children: 4)",
" Animalia (children: 7)",
" Arthropoda (children: 0)",
" Chordata (children: 1)",
" Mammalia (children: 0)",
" Cnidaria (children: 0)",
" Ctenophora (children: 0)",
" Gastrotrich (children: 0)",
" Placozoa (children: 0)",
" Porifera (children: 0)",
" Monera (children: 0)",
" Plantae (children: 0)",
" Protista (children: 0)",
]),
)
@ddt.unpack
Expand All @@ -817,7 +817,7 @@ def test_autocomplete_tags_closed(self, search: str, expected: list[str]) -> Non
_value=value,
).save()

result = tagging_api.search_tags(closed_taxonomy, search, include_counts=True)
result = tagging_api.search_tags(closed_taxonomy, search)
assert pretty_format_tags(result, parent=False) == expected

def test_autocomplete_tags_closed_omit_object(self) -> None:
Expand Down
Loading