openedx · bradenmacdonald · Apr 13, 2026 · Mar 18, 2026 · Mar 23, 2026 · Mar 25, 2026
diff --git a/src/openedx_core/__init__.py b/src/openedx_core/__init__.py
@@ -6,4 +6,4 @@
 """
 
 # The version for the entire repository
-__version__ = "0.39.1"
+__version__ = "0.39.2"
diff --git a/src/openedx_tagging/api.py b/src/openedx_tagging/api.py
@@ -12,7 +12,8 @@
 """
 from __future__ import annotations
 
-from typing import Any
+from collections import defaultdict
+from typing import Any, Counter
 
 from django.db import models, transaction
 from django.db.models import F, QuerySet, Value
@@ -116,7 +117,6 @@ def search_tags(
     taxonomy: Taxonomy,
     search_term: str,
     exclude_object_id: str | None = None,
-    include_counts: bool = False,
 ) -> TagDataQuerySet:
     """
     Returns a list of all tags that contains `search_term` of the given
@@ -138,7 +138,6 @@ def search_tags(
     qs = taxonomy.cast().get_filtered_tags(
         search_term=search_term,
         excluded_values=excluded_values,
-        include_counts=include_counts,
     )
     return qs
 
@@ -525,3 +524,49 @@ def unmark_copied_tags(object_id: str) -> None:
     Update copied object tags on the given object to mark them as "not copied".
     """
     ObjectTag.objects.filter(object_id=object_id).update(is_copied=False)
+
+
+def add_usage_counts(taxonomy: Taxonomy, tag_data: TagDataQuerySet) -> TagDataQuerySet:
+    """
+    Set ``usage_count`` on every row of ``tag_data`` (in place) and return it.
+
+    The usage count of a tag is the number of distinct objects (courses,
+    libraries, modules, ...) it applies to, directly or indirectly. A tag
+    applies indirectly to an object when one of its descendants is applied
+    to that object: if "Chemistry" and "Physics" are applied once each to
+    different objects, their parent "Natural Science" has a count of 2.
+
+    Deduplication: an object is counted at most once per tag. If "Chemistry"
+    and "Physics" are both applied to the same course, "Natural Science" is
+    still counted only once for that course.
+
+    Args:
+        taxonomy: the taxonomy whose object tags are counted.
+        tag_data: the rows to annotate; each must have a "value" key.
+
+    For performance reasons the counts are computed in memory and written
+    onto the rows, rather than added with annotate() on the QuerySet, which
+    would require a very expensive join.
+    """
+    # One row per applied tag: the tagged object and the tag's full lineage.
+    object_tags = taxonomy.objecttag_set.values_list("object_id", "tag__lineage")
+    # For each object, every tag value applied to it directly or via a descendant.
+    tags_by_object: defaultdict[str, set[str]] = defaultdict(set)
+
+    for object_id, tag_lineage in object_tags:
+        if not tag_lineage:
+            continue  # no lineage available, so nothing to attribute
+        # The lineage is a tab-separated chain of tag values from the root down to
+        # the applied tag. Empty segments (e.g. from a trailing tab) are not tags.
+        tags_by_object[object_id].update(t for t in tag_lineage.split("\t") if t)
+
+    # The per-object sets already de-duplicate, so each object adds at most 1 per tag.
+    tag_counts: Counter[str] = Counter()
+    for applied_tags in tags_by_object.values():
+        tag_counts.update(applied_tags)
+
+    # In-memory 'annotation'; this is faster than using annotate() on the QuerySet.
+    for row in tag_data:
+        row["usage_count"] = tag_counts.get(row["value"], 0)
+
+    return tag_data
diff --git a/src/openedx_tagging/models/base.py b/src/openedx_tagging/models/base.py
@@ -426,7 +426,6 @@ def get_filtered_tags(  # pylint: disable=too-many-positional-arguments
         depth: int | None = None,
         parent_tag_value: str | None = None,
         search_term: str | None = None,
-        include_counts: bool = False,
         excluded_values: list[str] | None = None,
     ) -> TagDataQuerySet:
         """
@@ -451,7 +450,7 @@ def get_filtered_tags(  # pylint: disable=too-many-positional-arguments
         if self.allow_free_text:
             if parent_tag_value is not None:
                 raise ValueError("Cannot specify a parent tag ID for free text taxonomies")
-            result = self._get_filtered_tags_free_text(search_term=search_term, include_counts=include_counts)
+            result = self._get_filtered_tags_free_text(search_term=search_term)
             if excluded_values:
                 return result.exclude(value__in=excluded_values)
             else:
@@ -460,7 +459,6 @@ def get_filtered_tags(  # pylint: disable=too-many-positional-arguments
             result = self._get_filtered_tags_one_level(
                 parent_tag_value=parent_tag_value,
                 search_term=search_term,
-                include_counts=include_counts,
             )
             if excluded_values:
                 return result.exclude(value__in=excluded_values)
@@ -470,7 +468,6 @@ def get_filtered_tags(  # pylint: disable=too-many-positional-arguments
             return self._get_filtered_tags_deep(
                 parent_tag_value=parent_tag_value,
                 search_term=search_term,
-                include_counts=include_counts,
                 excluded_values=excluded_values,
             )
         else:
@@ -479,7 +476,6 @@ def get_filtered_tags(  # pylint: disable=too-many-positional-arguments
     def _get_filtered_tags_free_text(
         self,
         search_term: str | None,
-        include_counts: bool,
     ) -> TagDataQuerySet:
         """
         Implementation of get_filtered_tags() for free text taxonomies.
@@ -499,16 +495,13 @@ def _get_filtered_tags_free_text(
             _id=Value(None, output_field=models.CharField()),
         )
         qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
-        if include_counts:
-            return qs.annotate(usage_count=models.Count("value"))
-        else:
-            return qs.distinct()  # type: ignore[return-value]
+
+        return qs.distinct()  # type: ignore[return-value]
 
     def _get_filtered_tags_one_level(
         self,
         parent_tag_value: str | None,
         search_term: str | None,
-        include_counts: bool,
     ) -> TagDataQuerySet:
         """
         Implementation of get_filtered_tags() for closed taxonomies, where
@@ -531,24 +524,13 @@ def _get_filtered_tags_one_level(
         qs = qs.annotate(_id=F("id"))  # ID has an underscore to encourage use of 'value' rather than this internal ID
         qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id")
         qs = qs.order_by("value")
-        if include_counts:
-            # We need to include the count of how many times this tag is used to tag objects.
-            # You'd think we could just use:
-            #     qs = qs.annotate(usage_count=models.Count("objecttag__pk"))
-            # but that adds another join which starts creating a cross product and the children and usage_count become
-            # intertwined and multiplied with each other. So we use a subquery.
-            obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
-                # We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
-                count=models.Func(F('id'), function='Count')
-            )
-            qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count')))
+
         return qs  # type: ignore[return-value]
 
     def _get_filtered_tags_deep(
         self,
         parent_tag_value: str | None,
         search_term: str | None,
-        include_counts: bool,
         excluded_values: list[str] | None,
     ) -> TagDataQuerySet:
         """
@@ -615,17 +597,7 @@ def _get_filtered_tags_deep(
         # lineage is a case-insensitive column storing "Root\tParent\t...\tThisValue\t", so
         # ordering by it gives the tree sort order that we want.
         qs = qs.order_by("lineage")
-        if include_counts:
-            # Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
-            obj_tags = (
-                ObjectTag.objects.filter(tag_id=models.OuterRef("pk"))
-                .order_by()
-                .annotate(
-                    # We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
-                    count=models.Func(F("id"), function="Count")
-                )
-            )
-            qs = qs.annotate(usage_count=models.Subquery(obj_tags.values("count")))
+
         return qs  # type: ignore[return-value]
 
     def add_tag(

diff --git a/src/openedx_tagging/rest_api/v1/views.py b/src/openedx_tagging/rest_api/v1/views.py
@@ -17,6 +17,7 @@
 from ...api import (
     TagDoesNotExist,
     add_tag_to_taxonomy,
+    add_usage_counts,
     create_taxonomy,
     delete_tags_from_taxonomy,
     get_object_tag_counts,
@@ -844,21 +845,33 @@ def get_queryset(self) -> TagDataQuerySet:
             parent_tag_value=parent_tag_value,
             search_term=search_term,
             depth=depth,
-            include_counts=include_counts,
         )
         if depth == 1:
             # We're already returning just a single level. It will be paginated normally.
+            if include_counts:
+                results_with_counts = add_usage_counts(self.get_taxonomy(), results)
+                return results_with_counts
+
             return results
         elif full_depth_threshold and len(results) < full_depth_threshold:
             # We can load and display all the tags in this (sub)tree at once:
             self.pagination_class = DisabledTagsPagination
+            if include_counts:
+                results_with_counts = add_usage_counts(self.get_taxonomy(), results)
+                return results_with_counts
+
             return results
         else:
             # We had to do a deep query, but we will only return one level of results.
             # This is because the user did not request a deep response (via full_depth_threshold) or the result was too
             # large (larger than the threshold).
             # It will be paginated normally.
-            return results.filter(parent_value=parent_tag_value)
+            filtered_results = results.filter(parent_value=parent_tag_value)
+            if include_counts:
+                # Count on the filtered rows: passing `results` would return the whole deep tree.
+                return add_usage_counts(self.get_taxonomy(), filtered_results)
+
+            return filtered_results
 
     def post(self, request, *args, **kwargs):
         """

diff --git a/tests/openedx_tagging/test_api.py b/tests/openedx_tagging/test_api.py
@@ -752,53 +752,53 @@ def get_object_tags():
 
     @ddt.data(
         ("ChA", [
-            "Archaea (used: 1, children: 2)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 1)",  # does not contain "cha" but a child does
-            "  Archaebacteria (used: 1, children: 0)",
+            "Archaea (children: 2)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 1)",  # does not contain "cha" but a child does
+            "  Archaebacteria (children: 0)",
         ]),
         ("ar", [
-            "Archaea (used: 1, children: 2)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 1)",  # does not contain "ar" but a child does
-            "  Archaebacteria (used: 1, children: 0)",
-            "Eukaryota (used: 0, children: 1)",
-            "  Animalia (used: 1, children: 2)",  # does not contain "ar" but a child does
-            "    Arthropoda (used: 1, children: 0)",
-            "    Cnidaria (used: 0, children: 0)",
+            "Archaea (children: 2)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 1)",  # does not contain "ar" but a child does
+            "  Archaebacteria (children: 0)",
+            "Eukaryota (children: 1)",
+            "  Animalia (children: 2)",  # does not contain "ar" but a child does
+            "    Arthropoda (children: 0)",
+            "    Cnidaria (children: 0)",
         ]),
         ("aE", [
-            "Archaea (used: 1, children: 2)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 1)",  # does not contain "ae" but a child does
-            "  Archaebacteria (used: 1, children: 0)",
-            "Eukaryota (used: 0, children: 1)",  # does not contain "ae" but a child does
-            "  Plantae (used: 1, children: 0)",
+            "Archaea (children: 2)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 1)",  # does not contain "ae" but a child does
+            "  Archaebacteria (children: 0)",
+            "Eukaryota (children: 1)",  # does not contain "ae" but a child does
+            "  Plantae (children: 0)",
         ]),
         ("a", [
-            "Archaea (used: 1, children: 3)",
-            "  DPANN (used: 0, children: 0)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 2)",
-            "  Archaebacteria (used: 1, children: 0)",
-            "  Eubacteria (used: 0, children: 0)",
-            "Eukaryota (used: 0, children: 4)",
-            "  Animalia (used: 1, children: 7)",
-            "    Arthropoda (used: 1, children: 0)",
-            "    Chordata (used: 0, children: 1)",
-            "      Mammalia (used: 0, children: 0)",
-            "    Cnidaria (used: 0, children: 0)",
-            "    Ctenophora (used: 0, children: 0)",
-            "    Gastrotrich (used: 1, children: 0)",
-            "    Placozoa (used: 1, children: 0)",
-            "    Porifera (used: 0, children: 0)",
-            "  Monera (used: 1, children: 0)",
-            "  Plantae (used: 1, children: 0)",
-            "  Protista (used: 0, children: 0)",
+            "Archaea (children: 3)",
+            "  DPANN (children: 0)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 2)",
+            "  Archaebacteria (children: 0)",
+            "  Eubacteria (children: 0)",
+            "Eukaryota (children: 4)",
+            "  Animalia (children: 7)",
+            "    Arthropoda (children: 0)",
+            "    Chordata (children: 1)",
+            "      Mammalia (children: 0)",
+            "    Cnidaria (children: 0)",
+            "    Ctenophora (children: 0)",
+            "    Gastrotrich (children: 0)",
+            "    Placozoa (children: 0)",
+            "    Porifera (children: 0)",
+            "  Monera (children: 0)",
+            "  Plantae (children: 0)",
+            "  Protista (children: 0)",
         ]),
     )
     @ddt.unpack
@@ -817,7 +817,7 @@ def test_autocomplete_tags_closed(self, search: str, expected: list[str]) -> Non
                 _value=value,
             ).save()
 
-        result = tagging_api.search_tags(closed_taxonomy, search, include_counts=True)
+        result = tagging_api.search_tags(closed_taxonomy, search)
         assert pretty_format_tags(result, parent=False) == expected
 
     def test_autocomplete_tags_closed_omit_object(self) -> None: