Skip to content

Commit 4c4b876

Browse files
feat!: deprecate "descendant_count" to improve performance
It can be relatively easily calculated in python using the result data if needed, now that the API otherwise supports unlimited depth.
1 parent 02908db commit 4c4b876

8 files changed

Lines changed: 57 additions & 123 deletions

File tree

src/openedx_tagging/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class TagData(TypedDict):
2121
value: str
2222
external_id: str | None
2323
child_count: int
24-
descendant_count: int
24+
descendant_count: int # Deprecated; do not use.
2525
depth: int
2626
parent_value: str | None
2727
# Note: usage_count may or may not be present, depending on the request.

src/openedx_tagging/models/base.py

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -524,14 +524,8 @@ def _get_filtered_tags_one_level(
524524
qs = self.tag_set.filter(parent=None)
525525
qs = qs.annotate(parent_value=Value(None, output_field=models.CharField()))
526526
qs = qs.annotate(child_count=models.Count("children", distinct=True)) # type: ignore[no-redef]
527-
# Count all descendants at any depth using depth + lineage prefix.
528-
# depth__gt correctly excludes self; lineage prefix matches all descendants.
529-
descendants_sq = (
530-
self.tag_set.filter(depth__gt=models.OuterRef("depth"), lineage__startswith=models.OuterRef("lineage"))
531-
.order_by()
532-
.annotate(count=models.Func(F("id"), function="Count"))
533-
)
534-
qs = qs.annotate(descendant_count=models.Subquery(descendants_sq.values("count"))) # type: ignore[no-redef]
527+
# Add the deprecated "descendant_count field". For now it's just the same as child_count.
528+
qs = qs.annotate(descendant_count=F("child_count"))
535529
# Filter by search term:
536530
if search_term:
537531
qs = qs.filter(value__icontains=search_term)
@@ -562,11 +556,6 @@ def _get_filtered_tags_deep(
562556
Implementation of get_filtered_tags() for closed taxonomies, where
563557
we're including tags from multiple levels of the hierarchy.
564558
"""
565-
# Note: we ignore a lot of "no-redef" warnings here because we use annotations to pre-load fields like
566-
# `child_count`, and `descendant_count` for all tags in a single query rather than computing them later for each
567-
# Tag, with additional queries. Also, we are converting the result to a values query (that returns a dict), not
568-
# returning actual Tag objects at the end, but mypy doesn't know that.
569-
570559
if parent_tag_value:
571560
# Get a subtree below this tag:
572561
main_parent_tag = self.tag_for_value(parent_tag_value)
@@ -611,28 +600,14 @@ def _get_filtered_tags_deep(
611600
# frontend.
612601

613602
# Count the direct children, and annotate the result on each row as "child_count".
614-
# The query below produces the same results as:
615-
# qs = initial_qs.annotate(child_count=models.Count("children"))
616-
# However, this correlated subquery avoids a JOIN + GROUP BY, and is far more efficient in practice.
617-
# This also lets us use the same code path whether there's a search_term or not.
618603
child_count_sq = (
619604
initial_qs.filter(parent_id=models.OuterRef("pk"))
620605
.order_by()
621606
.annotate(count=models.Func(F("id"), function="Count"))
622607
)
623-
# Count all descendants at any depth using the lineage prefix trick.
624-
descendants_sq = (
625-
initial_qs.filter(
626-
depth__gt=models.OuterRef("depth"),
627-
lineage__startswith=models.OuterRef("lineage"),
628-
)
629-
.order_by()
630-
.annotate(count=models.Func(F("id"), function="Count"))
631-
)
632-
qs = initial_qs.annotate( # type: ignore[no-redef]
633-
child_count=models.Subquery(child_count_sq.values("count")),
634-
descendant_count=models.Subquery(descendants_sq.values("count")),
635-
)
608+
qs = initial_qs.annotate(child_count=models.Subquery(child_count_sq.values("count")))
609+
# Add the deprecated "descendant_count" field. For now it just is the same as child_count.
610+
qs = qs.annotate(descendant_count=F("child_count"))
636611

637612
# Add the parent value
638613
qs = qs.annotate(parent_value=F("parent__value"))

src/openedx_tagging/rest_api/v1/serializers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ class TagDataSerializer(UserPermissionsSerializerMixin, serializers.Serializer):
226226
value = serializers.CharField()
227227
external_id = serializers.CharField(allow_null=True)
228228
child_count = serializers.IntegerField()
229-
descendant_count = serializers.IntegerField()
229+
descendant_count = serializers.IntegerField() # deprecated
230230
depth = serializers.IntegerField()
231231
parent_value = serializers.CharField(allow_null=True)
232232
usage_count = serializers.IntegerField(required=False)

tests/openedx_tagging/import_export/test_template.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,25 +53,25 @@ def test_import_template(self, template_file, parser_format):
5353
'Electronic instruments (ELECTRIC) (None) (children: 2)',
5454
' Synthesizer (SYNTH) (Electronic instruments) (children: 0)',
5555
' Theramin (THERAMIN) (Electronic instruments) (children: 0)',
56-
'Percussion instruments (PERCUSS) (None) (children: 3 + 6)',
56+
'Percussion instruments (PERCUSS) (None) (children: 3)',
5757
' Chordophone (CHORD) (Percussion instruments) (children: 1)',
5858
' Piano (PIANO) (Chordophone) (children: 0)',
5959
' Idiophone (BELLS) (Percussion instruments) (children: 2)',
6060
' Celesta (CELESTA) (Idiophone) (children: 0)',
6161
' Hi-hat (HI-HAT) (Idiophone) (children: 0)',
62-
' Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)',
62+
' Membranophone (DRUMS) (Percussion instruments) (children: 2)',
6363
' Cajón (CAJÓN) (Membranophone) (children: 1)',
6464
' Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)',
6565
' Tabla (TABLA) (Membranophone) (children: 0)',
66-
'String instruments (STRINGS) (None) (children: 2 + 5)',
66+
'String instruments (STRINGS) (None) (children: 2)',
6767
' Bowed strings (BOW) (String instruments) (children: 2)',
6868
' Cello (CELLO) (Bowed strings) (children: 0)',
6969
' Violin (VIOLIN) (Bowed strings) (children: 0)',
7070
' Plucked strings (PLUCK) (String instruments) (children: 3)',
7171
' Banjo (BANJO) (Plucked strings) (children: 0)',
7272
' Harp (HARP) (Plucked strings) (children: 0)',
7373
' Mandolin (MANDOLIN) (Plucked strings) (children: 0)',
74-
'Wind instruments (WINDS) (None) (children: 2 + 5)',
74+
'Wind instruments (WINDS) (None) (children: 2)',
7575
' Brass (BRASS) (Wind instruments) (children: 2)',
7676
' Trumpet (TRUMPET) (Brass) (children: 0)',
7777
' Tuba (TUBA) (Brass) (children: 0)',

tests/openedx_tagging/test_api.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ def test_get_tags(self) -> None:
145145
"Bacteria (children: 2)",
146146
" Archaebacteria (children: 0)",
147147
" Eubacteria (children: 0)",
148-
"Eukaryota (children: 5 + 8)",
149-
" Animalia (children: 7 + 1)",
148+
"Eukaryota (children: 5)",
149+
" Animalia (children: 7)",
150150
" Arthropoda (children: 0)",
151151
" Chordata (children: 1)",
152152
" Mammalia (children: 0)",
@@ -175,7 +175,7 @@ def test_get_root_tags(self):
175175
assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [
176176
'Archaea (children: 3)',
177177
'Bacteria (children: 2)',
178-
'Eukaryota (children: 5 + 8)',
178+
'Eukaryota (children: 5)',
179179
]
180180

181181
@override_settings(LANGUAGES=test_languages)
@@ -764,7 +764,7 @@ def get_object_tags():
764764
" Proteoarchaeota (used: 0, children: 0)",
765765
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
766766
" Archaebacteria (used: 1, children: 0)",
767-
"Eukaryota (used: 0, children: 1 + 2)",
767+
"Eukaryota (used: 0, children: 1)",
768768
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
769769
" Arthropoda (used: 1, children: 0)",
770770
" Cnidaria (used: 0, children: 0)",
@@ -786,8 +786,8 @@ def get_object_tags():
786786
"Bacteria (used: 0, children: 2)",
787787
" Archaebacteria (used: 1, children: 0)",
788788
" Eubacteria (used: 0, children: 0)",
789-
"Eukaryota (used: 0, children: 4 + 8)",
790-
" Animalia (used: 1, children: 7 + 1)",
789+
"Eukaryota (used: 0, children: 4)",
790+
" Animalia (used: 1, children: 7)",
791791
" Arthropoda (used: 1, children: 0)",
792792
" Chordata (used: 0, children: 1)",
793793
" Mammalia (used: 0, children: 0)",
@@ -1107,10 +1107,10 @@ def test_deep_taxonomy(self) -> None:
11071107
assert pretty_format_tags(
11081108
tagging_api.get_tags(taxonomy)
11091109
) == [
1110-
"Root - depth 0 (None) (children: 1 + 4)",
1111-
" Child - depth 1 (Root - depth 0) (children: 1 + 3)",
1112-
" Grandchild - depth 2 (Child - depth 1) (children: 1 + 2)",
1113-
" Great-Grandchild - depth 3 (Grandchild - depth 2) (children: 1 + 1)",
1110+
"Root - depth 0 (None) (children: 1)",
1111+
" Child - depth 1 (Root - depth 0) (children: 1)",
1112+
" Grandchild - depth 2 (Child - depth 1) (children: 1)",
1113+
" Great-Grandchild - depth 3 (Grandchild - depth 2) (children: 1)",
11141114
" Great-Great-Grandchild - depth 4 (Great-Grandchild - depth 3) (children: 1)",
11151115
" Great-Great-Great-Grandchild - depth 5 (Great-Great-Grandchild - depth 4) (children: 0)",
11161116
]
@@ -1128,7 +1128,7 @@ def test_deep_taxonomy(self) -> None:
11281128
# Or even load a subtree:
11291129
deep_result = taxonomy.get_filtered_tags(depth=None, parent_tag_value=tag_2.value)
11301130
assert pretty_format_tags(deep_result, parent=False) == [
1131-
' Great-Grandchild - depth 3 (children: 1 + 1)',
1131+
' Great-Grandchild - depth 3 (children: 1)',
11321132
' Great-Great-Grandchild - depth 4 (children: 1)',
11331133
' Great-Great-Great-Grandchild - depth 5 (children: 0)',
11341134
]

tests/openedx_tagging/test_models.py

Lines changed: 8 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def test_get_root(self) -> None:
331331
# These are the root tags, in alphabetical order:
332332
{"value": "Archaea", "child_count": 3, "descendant_count": 3, **common_fields},
333333
{"value": "Bacteria", "child_count": 2, "descendant_count": 2, **common_fields},
334-
{"value": "Eukaryota", "child_count": 5, "descendant_count": 13, **common_fields},
334+
{"value": "Eukaryota", "child_count": 5, "descendant_count": 5, **common_fields},
335335
]
336336

337337
def test_get_child_tags_one_level(self) -> None:
@@ -345,7 +345,7 @@ def test_get_child_tags_one_level(self) -> None:
345345
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
346346
assert result == [
347347
# These are the Eukaryota tags, in alphabetical order:
348-
{"value": "Animalia", "child_count": 7, "descendant_count": 8, **common_fields},
348+
{"value": "Animalia", "child_count": 7, "descendant_count": 7, **common_fields},
349349
{"value": "Fungi", "child_count": 0, "descendant_count": 0, **common_fields},
350350
{"value": "Monera", "child_count": 0, "descendant_count": 0, **common_fields},
351351
{"value": "Plantae", "child_count": 0, "descendant_count": 0, **common_fields},
@@ -441,8 +441,8 @@ def test_get_all(self) -> None:
441441
"Bacteria (None) (children: 2)",
442442
" Archaebacteria (Bacteria) (children: 0)",
443443
" Eubacteria (Bacteria) (children: 0)",
444-
"Eukaryota (None) (children: 5 + 8)",
445-
" Animalia (Eukaryota) (children: 7 + 1)",
444+
"Eukaryota (None) (children: 5)",
445+
" Animalia (Eukaryota) (children: 7)",
446446
" Arthropoda (Animalia) (children: 0)",
447447
" Chordata (Animalia) (children: 1)",
448448
" Mammalia (Chordata) (children: 0)",
@@ -478,7 +478,7 @@ def test_search_2(self) -> None:
478478
"""
479479
result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata"))
480480
assert result == [
481-
"Eukaryota (None) (children: 1 + 1)", # Has one child that matches, plus one additional matching descendant
481+
"Eukaryota (None) (children: 1)", # Has one child that matches
482482
" Animalia (Eukaryota) (children: 1)",
483483
" Chordata (Animalia) (children: 0)", # this is the matching tag.
484484
]
@@ -492,8 +492,8 @@ def test_search_3(self) -> None:
492492
assert result == [
493493
"Archaea (None) (children: 1)",
494494
" Proteoarchaeota (Archaea) (children: 0)",
495-
"Eukaryota (None) (children: 2 + 2)", # 2 direct matching children, 2 additional matching descendants
496-
" Animalia (Eukaryota) (children: 2)",
495+
"Eukaryota (None) (children: 2)", # 2 direct matching children
496+
" Animalia (Eukaryota) (children: 2)", # also 2 matching children
497497
" Arthropoda (Animalia) (children: 0)", # match
498498
" Gastrotrich (Animalia) (children: 0)", # match
499499
" Protista (Eukaryota) (children: 0)", # match
@@ -575,7 +575,7 @@ def test_tree_sort(self) -> None:
575575
taxonomy = self.create_sort_test_taxonomy()
576576
result = pretty_format_tags(taxonomy.get_filtered_tags())
577577
assert result == [
578-
"1 (None) (children: 4 + 1)",
578+
"1 (None) (children: 4)",
579579
" 1 A (1) (children: 0)",
580580
" 11 (1) (children: 0)",
581581
" 11111 (1) (children: 1)",
@@ -595,43 +595,6 @@ def test_tree_sort(self) -> None:
595595
" azure (ALPHABET) (children: 0)",
596596
]
597597

598-
def test_descendant_counts(self) -> None:
599-
"""
600-
Test getting the descendant count on a taxonomy known to cause aggregation
601-
bugs unless the aggregations are correctly specified with distinct=True
602-
603-
https://docs.djangoproject.com/en/5.0/topics/db/aggregation/#combining-multiple-aggregations
604-
"""
605-
taxonomy = api.create_taxonomy("ESDC Subset")
606-
api.add_tag_to_taxonomy(taxonomy, "Interests") # root tag
607-
api.add_tag_to_taxonomy(taxonomy, "Holland Codes", parent_tag_value="Interests") # child tag
608-
# Create the grandchild tag:
609-
g_tag = api.add_tag_to_taxonomy(taxonomy, "Interests - Holland Codes", parent_tag_value="Holland Codes")
610-
# Create the 6 great-grandchild tags:
611-
api.add_tag_to_taxonomy(taxonomy, "Artistic", parent_tag_value=g_tag.value)
612-
api.add_tag_to_taxonomy(taxonomy, "Conventional", parent_tag_value=g_tag.value)
613-
api.add_tag_to_taxonomy(taxonomy, "Enterprising", parent_tag_value=g_tag.value)
614-
api.add_tag_to_taxonomy(taxonomy, "Investigative", parent_tag_value=g_tag.value)
615-
api.add_tag_to_taxonomy(taxonomy, "Realistic", parent_tag_value=g_tag.value)
616-
api.add_tag_to_taxonomy(taxonomy, "Social", parent_tag_value=g_tag.value)
617-
618-
result = pretty_format_tags(taxonomy.get_filtered_tags(depth=1, include_counts=True))
619-
assert result == [
620-
"Interests (None) (used: 0, children: 1 + 7)", # 1 child + (1 grandchild and 6 great grandchild tags)
621-
]
622-
result2 = pretty_format_tags(taxonomy.get_filtered_tags(depth=None, include_counts=True))
623-
assert result2 == [
624-
"Interests (None) (used: 0, children: 1 + 7)",
625-
" Holland Codes (Interests) (used: 0, children: 1 + 6)",
626-
" Interests - Holland Codes (Holland Codes) (used: 0, children: 6)",
627-
" Artistic (Interests - Holland Codes) (used: 0, children: 0)",
628-
" Conventional (Interests - Holland Codes) (used: 0, children: 0)",
629-
" Enterprising (Interests - Holland Codes) (used: 0, children: 0)",
630-
" Investigative (Interests - Holland Codes) (used: 0, children: 0)",
631-
" Realistic (Interests - Holland Codes) (used: 0, children: 0)",
632-
" Social (Interests - Holland Codes) (used: 0, children: 0)",
633-
]
634-
635598

636599
class TestFilteredTagsFreeTextTaxonomy(TestCase):
637600
"""

0 commit comments

Comments
 (0)