Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 868862f

Browse files
authored
Allow unrolling subqueries in cleanup task (#1156)
1 parent 66faf91 commit 868862f

4 files changed

Lines changed: 133 additions & 9 deletions

File tree

services/cleanup/models.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99
from shared.django_apps.compare.models import CommitComparison
1010
from shared.django_apps.core.models import Commit, Pull, Repository
1111
from shared.django_apps.profiling.models import ProfilingUpload
12-
from shared.django_apps.reports.models import CommitReport, ReportDetails
12+
from shared.django_apps.reports.models import (
13+
CommitReport,
14+
DailyTestRollup,
15+
ReportDetails,
16+
TestInstance,
17+
)
1318
from shared.django_apps.reports.models import ReportSession as Upload
1419
from shared.django_apps.staticanalysis.models import StaticAnalysisSingleFileSnapshot
1520
from shared.django_apps.timeseries.models import Dataset, Measurement
@@ -18,6 +23,7 @@
1823
from shared.utils.sessions import SessionType
1924

2025
from services.archive import ArchiveService, MinioEndpoints
26+
from services.cleanup.relations import reverse_filter
2127
from services.cleanup.utils import CleanupContext, CleanupResult
2228

2329
MANUAL_QUERY_CHUNKSIZE = 1_000
@@ -220,6 +226,21 @@ def cleanup_repository(context: CleanupContext, query: QuerySet) -> CleanupResul
220226
return CleanupResult(query._raw_delete(query.db))
221227

222228

229+
def unroll_subquery(context: CleanupContext, query: QuerySet) -> CleanupResult:
230+
reversed_query = reverse_filter(query)
231+
if not reversed_query:
232+
return CleanupResult(query._raw_delete(query.db))
233+
field, subquery = reversed_query
234+
235+
cleaned_models = 0
236+
for parent in subquery:
237+
cleaned_models += query.model.objects.filter(**{field: parent.pk})._raw_delete(
238+
query.db
239+
)
240+
241+
return CleanupResult(cleaned_models)
242+
243+
223244
# All the models that need custom python code for deletions so a bulk `DELETE` query does not work.
224245
MANUAL_CLEANUP: dict[
225246
type[Model], Callable[[CleanupContext, QuerySet], CleanupResult]
@@ -235,4 +256,6 @@ def cleanup_repository(context: CleanupContext, query: QuerySet) -> CleanupResul
235256
cleanup_with_storage_field, "content_location"
236257
),
237258
Repository: cleanup_repository,
259+
TestInstance: unroll_subquery,
260+
DailyTestRollup: unroll_subquery,
238261
}

services/cleanup/relations.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from django.db.models import Model
66
from django.db.models.expressions import Col, Expression
7+
from django.db.models.fields import Field
78
from django.db.models.lookups import Exact, In
89
from django.db.models.query import QuerySet
910
from shared.django_apps.bundle_analysis.models import CacheConfig
@@ -191,3 +192,28 @@ def simplified_lookup(queryset: QuerySet) -> QuerySet | list[int]:
191192
return condition.rhs
192193

193194
return queryset
195+
196+
197+
def reverse_filter(queryset: QuerySet) -> None | tuple[str, QuerySet]:
198+
if queryset.query.is_sliced:
199+
return None
200+
201+
where = queryset.query.where
202+
if len(where.children) != 1:
203+
return None
204+
205+
condition = where.children[0]
206+
if (
207+
not isinstance(condition, In)
208+
or not isinstance(condition.lhs, Col)
209+
or not isinstance(condition.lhs.target, Field)
210+
or condition.rhs_is_direct_value()
211+
):
212+
return None
213+
214+
column = condition.lhs.target.name
215+
216+
query = condition.rhs
217+
model = query.model
218+
219+
return (column, QuerySet(model=model, query=query))

services/cleanup/tests/test_relations.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
from django.db.models.query import QuerySet
44
from django.db.models.sql.subqueries import DeleteQuery
55
from shared.django_apps.codecov_auth.models import Owner
6+
from shared.django_apps.codecov_auth.tests.factories import OwnerFactory
67
from shared.django_apps.core.models import Repository
7-
from shared.django_apps.reports.models import ReportDetails
8+
from shared.django_apps.core.tests.factories import RepositoryFactory
9+
from shared.django_apps.reports.models import ReportDetails, TestInstance
810

9-
from services.cleanup.relations import build_relation_graph, simplified_lookup
11+
from services.cleanup.relations import (
12+
build_relation_graph,
13+
reverse_filter,
14+
simplified_lookup,
15+
)
1016

1117

1218
def dump_delete_queries(queryset: QuerySet) -> str:
@@ -60,3 +66,34 @@ def test_can_simplify_queries():
6066
def test_leaf_table(snapshot):
6167
query = ReportDetails.objects.all()
6268
assert dump_delete_queries(query) == snapshot("leaf.txt")
69+
70+
71+
@pytest.mark.django_db
72+
def test_can_reverse_filter():
73+
query = TestInstance.objects.filter(repoid=123)
74+
assert reverse_filter(query) is None
75+
76+
query = TestInstance.objects.filter(repoid__in=[123, 234])
77+
assert reverse_filter(query) is None
78+
79+
query = TestInstance.objects.filter(
80+
repoid__in=Repository.objects.filter(author=123), branch="foo"
81+
)
82+
assert reverse_filter(query) is None
83+
84+
owner = OwnerFactory()
85+
r1 = RepositoryFactory(author=owner)
86+
r2 = RepositoryFactory(author=owner)
87+
r3 = RepositoryFactory(author=owner)
88+
89+
filtered_qs = Repository.objects.filter(author=owner.ownerid)
90+
query = TestInstance.objects.filter(repoid__in=filtered_qs)
91+
92+
column, reversed_query = reverse_filter(query)
93+
94+
assert column == "repoid"
95+
# ideally, we would assert that the `QuerySet` itself is the same,
96+
# but that is not really doable. but in essense we only care that it yields
97+
# the same results, which it does:
98+
assert set(reversed_query) == set(filtered_qs)
99+
assert set(reversed_query) == {r1, r2, r3}

tasks/tests/unit/test_delete_owner.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,20 @@
1010
CommitFactory,
1111
RepositoryFactory,
1212
)
13+
from shared.django_apps.reports.models import (
14+
CommitReport,
15+
DailyTestRollup,
16+
Test,
17+
TestInstance,
18+
)
19+
from shared.django_apps.reports.models import ReportSession as Upload
20+
from shared.django_apps.reports.tests.factories import (
21+
CommitReportFactory,
22+
DailyTestRollupFactory,
23+
TestFactory,
24+
TestInstanceFactory,
25+
UploadFactory,
26+
)
1327

1428
from services.cleanup.utils import CleanupResult, CleanupSummary
1529
from tasks.delete_owner import DeleteOwnerTask
@@ -53,26 +67,50 @@ def test_delete_owner_deletes_owner_with_commit_compares(mock_storage):
5367
compare_commit = CommitFactory(repository=repo, author=user)
5468
CommitComparisonFactory(base_commit=base_commit, compare_commit=compare_commit)
5569

70+
report = CommitReportFactory(commit=base_commit)
71+
upload = UploadFactory(report=report)
72+
test = TestFactory(repository=repo)
73+
TestInstanceFactory(test=test, upload=upload)
74+
DailyTestRollupFactory(test=test, repoid=repo.repoid)
75+
76+
# This test factory implicitly creates:
77+
# - Test with a Repository and an Owner,
78+
# - An Upload with a CommitReport, a Commit that has a different Owner and
79+
# one more Repository with yet another different Owner.
80+
# And then also a Branch and a Pull via DB triggers because of the Commit.
81+
remaining = TestInstanceFactory()
82+
5683
res = DeleteOwnerTask().run_impl({}, user.ownerid)
5784

5885
assert res == CleanupSummary(
59-
CleanupResult(7),
86+
CleanupResult(12),
6087
{
6188
Branch: CleanupResult(1),
6289
Commit: CleanupResult(2),
6390
CommitComparison: CleanupResult(1),
6491
Owner: CleanupResult(1),
6592
Pull: CleanupResult(1),
6693
Repository: CleanupResult(1),
94+
CommitReport: CleanupResult(1),
95+
Upload: CleanupResult(1),
96+
Test: CleanupResult(1),
97+
TestInstance: CleanupResult(1),
98+
DailyTestRollup: CleanupResult(1),
6799
},
68100
)
69101

70-
assert Branch.objects.count() == 0
71-
assert Commit.objects.count() == 0
102+
assert list(TestInstance.objects.all()) == [remaining]
103+
# See the comment above why we have all of these objects
104+
assert Branch.objects.count() == 1
105+
assert Commit.objects.count() == 1
72106
assert CommitComparison.objects.count() == 0
73-
assert Owner.objects.count() == 0
74-
assert Pull.objects.count() == 0
75-
assert Repository.objects.count() == 0
107+
assert Owner.objects.count() == 3
108+
assert Pull.objects.count() == 1
109+
assert Repository.objects.count() == 2
110+
assert CommitReport.objects.count() == 1
111+
assert Upload.objects.count() == 1
112+
assert Test.objects.count() == 1
113+
assert DailyTestRollup.objects.count() == 0
76114

77115

78116
@pytest.mark.django_db(databases=["timeseries", "default"], transaction=True)

0 commit comments

Comments
 (0)