diff --git a/services/test_analytics/ta_cache_rollups.py b/services/test_analytics/ta_cache_rollups.py new file mode 100644 index 000000000..15ebb09b9 --- /dev/null +++ b/services/test_analytics/ta_cache_rollups.py @@ -0,0 +1,77 @@ +from datetime import UTC +from io import BytesIO + +import polars as pl +import shared.storage + +from django_scaffold import settings +from services.test_analytics.ta_timeseries import ( + get_branch_summary, + get_summary, + get_testrun_branch_summary_via_testrun, +) + + +def rollup_blob_path(repoid: int, branch: str | None = None) -> str: + return ( + f"test_analytics/branch_rollups/{repoid}/{branch}.arrow" + if branch + else f"test_analytics/repo_rollups/{repoid}.arrow" + ) + + +POLARS_SCHEMA = [ + "computed_name", + ("flags", pl.List(pl.String)), + "failing_commits", + "last_duration", + "avg_duration", + "pass_count", + "fail_count", + "flaky_fail_count", + "skip_count", + ("updated_at", pl.Datetime(time_zone=UTC)), + "timestamp_bin", +] + + +def cache_rollups(repoid: int, branch: str | None = None): + storage_service = shared.storage.get_appropriate_storage_service(repoid) + serialized_table: BytesIO + + if branch: + if branch in {"main", "master", "develop"}: + summaries = get_branch_summary(repoid, branch) + else: + summaries = get_testrun_branch_summary_via_testrun(repoid, branch) + else: + summaries = get_summary(repoid) + + data = [ + { + "computed_name": summary.computed_name, + "flags": summary.flags, + "failing_commits": summary.failing_commits, + "last_duration": summary.last_duration_seconds, + "avg_duration": summary.avg_duration_seconds, + "pass_count": summary.pass_count, + "fail_count": summary.fail_count, + "flaky_fail_count": summary.flaky_fail_count, + "skip_count": summary.skip_count, + "updated_at": summary.updated_at, + "timestamp_bin": summary.timestamp_bin.date(), + } + for summary in summaries + ] + + serialized_table = pl.DataFrame( + data, + POLARS_SCHEMA, + orient="row", + ).write_ipc(None) + + serialized_table.seek(0) + + storage_service.write_file( + settings.GCS_BUCKET_NAME, rollup_blob_path(repoid, branch), serialized_table + ) diff --git a/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups__0.json b/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups__0.json new file mode 100644 index 000000000..987b14e08 --- /dev/null +++ b/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups__0.json @@ -0,0 +1,42 @@ +{ + "computed_name": [ + "computed_name2", + "computed_name" + ], + "flags": [ + [ + "test-rollups2" + ], + [ + "test-rollups" + ] + ], + "failing_commits": [ + 2, + 1 + ], + "last_duration": [ + 200.0, + 100.0 + ], + "avg_duration": [ + 200.0, + 100.0 + ], + "pass_count": [ + 0, + 0 + ], + "fail_count": [ + 2, + 1 + ], + "flaky_fail_count": [ + 0, + 0 + ], + "skip_count": [ + 0, + 0 + ] +} diff --git a/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_branch__0.json b/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_branch__0.json new file mode 100644 index 000000000..6798c8832 --- /dev/null +++ b/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_branch__0.json @@ -0,0 +1,43 @@ +{ + "computed_name": [ + "computed_name", + "computed_name2" + ], + "flags": [ + [ + "test-rollups" + ], + [ + "test-rollups", + "test-rollups2" + ] + ], + "failing_commits": [ + 0, + 1 + ], + "last_duration": [ + 100.0, + 1.0 + ], + "avg_duration": [ + 100.0, + 50.5 + ], + "pass_count": [ + 1, + 1 + ], + "fail_count": [ + 0, + 1 + ], + "flaky_fail_count": [ + 0, + 0 + ], + "skip_count": [ + 0, + 0 + ] +} diff --git a/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_main__0.json b/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_main__0.json new file mode 100644 index 000000000..987b14e08 --- /dev/null +++ b/services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_main__0.json @@ -0,0 +1,42 @@ +{ + "computed_name": [ + "computed_name2", + "computed_name" + ], + "flags": [ + [ + "test-rollups2" + ], + [ + "test-rollups" + ] + ], + "failing_commits": [ + 2, + 1 + ], + "last_duration": [ + 200.0, + 100.0 + ], + "avg_duration": [ + 200.0, + 100.0 + ], + "pass_count": [ + 0, + 0 + ], + "fail_count": [ + 2, + 1 + ], + "flaky_fail_count": [ + 0, + 0 + ], + "skip_count": [ + 0, + 0 + ] +} diff --git a/services/test_analytics/tests/test_ta_cache_rollups.py b/services/test_analytics/tests/test_ta_cache_rollups.py new file mode 100644 index 000000000..c8cf3b7b0 --- /dev/null +++ b/services/test_analytics/tests/test_ta_cache_rollups.py @@ -0,0 +1,274 @@ +import datetime as dt + +import polars as pl +import pytest +from shared.config import get_config +from shared.django_apps.ta_timeseries.models import ( + Testrun, + TestrunBranchSummary, + TestrunSummary, +) + +from services.test_analytics.utils import calc_test_id +from tasks.cache_test_rollups import CacheTestRollupsTask + + +def read_table(storage, storage_path: str): + decompressed_table: bytes = storage.read_file( + get_config("services", "minio", "bucket", default="archive"), storage_path + ) + return pl.read_ipc(decompressed_table) + + +@pytest.mark.django_db(databases=["ta_timeseries"], transaction=True) +def test_cache_test_rollups(storage, snapshot): + TestrunSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + repo_id=1, + name="name", + classname="classname", + testsuite="testsuite", + computed_name="computed_name", + failing_commits=1, + avg_duration_seconds=100, + last_duration_seconds=100, + pass_count=0, + fail_count=1, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups"], + ) + + TestrunSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + repo_id=1, + name="name2", + classname="classname2", + testsuite="testsuite2", + computed_name="computed_name2", + failing_commits=2, + avg_duration_seconds=200, + last_duration_seconds=200, + pass_count=0, + fail_count=2, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups2"], + ) + + TestrunSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=61), + repo_id=1, + name="name3", + classname="classname3", + testsuite="testsuite3", + computed_name="computed_name3", + failing_commits=2, + avg_duration_seconds=200, + last_duration_seconds=200, + pass_count=0, + fail_count=2, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups3"], + ) + + CacheTestRollupsTask().run_impl( + _db_session=None, + repoid=1, + branch=None, + impl_type="new", + ) + + table = read_table(storage, "test_analytics/repo_rollups/1.arrow") + table_dict = table.to_dict(as_series=False) + del table_dict["timestamp_bin"] + del table_dict["updated_at"] + assert snapshot("json") == table_dict + + +@pytest.mark.django_db(databases=["ta_timeseries"], transaction=True) +def test_cache_test_rollups_use_timeseries_main(storage, snapshot): + TestrunBranchSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + repo_id=1, + branch="main", + name="name", + classname="classname", + testsuite="testsuite", + computed_name="computed_name", + failing_commits=1, + avg_duration_seconds=100, + last_duration_seconds=100, + pass_count=0, + fail_count=1, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups"], + ) + + TestrunBranchSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + repo_id=1, + branch="main", + name="name2", + classname="classname2", + testsuite="testsuite2", + computed_name="computed_name2", + failing_commits=2, + avg_duration_seconds=200, + last_duration_seconds=200, + pass_count=0, + fail_count=2, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups2"], + ) + + TestrunBranchSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=61), + repo_id=1, + branch="main", + name="name3", + classname="classname3", + testsuite="testsuite3", + computed_name="computed_name3", + failing_commits=2, + avg_duration_seconds=200, + last_duration_seconds=200, + pass_count=0, + fail_count=2, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups3"], + ) + + TestrunBranchSummary.objects.create( + timestamp_bin=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + repo_id=1, + branch="feature", + name="name4", + classname="classname4", + testsuite="testsuite4", + computed_name="computed_name4", + failing_commits=2, + avg_duration_seconds=200, + last_duration_seconds=200, + pass_count=0, + fail_count=2, + skip_count=0, + flaky_fail_count=0, + updated_at=dt.datetime.now(dt.timezone.utc), + flags=["test-rollups3"], + ) + + CacheTestRollupsTask().run_impl( + _db_session=None, + repoid=1, + branch="main", + impl_type="new", + ) + + table = read_table(storage, "test_analytics/branch_rollups/1/main.arrow") + table_dict = table.to_dict(as_series=False) + del table_dict["timestamp_bin"] + del table_dict["updated_at"] + assert snapshot("json") == table_dict + + +@pytest.mark.django_db(databases=["ta_timeseries"], transaction=True) +def test_cache_test_rollups_use_timeseries_branch(storage, snapshot): + Testrun.objects.create( + timestamp=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + test_id=calc_test_id("name", "classname", "testsuite"), + name="name", + classname="classname", + testsuite="testsuite", + computed_name="computed_name", + outcome="pass", + duration_seconds=100, + failure_message="failure_message", + framework="framework", + filename="filename", + repo_id=1, + commit_sha="commit_sha", + branch="feature", + flags=["test-rollups"], + upload_id=1, + ) + + Testrun.objects.create( + timestamp=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + test_id=calc_test_id("name2", "classname2", "testsuite2"), + name="name2", + classname="classname2", + testsuite="testsuite2", + computed_name="computed_name2", + outcome="pass", + duration_seconds=100, + failure_message="failure_message", + framework="framework", + filename="filename", + repo_id=1, + commit_sha="commit_sha", + branch="feature", + flags=["test-rollups"], + upload_id=1, + ) + + Testrun.objects.create( + timestamp=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1), + test_id=calc_test_id("name2", "classname2", "testsuite2"), + name="name2", + classname="classname2", + testsuite="testsuite2", + computed_name="computed_name2", + outcome="failure", + duration_seconds=1, + failure_message="failure_message", + framework="framework", + filename="filename", + repo_id=1, + commit_sha="other_commit_sha", + branch="feature", + flags=["test-rollups", "test-rollups2"], + upload_id=1, + ) + + Testrun.objects.create( + timestamp=dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=61), + test_id=calc_test_id("name3", "classname3", "testsuite3"), + name="name3", + classname="classname3", + testsuite="testsuite3", + computed_name="computed_name3", + outcome="pass", + duration_seconds=100, + failure_message="failure_message", + framework="framework", + filename="filename", + repo_id=1, + commit_sha="commit_sha", + branch="main", + flags=["test-rollups"], + upload_id=1, + ) + + CacheTestRollupsTask().run_impl( + _db_session=None, + repoid=1, + branch="feature", + impl_type="new", + ) + + table = read_table(storage, "test_analytics/branch_rollups/1/feature.arrow") + table_dict = table.to_dict(as_series=False) + del table_dict["timestamp_bin"] + del table_dict["updated_at"] + assert snapshot("json") == table_dict diff --git a/tasks/cache_test_rollups.py b/tasks/cache_test_rollups.py index 00936de59..3a0d00b8e 100644 --- a/tasks/cache_test_rollups.py +++ b/tasks/cache_test_rollups.py @@ -1,4 +1,5 @@ import datetime as dt +from typing import Literal import polars as pl import shared.storage @@ -11,6 +12,7 @@ from app import celery_app from django_scaffold import settings from services.redis import get_redis_connection +from services.test_analytics.ta_cache_rollups import cache_rollups from tasks.base import BaseCodecovTask # Reminder: `a BETWEEN x AND y` is equivalent to `a >= x AND a <= y` @@ -106,13 +108,25 @@ class CacheTestRollupsTask(BaseCodecovTask, name=cache_test_rollups_task_name): def run_impl( - self, _db_session, repoid: int, branch: str, update_date: bool = True, **kwargs + self, + _db_session, + repoid: int, + branch: str, + update_date: bool = True, + impl_type: Literal["old", "new", "both"] = "old", + **kwargs, ): redis_conn = get_redis_connection() try: with redis_conn.lock( f"rollups:{repoid}:{branch}", timeout=300, blocking_timeout=2 ): + if impl_type == "new" or impl_type == "both": + cache_rollups(repoid, branch) + cache_rollups(repoid, None) + if impl_type == "new": + return {"success": True} + self.run_impl_within_lock(repoid, branch) if update_date: @@ -121,7 +135,6 @@ def run_impl( branch=branch, defaults={"last_rollup_date": dt.date.today()}, ) - return {"success": True} except LockError: return {"in_progress": True} diff --git a/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries__0.json b/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries__0.json new file mode 100644 index 000000000..bf6dba038 --- /dev/null +++ b/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries__0.json @@ -0,0 +1,50 @@ +{ + "computed_name": [ + "computed_name", + "computed_name2" + ], + "flags": [ + [ + "test-rollups" + ], + [ + "test-rollups2" + ] + ], + "failing_commits": [ + 1, + 2 + ], + "last_duration": [ + 100.0, + 200.0 + ], + "avg_duration": [ + 100.0, + 200.0 + ], + "pass_count": [ + 0, + 0 + ], + "fail_count": [ + 1, + 2 + ], + "flaky_fail_count": [ + 0, + 0 + ], + "skip_count": [ + 0, + 0 + ], + "updated_at": [ + "2025-01-01T00:00:00+00:00", + "2025-01-01T00:00:00+00:00" + ], + "timestamp_bin": [ + "2024-12-31", + "2024-12-31" + ] +} diff --git a/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries_branch__0.json b/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries_branch__0.json new file mode 100644 index 000000000..b69323b72 --- /dev/null +++ b/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries_branch__0.json @@ -0,0 +1,37 @@ +{ + "computed_name": [ + "computed_name" + ], + "flags": [ + [ + "test-rollups" + ] + ], + "failing_commits": [ + 0 + ], + "last_duration": [ + 100.0 + ], + "avg_duration": [ + 100.0 + ], + "pass_count": [ + 1 + ], + "fail_count": [ + 0 + ], + "flaky_fail_count": [ + 0 + ], + "skip_count": [ + 0 + ], + "updated_at": [ + "2024-12-31T00:00:00+00:00" + ], + "timestamp_bin": [ + "2024-12-31" + ] +} diff --git a/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries_main__0.json b/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries_main__0.json new file mode 100644 index 000000000..bf6dba038 --- /dev/null +++ b/tasks/tests/unit/snapshots/cache_test_rollups__TestCacheTestRollupsTask__cache_test_rollups_use_timeseries_main__0.json @@ -0,0 +1,50 @@ +{ + "computed_name": [ + "computed_name", + "computed_name2" + ], + "flags": [ + [ + "test-rollups" + ], + [ + "test-rollups2" + ] + ], + "failing_commits": [ + 1, + 2 + ], + "last_duration": [ + 100.0, + 200.0 + ], + "avg_duration": [ + 100.0, + 200.0 + ], + "pass_count": [ + 0, + 0 + ], + "fail_count": [ + 1, + 2 + ], + "flaky_fail_count": [ + 0, + 0 + ], + "skip_count": [ + 0, + 0 + ], + "updated_at": [ + "2025-01-01T00:00:00+00:00", + "2025-01-01T00:00:00+00:00" + ], + "timestamp_bin": [ + "2024-12-31", + "2024-12-31" + ] +} diff --git a/tasks/tests/unit/test_cache_test_rollups.py b/tasks/tests/unit/test_cache_test_rollups.py index 5b9eb52c8..aa4c931a5 100644 --- a/tasks/tests/unit/test_cache_test_rollups.py +++ b/tasks/tests/unit/test_cache_test_rollups.py @@ -240,3 +240,26 @@ def test_cache_test_rollups_update_date_does_not_exist( repository_id=self.repo.repoid, branch="main" ).first() assert obj.last_rollup_date == dt.date.today() + + def test_cache_test_rollups_both(self, mock_storage, transactional_db, mocker): + mock_cache_rollups = mocker.patch("tasks.cache_test_rollups.cache_rollups") + task = CacheTestRollupsTask() + mocker.patch.object(task, "run_impl_within_lock") + self.repo = RepositoryFactory() + with time_machine.travel(dt.datetime.now(dt.UTC), tick=False): + _ = task.run_impl( + _db_session=None, + repoid=self.repo.repoid, + branch="main", + update_date=True, + impl_type="both", + ) + + mock_cache_rollups.assert_has_calls( + [ + mocker.call(self.repo.repoid, "main"), + mocker.call(self.repo.repoid, None), + ] + ) + + task.run_impl_within_lock.assert_called_once()