Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit e628bde

Browse files
committed
feat: add version metadata in ta_cache_rollups
We want to be able to evolve the schema of the rollups, and we can do that by including a version tag in the GCS object metadata. However, even though in this case we're making the change in the worker first, in the future we should modify the reading code to handle the new schema before modifying the writing code: if we deploy both reader and writer at the same time, it's possible that a rollup written by a new version of the writer is read by an old version of the reader, which doesn't understand the new format.
1 parent a077a2f commit e628bde

7 files changed

Lines changed: 51 additions & 13 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,4 @@ dev-dependencies = [
8383
[tool.uv.sources]
8484
timestring = { git = "https://github.com/codecov/timestring", rev = "d37ceacc5954dff3b5bd2f887936a98a668dda42" }
8585
test-results-parser = { git = "https://github.com/codecov/test-results-parser", rev = "190bbc8a911099749928e13d5fe57f6027ca1e74" }
86-
shared = { git = "https://github.com/codecov/shared", rev = "1c6200a3f1a6cdefab730b3f7d731c9a5fa036da" }
86+
shared = { git = "https://github.com/codecov/shared", rev = "b7ee1c82c044cdf00fb09e35606d670c78683854" }

services/test_analytics/ta_cache_rollups.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import UTC
22
from io import BytesIO
3+
from typing import cast
34

45
import polars as pl
56
import shared.storage
@@ -15,6 +16,10 @@
1516
get_testrun_branch_summary_via_testrun,
1617
)
1718

19+
# no version: everything but testsuite field
20+
# version 1: add testsuite field
21+
VERSION = "1"
22+
1823

1924
def rollup_blob_path(repoid: int, branch: str | None = None) -> str:
2025
return (
@@ -26,6 +31,7 @@ def rollup_blob_path(repoid: int, branch: str | None = None) -> str:
2631

2732
POLARS_SCHEMA = [
2833
"computed_name",
34+
"testsuite",
2935
("flags", pl.List(pl.String)),
3036
"failing_commits",
3137
"last_duration",
@@ -40,7 +46,6 @@ def rollup_blob_path(repoid: int, branch: str | None = None) -> str:
4046

4147

4248
def cache_rollups(repoid: int, branch: str | None = None):
43-
storage_service = shared.storage.get_appropriate_storage_service(repoid)
4449
serialized_table: BytesIO
4550

4651
with read_rollups_from_db_summary.labels("new").time():
@@ -55,6 +60,7 @@ def cache_rollups(repoid: int, branch: str | None = None):
5560
data = [
5661
{
5762
"computed_name": summary.computed_name,
63+
"testsuite": summary.testsuite,
5864
"flags": summary.flags,
5965
"failing_commits": summary.failing_commits,
6066
"last_duration": summary.last_duration_seconds,
@@ -69,15 +75,20 @@ def cache_rollups(repoid: int, branch: str | None = None):
6975
for summary in summaries
7076
]
7177

72-
serialized_table = pl.DataFrame(
78+
df = pl.DataFrame(
7379
data,
7480
POLARS_SCHEMA,
7581
orient="row",
76-
).write_ipc(None)
82+
)
83+
serialized_table = df.write_ipc(None)
7784

7885
serialized_table.seek(0)
7986

87+
storage_service = shared.storage.get_appropriate_storage_service(repoid)
8088
storage_service.write_file(
81-
settings.GCS_BUCKET_NAME, rollup_blob_path(repoid, branch), serialized_table
89+
cast(str, settings.GCS_BUCKET_NAME),
90+
rollup_blob_path(repoid, branch),
91+
serialized_table,
92+
metadata={"version": VERSION},
8293
)
8394
rollup_size_summary.labels("new").observe(serialized_table.tell())

services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups__0.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
"computed_name2",
44
"computed_name"
55
],
6+
"testsuite": [
7+
"testsuite2",
8+
"testsuite"
9+
],
610
"flags": [
711
[
812
"test-rollups2"

services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_branch__0.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
"computed_name",
44
"computed_name2"
55
],
6+
"testsuite": [
7+
"testsuite",
8+
"testsuite2"
9+
],
610
"flags": [
711
[
812
"test-rollups"

services/test_analytics/tests/snapshots/ta_cache_rollups__cache_test_rollups_use_timeseries_main__0.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
"computed_name2",
44
"computed_name"
55
],
6+
"testsuite": [
7+
"testsuite2",
8+
"testsuite"
9+
],
610
"flags": [
711
[
812
"test-rollups2"

services/test_analytics/tests/test_ta_cache_rollups.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime as dt
2+
from typing import cast
23

34
import polars as pl
45
import pytest
@@ -8,14 +9,22 @@
89
TestrunBranchSummary,
910
TestrunSummary,
1011
)
12+
from shared.storage.minio import MinioStorageService
1113

14+
from services.test_analytics.ta_cache_rollups import VERSION
1215
from services.test_analytics.utils import calc_test_id
1316
from tasks.cache_test_rollups import CacheTestRollupsTask
1417

1518

16-
def read_table(storage, storage_path: str):
19+
def read_table(
20+
storage: MinioStorageService,
21+
storage_path: str,
22+
meta_container: dict[str, str] | None = None,
23+
):
1724
decompressed_table: bytes = storage.read_file(
18-
get_config("services", "minio", "bucket", default="archive"), storage_path
25+
cast(str, get_config("services", "minio", "bucket", default="archive")),
26+
storage_path,
27+
metadata_container=meta_container,
1928
)
2029
return pl.read_ipc(decompressed_table)
2130

@@ -82,8 +91,11 @@ def test_cache_test_rollups(storage, snapshot):
8291
branch=None,
8392
impl_type="new",
8493
)
85-
86-
table = read_table(storage, "test_analytics/repo_rollups/1.arrow")
94+
meta = {}
95+
table = read_table(
96+
storage, "test_analytics/repo_rollups/1.arrow", meta_container=meta
97+
)
98+
assert meta["version"] == VERSION
8799
table_dict = table.to_dict(as_series=False)
88100
del table_dict["timestamp_bin"]
89101
del table_dict["updated_at"]
@@ -174,8 +186,11 @@ def test_cache_test_rollups_use_timeseries_main(storage, snapshot):
174186
branch="main",
175187
impl_type="new",
176188
)
177-
178-
table = read_table(storage, "test_analytics/branch_rollups/1/main.arrow")
189+
meta = {}
190+
table = read_table(
191+
storage, "test_analytics/branch_rollups/1/main.arrow", meta_container=meta
192+
)
193+
assert meta["version"] == VERSION
179194
table_dict = table.to_dict(as_series=False)
180195
del table_dict["timestamp_bin"]
181196
del table_dict["updated_at"]

uv.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)