Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 36e33aa

Browse files
committed
feat: implement cache rollup task using TA timeseries models
We want the cache rollup task to be capable of reading test analytics information from the timeseries DB. This also changes the format of the dataframe being cached, so we also change the format of the path at which we store the cached dataframe. The logic for reading from the timeseries DB is:
- if no branch is specified -> read from the repo-wide continuous aggregates
- if a branch is specified:
  - if it's one of the more popular main branch names -> read from the branch-scoped continuous aggregates
  - else, directly aggregate from the individual testruns
1 parent c9ed88b commit 36e33aa

10 files changed

Lines changed: 653 additions & 2 deletions
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from datetime import UTC
2+
from io import BytesIO
3+
4+
import polars as pl
5+
import shared.storage
6+
7+
from django_scaffold import settings
8+
from services.test_analytics.ta_timeseries import (
9+
get_branch_summary,
10+
get_summary,
11+
get_testrun_branch_summary_via_testrun,
12+
)
13+
14+
15+
def rollup_blob_path(repoid: int, branch: str | None = None) -> str:
16+
return (
17+
f"test_analytics/branch_rollups/{repoid}/{branch}.arrow"
18+
if branch
19+
else f"test_analytics/repo_rollups/{repoid}.arrow"
20+
)
21+
22+
23+
# Column layout of the cached rollup dataframe, in column order.
# Entries given as a bare name carry no explicit dtype here; entries given as
# a ``(name, dtype)`` pair pin the polars dtype for that column.
POLARS_SCHEMA = [
    "computed_name",
    ("flags", pl.List(pl.String)),  # list of strings per row
    "failing_commits",
    # durations
    "last_duration",
    "avg_duration",
    # outcome counters
    "pass_count",
    "fail_count",
    "flaky_fail_count",
    "skip_count",
    # time columns
    ("updated_at", pl.Datetime(time_zone=UTC)),  # timezone-aware, UTC
    "timestamp_bin",
]
36+
37+
38+
def cache_rollups(repoid: int, branch: str | None = None):
    """Build the test-analytics rollup for a repo/branch and store it.

    The summary source depends on ``branch``:

    * falsy ``branch``                      -> ``get_summary``
    * ``"main"``, ``"master"``, ``"develop"`` -> ``get_branch_summary``
    * any other branch                      -> ``get_testrun_branch_summary_via_testrun``

    The summaries are turned into a polars dataframe laid out according to
    ``POLARS_SCHEMA``, serialized with ``write_ipc`` into an in-memory buffer
    and written to the path given by ``rollup_blob_path`` in the bucket named
    by ``settings.GCS_BUCKET_NAME``.

    Args:
        repoid: ID of the repository to roll up.
        branch: Optional branch name to scope the rollup to.
    """
    storage_service = shared.storage.get_appropriate_storage_service(repoid)

    if not branch:
        summaries = get_summary(repoid)
    elif branch in {"main", "master", "develop"}:
        summaries = get_branch_summary(repoid, branch)
    else:
        summaries = get_testrun_branch_summary_via_testrun(repoid, branch)

    # One dict per summary; keys mirror the column names in POLARS_SCHEMA.
    rows = []
    for summary in summaries:
        rows.append(
            {
                "computed_name": summary.computed_name,
                "flags": summary.flags,
                "failing_commits": summary.failing_commits,
                "last_duration": summary.last_duration_seconds,
                "avg_duration": summary.avg_duration_seconds,
                "pass_count": summary.pass_count,
                "fail_count": summary.fail_count,
                "flaky_fail_count": summary.flaky_fail_count,
                "skip_count": summary.skip_count,
                "updated_at": summary.updated_at,
                # Only the date part of the bin is cached.
                "timestamp_bin": summary.timestamp_bin.date(),
            }
        )

    frame = pl.DataFrame(rows, POLARS_SCHEMA, orient="row")

    # Passing None makes write_ipc hand back an in-memory buffer.
    buffer: BytesIO = frame.write_ipc(None)
    # Rewind so the storage service reads the buffer from its start.
    buffer.seek(0)

    bucket = settings.GCS_BUCKET_NAME
    destination = rollup_blob_path(repoid, branch)
    storage_service.write_file(bucket, destination, buffer)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"computed_name": [
3+
"computed_name2",
4+
"computed_name"
5+
],
6+
"flags": [
7+
[
8+
"test-rollups2"
9+
],
10+
[
11+
"test-rollups"
12+
]
13+
],
14+
"failing_commits": [
15+
2,
16+
1
17+
],
18+
"last_duration": [
19+
200.0,
20+
100.0
21+
],
22+
"avg_duration": [
23+
200.0,
24+
100.0
25+
],
26+
"pass_count": [
27+
0,
28+
0
29+
],
30+
"fail_count": [
31+
2,
32+
1
33+
],
34+
"flaky_fail_count": [
35+
0,
36+
0
37+
],
38+
"skip_count": [
39+
0,
40+
0
41+
]
42+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"computed_name": [
3+
"computed_name",
4+
"computed_name2"
5+
],
6+
"flags": [
7+
[
8+
"test-rollups"
9+
],
10+
[
11+
"test-rollups",
12+
"test-rollups2"
13+
]
14+
],
15+
"failing_commits": [
16+
0,
17+
1
18+
],
19+
"last_duration": [
20+
100.0,
21+
1.0
22+
],
23+
"avg_duration": [
24+
100.0,
25+
50.5
26+
],
27+
"pass_count": [
28+
1,
29+
1
30+
],
31+
"fail_count": [
32+
0,
33+
1
34+
],
35+
"flaky_fail_count": [
36+
0,
37+
0
38+
],
39+
"skip_count": [
40+
0,
41+
0
42+
]
43+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"computed_name": [
3+
"computed_name2",
4+
"computed_name"
5+
],
6+
"flags": [
7+
[
8+
"test-rollups2"
9+
],
10+
[
11+
"test-rollups"
12+
]
13+
],
14+
"failing_commits": [
15+
2,
16+
1
17+
],
18+
"last_duration": [
19+
200.0,
20+
100.0
21+
],
22+
"avg_duration": [
23+
200.0,
24+
100.0
25+
],
26+
"pass_count": [
27+
0,
28+
0
29+
],
30+
"fail_count": [
31+
2,
32+
1
33+
],
34+
"flaky_fail_count": [
35+
0,
36+
0
37+
],
38+
"skip_count": [
39+
0,
40+
0
41+
]
42+
}

0 commit comments

Comments
 (0)