Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions services/test_analytics/ta_cache_rollups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from datetime import UTC
from io import BytesIO

import polars as pl
import shared.storage

from django_scaffold import settings
from services.test_analytics.ta_timeseries import (
get_branch_summary,
get_summary,
get_testrun_branch_summary_via_testrun,
)


def rollup_blob_path(repoid: int, branch: str | None = None) -> str:
return (
f"test_analytics/branch_rollups/{repoid}/{branch}.arrow"
if branch
else f"test_analytics/repo_rollups/{repoid}.arrow"
)


# Column layout of the rollup table that `cache_rollups` serializes.
# The names mirror the keys of the row dicts built in `cache_rollups`.
# Entries given as a bare string carry no explicit dtype here; entries given
# as a (name, dtype) tuple pin the dtype.
POLARS_SCHEMA = [
    "computed_name",
    # Each row holds a list of flag strings.
    ("flags", pl.List(pl.String)),
    "failing_commits",
    "last_duration",
    "avg_duration",
    "pass_count",
    "fail_count",
    "flaky_fail_count",
    "skip_count",
    # Timezone-aware datetime column fixed to UTC.
    ("updated_at", pl.Datetime(time_zone=UTC)),
    "timestamp_bin",
]


# Branch names whose summaries are read through `get_branch_summary`; every
# other branch goes through `get_testrun_branch_summary_via_testrun`.
#
# NOTE(review): the repository record has a dedicated main-branch field that
# would be preferable to a hardcoded list. Per the pull-request discussion it
# cannot be used yet, because the continuous aggregates live in Timescale,
# where the repository's branch setting is not accessible. A suggested
# follow-up is an `is_main_branch` boolean supplied at processing time, when
# the repo metadata is available.
_AGGREGATED_BRANCHES = frozenset({"main", "master", "develop"})


def cache_rollups(repoid: int, branch: str | None = None):
    """Serialize test summaries for a repo (or one branch) and store them.

    The summaries are collected into a polars DataFrame laid out according to
    ``POLARS_SCHEMA``, written in Arrow IPC format to an in-memory buffer, and
    uploaded to the path returned by ``rollup_blob_path(repoid, branch)``.

    Args:
        repoid: Identifier of the repository to build the rollup for.
        branch: Branch to build a per-branch rollup for. Any falsy value
            (``None`` or an empty string) builds the repo-wide rollup.
    """
    storage_service = shared.storage.get_appropriate_storage_service(repoid)

    if not branch:
        summaries = get_summary(repoid)
    elif branch in _AGGREGATED_BRANCHES:
        summaries = get_branch_summary(repoid, branch)
    else:
        summaries = get_testrun_branch_summary_via_testrun(repoid, branch)

    data = [
        {
            "computed_name": summary.computed_name,
            "flags": summary.flags,
            "failing_commits": summary.failing_commits,
            "last_duration": summary.last_duration_seconds,
            "avg_duration": summary.avg_duration_seconds,
            "pass_count": summary.pass_count,
            "fail_count": summary.fail_count,
            "flaky_fail_count": summary.flaky_fail_count,
            "skip_count": summary.skip_count,
            "updated_at": summary.updated_at,
            # Only the calendar date of the bin is stored.
            "timestamp_bin": summary.timestamp_bin.date(),
        }
        for summary in summaries
    ]

    # With no target given, `write_ipc` hands back the serialized table as an
    # in-memory buffer instead of writing to disk.
    serialized_table: BytesIO = pl.DataFrame(
        data,
        POLARS_SCHEMA,
        orient="row",
    ).write_ipc(None)

    # Rewind so the upload starts from the first byte of the buffer.
    serialized_table.seek(0)

    storage_service.write_file(
        settings.GCS_BUCKET_NAME, rollup_blob_path(repoid, branch), serialized_table
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"computed_name": [
"computed_name2",
"computed_name"
],
"flags": [
[
"test-rollups2"
],
[
"test-rollups"
]
],
"failing_commits": [
2,
1
],
"last_duration": [
200.0,
100.0
],
"avg_duration": [
200.0,
100.0
],
"pass_count": [
0,
0
],
"fail_count": [
2,
1
],
"flaky_fail_count": [
0,
0
],
"skip_count": [
0,
0
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"computed_name": [
"computed_name",
"computed_name2"
],
"flags": [
[
"test-rollups"
],
[
"test-rollups",
"test-rollups2"
]
],
"failing_commits": [
0,
1
],
"last_duration": [
100.0,
1.0
],
"avg_duration": [
100.0,
50.5
],
"pass_count": [
1,
1
],
"fail_count": [
0,
1
],
"flaky_fail_count": [
0,
0
],
"skip_count": [
0,
0
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"computed_name": [
"computed_name2",
"computed_name"
],
"flags": [
[
"test-rollups2"
],
[
"test-rollups"
]
],
"failing_commits": [
2,
1
],
"last_duration": [
200.0,
100.0
],
"avg_duration": [
200.0,
100.0
],
"pass_count": [
0,
0
],
"fail_count": [
2,
1
],
"flaky_fail_count": [
0,
0
],
"skip_count": [
0,
0
]
}
Loading
Loading