Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion services/processing/flake_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def process_flake_in_transaction(
report__report_type=CommitReport.ReportType.TEST_RESULTS.value,
report__commit__repository__repoid=repo_id,
report__commit__commitid=commit_id,
state__in=["processed", "v2_finished"],
state__in=["processed"],
)

curr_flakes = fetch_curr_flakes(repo_id)
Expand Down
121 changes: 121 additions & 0 deletions services/test_analytics/ta_process_flakes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import logging
from datetime import datetime

from django.db import transaction
from django.db.models import Q, QuerySet
from redis.exceptions import LockError
from shared.django_apps.reports.models import CommitReport, ReportSession
from shared.django_apps.ta_timeseries.models import Testrun
from shared.django_apps.test_analytics.models import Flake

from services.redis import get_redis_connection

log = logging.getLogger(__name__)

# Testrun outcomes that count as a failure for flake detection.
FAIL_FILTER = Q(outcome="failure") | Q(outcome="flaky_failure") | Q(outcome="error")

# Redis key templates; both are formatted with the repo id.
LOCK_NAME = "ta_flake_lock:{}"
KEY_NAME = "ta_flake_key:{}"


def get_relevant_uploads(repo_id: int, commit_id: str) -> QuerySet[ReportSession]:
    """Return the processed test-results uploads belonging to the given commit."""
    filters = {
        "report__report_type": CommitReport.ReportType.TEST_RESULTS.value,
        "report__commit__repository__repoid": repo_id,
        "report__commit__commitid": commit_id,
        "state__in": ["processed"],
    }
    return ReportSession.objects.filter(**filters)


def fetch_current_flakes(repo_id: int) -> dict[bytes, Flake]:
    """Load every tracked flake for the repo, keyed by its test_id as bytes."""
    flakes: dict[bytes, Flake] = {}
    for flake in Flake.objects.filter(repoid=repo_id):
        flakes[bytes(flake.test_id)] = flake
    return flakes


def get_testruns(
    upload: ReportSession, curr_flakes: dict[bytes, Flake]
) -> QuerySet[Testrun]:
    """Select the upload's testruns that matter for flake bookkeeping.

    That is: every failing run, plus passing runs of tests that are
    currently tracked as flaky.
    """
    known_flaky_pass = Q(outcome="pass", test_id__in=curr_flakes.keys())
    relevant_outcome = FAIL_FILTER | known_flaky_pass
    return Testrun.objects.filter(Q(upload_id=upload.id) & relevant_outcome)


def handle_pass(
    curr_flakes: "dict[bytes, Flake]", test_id: bytes, expiry_passes: int = 30
) -> None:
    """Record a passing testrun for a currently-tracked flake.

    Once a flake accumulates ``expiry_passes`` consecutive passes it is
    considered resolved: its ``end_date`` is stamped, the row is saved,
    and it is dropped from ``curr_flakes`` so it stops being tracked.

    Args:
        curr_flakes: in-memory map of tracked flakes, keyed by test_id bytes.
        test_id: id of the test that passed.
        expiry_passes: consecutive passes after which the flake expires
            (default 30 — previously an inline magic constant).
    """
    flake = curr_flakes.get(test_id)
    # Possible that we expired it earlier and stopped caring about it.
    if flake is None:
        return

    flake.recent_passes_count += 1
    flake.count += 1
    # >= (not ==) so a counter already past the threshold still expires.
    if flake.recent_passes_count >= expiry_passes:
        flake.end_date = datetime.now()
        flake.save()
        del curr_flakes[test_id]


def handle_failure(
    curr_flakes: "dict[bytes, Flake]",
    test_id: bytes,
    testrun: "Testrun",
    repo_id: int,
):
    """Record a failing testrun, either bumping an existing flake or starting one.

    For an already-tracked flake the fail/total counters are incremented and
    the consecutive-pass streak resets. For an untracked test, the testrun's
    outcome is rewritten to "flaky_failure" and a fresh Flake is added to
    ``curr_flakes`` (persisted later by the caller's bulk upsert).
    """
    flake = curr_flakes.get(test_id)
    if flake is not None:
        flake.fail_count += 1
        flake.count += 1
        flake.recent_passes_count = 0
        return

    # New flake: make sure the run is labelled as a flaky failure.
    if testrun.outcome != "flaky_failure":
        testrun.outcome = "flaky_failure"
    curr_flakes[test_id] = Flake(
        repoid=repo_id,
        test_id=test_id,
        count=1,
        fail_count=1,
        recent_passes_count=0,
        start_date=datetime.now(),
    )


def process_flakes_for_commit(repo_id: int, commit_id: str):
    """Update flake bookkeeping for every processed test-results upload of a commit.

    Walks each upload's relevant testruns (failures, plus passes of tests
    already tracked as flaky), mutating in-memory Flake state via
    handle_pass/handle_failure, then persists outcome changes and upserts
    the flakes in bulk.
    """
    uploads = get_relevant_uploads(repo_id, commit_id)

    # Snapshot of all currently-tracked flakes for the repo, keyed by test_id.
    curr_flakes = fetch_current_flakes(repo_id)

    for upload in uploads:
        testruns = get_testruns(upload, curr_flakes)

        for testrun in testruns:
            test_id = bytes(testrun.test_id)
            match testrun.outcome:
                case "pass":
                    handle_pass(curr_flakes, test_id)
                case "failure" | "flaky_failure" | "error":
                    handle_failure(curr_flakes, test_id, testrun, repo_id)
                case _:
                    # Outcomes outside the filter shouldn't appear; skip defensively.
                    continue

        # Iterating `testruns` above evaluated and cached the queryset, so this
        # persists the outcome rewrites handle_failure made on those instances.
        Testrun.objects.bulk_update(testruns, ["outcome"])

    # Upsert: brand-new Flake objects are inserted; rows matched on pk get
    # their counters and end_date updated.
    Flake.objects.bulk_create(
        curr_flakes.values(),
        update_conflicts=True,
        unique_fields=["id"],
        update_fields=["end_date", "count", "recent_passes_count", "fail_count"],
    )

    # NOTE(review): assumes the caller runs this with autocommit off / inside
    # a managed transaction — confirm; transaction.commit() raises otherwise.
    transaction.commit()


def process_flakes_for_repo(repo_id: int):
    """Drain the repo's queued commit ids and process their flakes under a lock.

    Returns True when the queue was drained, False when the per-repo Redis
    lock could not be acquired within the blocking timeout.
    """
    redis_client = get_redis_connection()
    lock = LOCK_NAME.format(repo_id)
    queue_key = KEY_NAME.format(repo_id)
    try:
        with redis_client.lock(lock, timeout=300, blocking_timeout=3):
            while True:
                # Pop up to 10 queued commit ids at a time; None/empty ends the drain.
                batch = redis_client.lpop(queue_key, 10)
                if not batch:
                    break
                for raw_commit_id in batch:
                    process_flakes_for_commit(repo_id, raw_commit_id.decode())
            return True
    except LockError:
        log.warning("Failed to acquire lock for repo %s", repo_id)
        return False

Check warning on line 121 in services/test_analytics/ta_process_flakes.py

View check run for this annotation

Codecov Notifications / codecov/patch

services/test_analytics/ta_process_flakes.py#L119-L121

Added lines #L119 - L121 were not covered by tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"test1": {
"count": 6,
"fail_count": 2,
"recent_passes_count": 1
},
"test3": {
"count": 1,
"fail_count": 1,
"recent_passes_count": 0
},
"test4": {
"count": 1,
"fail_count": 1,
"recent_passes_count": 0
},
"test5": {
"count": 1,
"fail_count": 1,
"recent_passes_count": 0
}
}
Loading
Loading