Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions apps/worker/services/test_analytics/ta_process_flakes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from collections import defaultdict
from datetime import timedelta

import sentry_sdk
Expand Down Expand Up @@ -33,14 +34,20 @@ def fetch_current_flakes(repo_id: int) -> dict[bytes, Flake]:
}


def get_testruns(upload: ReportSession) -> QuerySet[Testrun]:
upload_filter = Q(upload_id=upload.id)

# we won't process flakes for testruns older than 1 day
return Testrun.objects.filter(
Q(timestamp__gte=timezone.now() - timedelta(days=1)) & upload_filter
def get_all_testruns(
    upload_ids: list[int],
) -> dict[int, list[Testrun]]:
    """Load recent testruns for several uploads at once, bucketed by upload.

    A single filtered queryset covers every ID in ``upload_ids`` instead of
    one lookup per upload. Only testruns whose ``timestamp`` is within the
    last day are included, and rows are visited in ascending ``timestamp``
    order, so each per-upload list keeps that ordering.

    Returns:
        A mapping of ``upload_id`` to the testruns belonging to it. Uploads
        with no matching testruns get no entry. The mapping is a
        ``defaultdict(list)``, so indexing a missing key yields (and
        inserts) an empty list; use ``.get`` to avoid that.
    """
    # flakes are not processed for testruns older than one day
    cutoff = timezone.now() - timedelta(days=1)
    recent_for_uploads = Q(timestamp__gte=cutoff) & Q(upload_id__in=upload_ids)
    queryset = Testrun.objects.filter(recent_for_uploads).order_by("timestamp")

    by_upload: dict[int, list[Testrun]] = defaultdict(list)
    for run in queryset:
        by_upload[run.upload_id].append(run)
    return by_upload


def handle_pass(curr_flakes: dict[bytes, Flake], test_id: bytes):
# possible that we expire it and stop caring about it
Expand Down Expand Up @@ -81,10 +88,12 @@ def handle_failure(

@sentry_sdk.trace
def process_single_upload(
upload: ReportSession, curr_flakes: dict[bytes, Flake], repo_id: int
):
testruns = get_testruns(upload)

upload: ReportSession,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused upload parameter in process_single_upload

Low Severity

The upload parameter of process_single_upload is no longer used anywhere in the function body after the refactoring. It was previously needed to call get_testruns(upload), but now that testruns are passed in directly via the new testruns parameter, upload is dead code. The @sentry_sdk.trace decorator does not automatically capture function parameters either, so nothing consumes this value. This adds confusion about the function's interface and dependencies.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 0746c1e. Configure here.

curr_flakes: dict[bytes, Flake],
repo_id: int,
testruns: list[Testrun],
) -> list[Testrun]:
"""Process a single upload's testruns and return the list of testruns that were modified."""
for testrun in testruns:
test_id = bytes(testrun.test_id)
match testrun.outcome:
Expand All @@ -98,15 +107,15 @@ def process_single_upload(
case _:
continue

Testrun.objects.bulk_update(testruns, ["outcome"])
return testruns


@sentry_sdk.trace
def process_flakes_for_commit(repo_id: int, commit_id: str):
log.info(
"process_flakes_for_commit: starting processing",
)
uploads = get_relevant_uploads(repo_id, commit_id)
uploads = list(get_relevant_uploads(repo_id, commit_id))

log.info(
"process_flakes_for_commit: fetched uploads",
Expand All @@ -120,13 +129,21 @@ def process_flakes_for_commit(repo_id: int, commit_id: str):
extra={"flakes": [flake.test_id.hex() for flake in curr_flakes.values()]},
)

upload_ids = [upload.id for upload in uploads]
testruns_by_upload = get_all_testruns(upload_ids)

all_testruns: list[Testrun] = []
for upload in uploads:
process_single_upload(upload, curr_flakes, repo_id)
upload_testruns = testruns_by_upload.get(upload.id, [])
process_single_upload(upload, curr_flakes, repo_id, upload_testruns)
all_testruns.extend(upload_testruns)
log.info(
"process_flakes_for_commit: processed upload",
extra={"upload": upload.id},
)

Testrun.objects.bulk_update(all_testruns, ["outcome"])

log.info(
"process_flakes_for_commit: bulk creating flakes",
extra={"flakes": [flake.test_id.hex() for flake in curr_flakes.values()]},
Expand Down
Loading