Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 166da1e

Browse files
authored
Merge the Report._files/_chunks (#553)
* Merge the `Report._files/_chunks` Instead of maintaining two separate fields, this merges the previous `_files`, which was a dict from filename to `totals`, and the `_chunks`, which was either a fully parsed `ReportFile` or just a shallow `list` of lines. Maintaining only a single structure for the list of files makes a ton of internals of the `Report` simpler. It also solves the problem of `file_totals` getting out of sync with the `ReportFile.totals`. * Make parsing/serializing more on-demand * Move parsing of the `details` to on-demand * Review
1 parent e30c1c7 commit 166da1e

14 files changed

Lines changed: 234 additions & 566 deletions

shared/reports/carryforward.py

Lines changed: 14 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import re
33
from typing import Mapping, Sequence
44

5-
from shared.reports.editable import EditableReport
65
from shared.reports.resources import Report
76
from shared.utils.match import Matcher
87
from shared.utils.sessions import SessionType
@@ -36,49 +35,23 @@ def generate_carryforward_report(
3635
flags: Sequence[str],
3736
paths: Sequence[str],
3837
session_extras: Mapping[str, str] | None = None,
39-
) -> EditableReport:
38+
) -> Report:
4039
"""
41-
Generates a carriedforward report starting from report `report`, flags `flags`
42-
and paths `paths`
40+
Generates a carriedforward report by filtering the given `report` in-place,
41+
to only those files and sessions matching the given `flags` and `paths`.
4342
44-
What this function does is basically take a report `report` and create a new report
45-
from it (so no changes are done in-place). On this new report, it adds all the information
46-
from `report` that relates to sessions that have any of the flags `f`
47-
48-
This way, for example, if none of the sessions in `report` have a flag in `flags`,
49-
it will just produce an empty report
50-
51-
If there are sessions with any of the flags in `flags`, let's call them `relevant_sessions`,
52-
this function will go through all files in `report` that match any of the paths `paths`
53-
and build a new 'carriedforward' ReportFile from it, with only the ReportLines
54-
that had at least one LineSession among the `relevant_sessions` (and proper filter out
55-
all the other sessions from that line). Then all the new EditableReportFile will
56-
be added to the report.
57-
58-
Also, the old sessions are copied over to the new report, with their numbering changed to match
59-
the new session order they are in now (they could be the fifth session before,
60-
and the first session now)
61-
62-
Args:
63-
report (Report): Description
64-
flags (Sequence[str]): Description
65-
66-
Returns:
67-
EditableReport: A new report with only the info related to `flags` on it, as described above
68-
"""
69-
new_report = EditableReport(
70-
chunks=report._chunks,
71-
files=report._files,
72-
sessions=report.sessions,
73-
totals=None,
74-
)
43+
Sessions matching the `flags` are marked as `carriedforward`,
44+
and other sessions are removed from the report."""
7545
if paths:
7646
matcher = Matcher(paths)
77-
for filename in new_report.files:
78-
if not matcher.match(filename):
79-
del new_report[filename]
47+
files_to_delete = {
48+
filename for filename in report._files.keys() if not matcher.match(filename)
49+
}
50+
for filename in files_to_delete:
51+
del report[filename]
52+
8053
sessions_to_delete = []
81-
for sid, session in new_report.sessions.items():
54+
for sid, session in report.sessions.items():
8255
if not contain_any_of_the_flags(flags, session.flags):
8356
sessions_to_delete.append(int(sid))
8457
else:
@@ -89,8 +62,8 @@ def generate_carryforward_report(
8962
"Removing sessions that are not supposed to carryforward",
9063
extra=dict(deleted_sessions=sessions_to_delete),
9164
)
92-
new_report.delete_multiple_sessions(sessions_to_delete)
93-
return new_report
65+
report.delete_multiple_sessions(sessions_to_delete)
66+
return report
9467

9568

9669
def contain_any_of_the_flags(expected_flags, actual_flags):

shared/reports/filtered.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ def network(self):
165165
if file:
166166
yield fname, make_network_file(file.totals)
167167

168-
def get(self, filename, _else=None):
168+
def get(self, filename):
169169
if not self.should_include(filename):
170-
return _else
170+
return None
171171
if not self.flags:
172172
return self.report.get(filename)
173173
r = self.report.get(filename)

shared/reports/readonly.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ def from_chunks(cls, files=None, sessions=None, totals=None, chunks=None):
6565
)
6666
totals = inner_report._totals
6767
filename_mapping = {
68-
filename: file_summary.file_index
69-
for (filename, file_summary) in inner_report._files.items()
68+
filename: idx for idx, filename in enumerate(inner_report._files.keys())
7069
}
7170
session_mapping = {
7271
sid: (session.flags or []) for sid, session in inner_report.sessions.items()
@@ -116,10 +115,6 @@ def get_flag_names(self) -> list[str]:
116115
def sessions(self):
117116
return self.inner_report.sessions
118117

119-
@property
120-
def size(self):
121-
return self.inner_report.size
122-
123118
def apply_diff(self, *args, **kwargs):
124119
return self.inner_report.apply_diff(*args, **kwargs)
125120

shared/reports/reportfile.py

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,21 @@
1313
log = logging.getLogger(__name__)
1414

1515

16-
class ReportFile(object):
16+
class ReportFile:
17+
name: str
18+
_totals: ReportTotals | None
19+
diff_totals: ReportTotals | None
20+
_raw_lines: str | None
21+
_parsed_lines: list[None | str | ReportLine]
22+
_details: dict[str, Any]
23+
__present_sessions: set[int] | None
24+
1725
def __init__(
1826
self,
1927
name: str,
2028
totals: ReportTotals | list | None = None,
2129
lines: list[None | str | ReportLine] | str | None = None,
30+
diff_totals: ReportTotals | list | None = None,
2231
ignore=None,
2332
):
2433
"""
@@ -32,19 +41,18 @@ def __init__(
3241
{eof:N, lines:[1,10]}
3342
"""
3443
self.name = name
35-
self._details: dict[str, Any] = {}
44+
self._totals = None
45+
self.diff_totals = None
46+
self._raw_lines = None
47+
self._parsed_lines = []
48+
self._details = {}
49+
self.__present_sessions = None
3650

37-
# lines = [<details dict()>, <Line #1>, ....]
38-
self._lines: list[None | str | ReportLine] = []
3951
if lines:
4052
if isinstance(lines, list):
41-
self._lines = lines
42-
53+
self._parsed_lines = lines
4354
else:
44-
lines = lines.splitlines()
45-
if detailsline := lines.pop(0):
46-
self._details = orjson.loads(detailsline) or {}
47-
self._lines = lines
55+
self._raw_lines = lines
4856

4957
self._ignore = _ignore_to_func(ignore) if ignore else None
5058

@@ -54,22 +62,38 @@ def __init__(
5462
# All mutating methods (like `append`, `merge`, etc) will either re-calculate these values
5563
# directly, or clear them so the `@property` accessors re-calculate them when needed.
5664

57-
self._totals: ReportTotals | None = None
5865
if isinstance(totals, ReportTotals):
5966
self._totals = totals
6067
elif totals:
6168
self._totals = ReportTotals(*totals)
6269

63-
self.__present_sessions: set[int] | None = None
64-
if present_sessions := self._details.get("present_sessions"):
65-
self.__present_sessions = set(present_sessions)
70+
if isinstance(diff_totals, ReportTotals):
71+
self.diff_totals = diff_totals
72+
elif diff_totals:
73+
self.diff_totals = ReportTotals(*diff_totals)
6674

6775
def _invalidate_caches(self):
6876
self._totals = None
77+
self.diff_totals = None
6978
self.__present_sessions = None
7079

80+
@property
81+
def _lines(self):
82+
if self._raw_lines:
83+
self._parsed_lines = self._raw_lines.splitlines()
84+
detailsline = self._parsed_lines.pop(0)
85+
86+
self._details = orjson.loads(detailsline or "null") or {}
87+
if present_sessions := self._details.get("present_sessions"):
88+
self.__present_sessions = set(present_sessions)
89+
90+
self._raw_lines = None
91+
92+
return self._parsed_lines
93+
7194
@property
7295
def _present_sessions(self):
96+
_ensure_is_parsed = self._lines
7397
if self.__present_sessions is None:
7498
self.__present_sessions = set()
7599
for _, line in self.lines:
@@ -78,18 +102,16 @@ def _present_sessions(self):
78102

79103
@property
80104
def details(self):
105+
_ensure_is_parsed = self._lines
81106
self._details["present_sessions"] = sorted(self._present_sessions)
82107
return self._details
83108

84109
@property
85110
def totals(self):
86111
if not self._totals:
87-
self._totals = self._process_totals()
112+
self._totals = get_line_totals(line for _ln, line in self.lines)
88113
return self._totals
89114

90-
def _process_totals(self) -> ReportTotals:
91-
return get_line_totals(line for _ln, line in self.lines)
92-
93115
def __repr__(self):
94116
try:
95117
return "<%s name=%s lines=%s>" % (
@@ -282,7 +304,8 @@ def merge(self, other_file, joined=True):
282304
):
283305
# previous file was boil-the-ocean
284306
# OR previous file had END issue
285-
self._lines = other_file._lines
307+
self._parsed_lines = other_file._lines.copy()
308+
self._raw_lines = None
286309
log.warning(
287310
"Doing something weird because of weird .rb logic",
288311
extra=dict(report_filename=self.name),
@@ -302,10 +325,11 @@ def merge(self, other_file, joined=True):
302325

303326
else:
304327
# set new lines object
305-
self._lines = [
328+
self._parsed_lines = [
306329
merge_line(before, after, joined)
307330
for before, after in zip_longest(self, other_file)
308331
]
332+
self._raw_lines = None
309333

310334
self._invalidate_caches()
311335
return True
@@ -460,7 +484,9 @@ def delete_multiple_sessions(self, session_ids_to_delete: set[int]):
460484
self._invalidate_caches()
461485

462486
if not new_sessions:
463-
self._lines = [] # no remaining sessions means no line data
487+
# no remaining sessions means no line data
488+
self._parsed_lines = []
489+
self._raw_lines = None
464490
return
465491

466492
for index, line in self.lines:

0 commit comments

Comments
 (0)