Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 1d68d39

Browse files
authored
Slight ParsedRawReport cleanup (#932)
1 parent dcf7304 commit 1d68d39

12 files changed

Lines changed: 148 additions & 233 deletions

File tree

services/path_fixer/fixpaths.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,7 @@ def clean_toc(toc: str) -> list[str]:
115115
continue
116116

117117
# This path is good; save it.
118-
rv.append(path)
118+
if path:
119+
rv.append(path)
119120

120121
return rv

services/path_fixer/tests/unit/test_fixpaths.py

Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
import pytest
44

55
from services.path_fixer import fixpaths
6-
from test_utils.base import BaseTestCase
76

87
# Hand-written TOCs.
98
paths = [
@@ -24,33 +23,35 @@
2423
}
2524

2625

27-
class TestFixpaths(BaseTestCase):
28-
@pytest.mark.parametrize("toc, result", paths)
29-
def test_clean_toc(self, toc, result):
30-
assert fixpaths.clean_toc(toc) == result
31-
32-
def test_clean_toc_with_space(self):
33-
assert fixpaths.clean_toc("a\\ b") == ["a b"]
34-
35-
@pytest.mark.parametrize("path, result", list(unquoted_files.items()))
36-
def test_unquote_git_path(self, path, result):
37-
assert fixpaths.unquote_git_path(path) == result
38-
39-
def test_some_real_git_paths(self):
40-
prefix = "services/path_fixer/tests/testdir"
41-
filenames = [
42-
"café.txt",
43-
"comma,txt",
44-
"🍭.txt",
45-
'fixture/get_breakdown_values_escaped_".json',
46-
]
47-
joined = [os.path.join(prefix, filename) for filename in filenames]
48-
toc = """"services/path_fixer/tests/testdir/caf\\303\\251.txt"
26+
@pytest.mark.parametrize("toc, result", paths)
27+
def test_clean_toc(toc, result):
28+
assert fixpaths.clean_toc(toc) == result
29+
30+
31+
def test_clean_toc_with_space():
32+
assert fixpaths.clean_toc("a\\ b") == ["a b"]
33+
34+
35+
@pytest.mark.parametrize("path, result", list(unquoted_files.items()))
36+
def test_unquote_git_path(path, result):
37+
assert fixpaths.unquote_git_path(path) == result
38+
39+
40+
def test_some_real_git_paths():
41+
prefix = "services/path_fixer/tests/testdir"
42+
filenames = [
43+
"café.txt",
44+
"comma,txt",
45+
"🍭.txt",
46+
'fixture/get_breakdown_values_escaped_".json',
47+
]
48+
joined = [os.path.join(prefix, filename) for filename in filenames]
49+
toc = """"services/path_fixer/tests/testdir/caf\\303\\251.txt"
4950
services/path_fixer/tests/testdir/comma,txt
5051
"services/path_fixer/tests/testdir/\\360\\237\\215\\255.txt"
5152
"services/path_fixer/tests/testdir/fixture/get_breakdown_values_escaped_\\".json"
5253
"""
53-
cleaned = fixpaths.clean_toc(toc)
54-
joined.sort()
55-
cleaned.sort()
56-
assert joined == cleaned
54+
cleaned = fixpaths.clean_toc(toc)
55+
joined.sort()
56+
cleaned.sort()
57+
assert joined == cleaned

services/processing/processing.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,4 @@ def rewrite_or_delete_upload(
9999

100100
elif isinstance(report_info.raw_report, VersionOneParsedRawReport):
101101
# only a version 1 report needs to be "rewritten readable"
102-
103-
archive_service.write_file(
104-
archive_url, report_info.raw_report.content().getvalue()
105-
)
102+
archive_service.write_file(archive_url, report_info.raw_report.as_readable())

services/report/__init__.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -539,14 +539,8 @@ def parse_raw_report_from_storage(
539539
"""
540540
archive_service = self.get_archive_service(repo)
541541
archive_url = upload.storage_path
542-
543542
log.info(
544-
"Parsing the raw report from storage",
545-
extra=dict(
546-
commit=upload.report.commit_id,
547-
repoid=repo.repoid,
548-
archive_url=archive_url,
549-
),
543+
"Parsing the raw report from storage", extra=dict(archive_url=archive_url)
550544
)
551545

552546
archive_file = archive_service.read_file(archive_url)
@@ -559,13 +553,11 @@ def parse_raw_report_from_storage(
559553

560554
raw_uploaded_report = parser.parse_raw_report_from_bytes(archive_file)
561555

562-
raw_report_count = len(raw_uploaded_report.get_uploaded_files())
556+
raw_report_count = len(raw_uploaded_report.uploaded_files)
563557
if raw_report_count < 1:
564558
log.warning(
565559
"Raw upload contains no uploaded files",
566560
extra=dict(
567-
commit=upload.report.commit_id,
568-
repoid=repo.repoid,
569561
raw_report_count=raw_report_count,
570562
upload_version=upload_version,
571563
archive_url=archive_url,

services/report/parser/legacy.py

Lines changed: 16 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
import sentry_sdk
55

6+
from services.path_fixer.fixpaths import clean_toc
67
from services.report.parser.types import LegacyParsedRawReport, ParsedUploadedReportFile
78

89

@@ -67,7 +68,6 @@ def cut_sections(self, raw_report: bytes):
6768
6869
This function takes the proper steps to find all the relevant sections of a report:
6970
- toc: the 'network', list of files present on this report
70-
- env: the envvars the user set on the upload
7171
- uploaded_files: the actual report files
7272
- report_fixes: the report fixes some languages need
7373
@@ -99,8 +99,8 @@ def cut_sections(self, raw_report: bytes):
9999
while i_start < i_end and raw_report[i_start] in whitespaces:
100100
i_start += 1
101101
yield {
102-
"contents": raw_report[i_start:i_end],
103102
"filename": filename,
103+
"contents": raw_report[i_start:i_end],
104104
"footer": separator,
105105
}
106106

@@ -110,32 +110,27 @@ def parse_raw_report_from_bytes(self, raw_report: bytes) -> LegacyParsedRawRepor
110110
self.ignore_from_now_on_marker
111111
)
112112
sections = self.cut_sections(raw_report)
113-
res = self._generate_parsed_report_from_sections(sections)
114-
return res
113+
return self._generate_parsed_report_from_sections(sections)
115114

116115
def _generate_parsed_report_from_sections(self, sections):
117116
uploaded_files = []
118-
toc_section = None
119-
env_section = None
120-
report_fixes_section = None
117+
toc = None
118+
report_fixes = None
121119
for sect in sections:
122120
if sect["footer"] == self.network_separator:
123-
toc_section = sect["contents"]
121+
toc = clean_toc(sect["contents"].decode(errors="replace").strip())
124122
elif sect["footer"] == self.env_separator:
125-
env_section = sect["contents"]
123+
pass
124+
elif sect["filename"] == "fixes":
125+
report_fixes = sect["contents"]
126126
else:
127-
if sect["filename"] == "fixes":
128-
report_fixes_section = sect["contents"]
129-
else:
130-
uploaded_files.append(
131-
ParsedUploadedReportFile(
132-
filename=sect.get("filename"),
133-
file_contents=sect["contents"],
134-
)
135-
)
127+
file = ParsedUploadedReportFile(
128+
filename=sect["filename"], file_contents=sect["contents"]
129+
)
130+
uploaded_files.append(file)
131+
136132
return LegacyParsedRawReport(
137-
toc=toc_section,
138-
env=env_section,
133+
toc=toc or [],
139134
uploaded_files=uploaded_files,
140-
report_fixes=report_fixes_section,
135+
report_fixes=report_fixes,
141136
)

services/report/parser/tests/unit/test_version_one_parser.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from services.report.parser.version_one import (
55
ParsedUploadedReportFile,
66
VersionOneReportParser,
7+
_parse_coverage_file_contents,
78
)
89

910
input_data = b"""{
@@ -47,7 +48,6 @@
4748
def test_version_one_parser():
4849
subject = VersionOneReportParser()
4950
res = subject.parse_raw_report_from_bytes(input_data)
50-
assert res.get_env() is None
5151
assert res.get_report_fixes(None) == {
5252
"SwiftExample/AppDelegate.swift": {
5353
"eof": 15,
@@ -58,13 +58,13 @@ def test_version_one_parser():
5858
"lines": [1, 17, 3, 22, 7, 9, 12, 14],
5959
},
6060
}
61-
assert res.get_toc() == [
61+
assert res.toc == [
6262
"path/to/file1.c",
6363
"path/from/another.cpp",
6464
"path/from/aaaaaa.cpp",
6565
]
66-
assert len(res.get_uploaded_files()) == 2
67-
first_file, second_file = res.get_uploaded_files()
66+
assert len(res.uploaded_files) == 2
67+
first_file, second_file = res.uploaded_files
6868
assert isinstance(first_file, ParsedUploadedReportFile)
6969
assert first_file.filename == "coverage.xml"
7070
assert (
@@ -82,28 +82,26 @@ def test_version_one_parser():
8282
assert second_file.labels == ["simple", "a.py::fileclass::test_simple"]
8383

8484
assert (
85-
res.content().getvalue().decode("utf-8")
86-
== f"path/to/file1.c\npath/from/another.cpp\npath/from/aaaaaa.cpp\n<<<<<< network\n\n# path=coverage.xml\n{first_file.contents.decode('utf-8')}\n<<<<<< EOF\n\n# path=another.coverage.json\n{second_file.contents.decode('utf-8')}\n<<<<<< EOF\n\n"
85+
res.as_readable()
86+
== f"path/to/file1.c\npath/from/another.cpp\npath/from/aaaaaa.cpp\n<<<<<< network\n\n# path=coverage.xml\n{first_file.contents.decode()}\n<<<<<< EOF\n\n# path=another.coverage.json\n{second_file.contents.decode()}\n<<<<<< EOF\n\n".encode()
8787
)
8888

8989

9090
def test_version_one_parser_parse_coverage_file_contents_bad_format():
91-
subject = VersionOneReportParser()
9291
coverage_file = {"format": "unknown", "data": b"simple", "filename": "filename.py"}
93-
assert subject._parse_coverage_file_contents(coverage_file) == b"simple"
92+
assert _parse_coverage_file_contents(coverage_file) == b"simple"
9493

9594

9695
def test_version_one_parser_parse_coverage_file_contents_base64_zip_format():
9796
original_input = b"some_cool_string right \n here"
9897
formatted_input = base64.b64encode(zlib.compress(original_input))
9998
# An assert for the sake of showing the result
10099
assert formatted_input == b"eJwrzs9NjU/Oz8+JLy4pysxLVyjKTM8oUeBSyEgtSgUArOcK4w=="
101-
subject = VersionOneReportParser()
102100
coverage_file = {
103101
"format": "base64+compressed",
104102
"data": formatted_input,
105103
"filename": "filename.py",
106104
}
107-
res = subject._parse_coverage_file_contents(coverage_file)
105+
res = _parse_coverage_file_contents(coverage_file)
108106
assert isinstance(res, bytes)
109107
assert res == b"some_cool_string right \n here"

services/report/parser/types.py

Lines changed: 22 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from io import BytesIO
22
from typing import Any
33

4-
from services.path_fixer.fixpaths import clean_toc
54
from services.report.fixes import get_fixes_from_raw
65

76

@@ -30,8 +29,6 @@ class ParsedRawReport(object):
3029
toc
3130
table of contents, this lists the files relevant to the report,
3231
i.e. the files contained in the repository
33-
env
34-
list of env vars in environment of uploader (legacy only)
3532
uploaded_files
3633
list of class ParsedUploadedReportFile describing uploaded coverage files
3734
report_fixes
@@ -41,41 +38,31 @@ class ParsedRawReport(object):
4138

4239
def __init__(
4340
self,
44-
toc: Any,
45-
env: Any,
41+
toc: list[str],
4642
uploaded_files: list[ParsedUploadedReportFile],
4743
report_fixes: Any,
4844
):
4945
self.toc = toc
50-
self.env = env
5146
self.uploaded_files = uploaded_files
5247
self.report_fixes = report_fixes
5348

54-
def has_toc(self) -> bool:
55-
return self.toc is not None
56-
57-
def has_env(self) -> bool:
58-
return self.env is not None
59-
6049
def has_report_fixes(self) -> bool:
6150
return self.report_fixes is not None
6251

6352
@property
6453
def size(self):
6554
return sum(f.size for f in self.uploaded_files)
6655

67-
def content(self) -> BytesIO:
68-
buffer = BytesIO()
69-
if self.has_toc():
70-
for file in self.get_toc():
71-
buffer.write(f"{file}\n".encode("utf-8"))
72-
buffer.write(b"<<<<<< network\n\n")
73-
for file in self.uploaded_files:
74-
buffer.write(f"# path={file.filename}\n".encode("utf-8"))
75-
buffer.write(file.contents)
76-
buffer.write(b"\n<<<<<< EOF\n\n")
77-
buffer.seek(0)
78-
return buffer
56+
57+
class LegacyParsedRawReport(ParsedRawReport):
58+
"""
59+
report_fixes : bytes
60+
<filename>:<line number>,<line number>,...
61+
"""
62+
63+
def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
64+
report_fixes = self.report_fixes.decode(errors="replace")
65+
return get_fixes_from_raw(report_fixes, path_fixer)
7966

8067

8168
class VersionOneParsedRawReport(ParsedRawReport):
@@ -90,34 +77,17 @@ class VersionOneParsedRawReport(ParsedRawReport):
9077
}
9178
"""
9279

93-
def get_toc(self) -> list[str]:
94-
return self.toc
95-
96-
def get_env(self):
97-
return self.env
98-
99-
def get_uploaded_files(self):
100-
return self.uploaded_files
101-
10280
def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
10381
return self.report_fixes
10482

105-
106-
class LegacyParsedRawReport(ParsedRawReport):
107-
"""
108-
report_fixes : BinaryIO
109-
<filename>:<line number>,<line number>,...
110-
"""
111-
112-
def get_toc(self) -> list[str]:
113-
return clean_toc(self.toc.decode(errors="replace").strip())
114-
115-
def get_env(self):
116-
return self.env.decode(errors="replace")
117-
118-
def get_uploaded_files(self):
119-
return self.uploaded_files
120-
121-
def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
122-
report_fixes = self.report_fixes.decode(errors="replace")
123-
return get_fixes_from_raw(report_fixes, path_fixer)
83+
def as_readable(self) -> bytes:
84+
buffer = b""
85+
if self.toc:
86+
for path in self.toc:
87+
buffer += f"{path}\n".encode()
88+
buffer += b"<<<<<< network\n\n"
89+
for file in self.uploaded_files:
90+
buffer += f"# path={file.filename}\n".encode()
91+
buffer += file.contents
92+
buffer += b"\n<<<<<< EOF\n\n"
93+
return buffer

0 commit comments

Comments
 (0)