Skip to content

Commit 2823df2

Browse files
committed
Stabilize the fingerprint
Use a combination of the error details (file path, severity, error code, and message) and the text of the flagged line as the fingerprint of the issue. This is more stable than a fingerprint that depends on the line number, which changes whenever code is added or removed above the issue. The contents of the most recently read file are cached to avoid re-reading the same file for every issue reported in it.
1 parent 33c6232 commit 2823df2

2 files changed

Lines changed: 49 additions & 12 deletions

File tree

mypy_gitlab_code_quality/__init__.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import re
66
from enum import Enum
77
from functools import reduce
8+
from pathlib import Path
89
from sys import stdin, stdout
9-
from typing import TYPE_CHECKING, TypedDict
10+
from typing import TYPE_CHECKING, Any, TypedDict
1011

1112
if TYPE_CHECKING:
1213
from collections.abc import Iterable
@@ -35,7 +36,7 @@ class GitlabIssue(TypedDict):
3536
location: GitlabIssueLocation
3637

3738

38-
def parse_issue(line: str) -> GitlabIssue | None:
39+
def parse_issue(line: str, cache: dict[str, Any] | None = None) -> GitlabIssue | None:
3940
if line.startswith("{"):
4041
try:
4142
match = json.loads(line)
@@ -54,17 +55,43 @@ def parse_issue(line: str) -> GitlabIssue | None:
5455
)
5556
if match is None:
5657
return None
57-
fingerprint = hashlib.md5(line.encode("utf-8"), usedforsecurity=False).hexdigest()
5858
error_levels_table = {"error": Severity.major, "note": Severity.info}
59+
60+
path = match["file"]
61+
line_number = int(match["line"])
62+
error_level = match["severity"]
63+
message = match["message"]
64+
error_code = match["code"]
65+
66+
if cache is None:
67+
cache = {}
68+
if cache.get("path", "") != path:
69+
cache["path"] = path
70+
try:
71+
cache["contents"] = Path(path).read_text().splitlines()
72+
except FileNotFoundError:
73+
# ignore non-existent files
74+
cache["contents"] = []
75+
76+
try:
77+
flagged_line = cache["contents"][line_number - 1]
78+
except IndexError:
79+
flagged_line = ""
80+
81+
fingerprint = f"{path}::{error_level}::{error_code}::{message}::{flagged_line}"
82+
fingerprint = hashlib.md5(
83+
fingerprint.encode("utf-8"), usedforsecurity=False
84+
).hexdigest()
85+
5986
return {
60-
"description": match["message"],
61-
"check_name": match["code"],
87+
"description": message,
88+
"check_name": error_code,
6289
"fingerprint": fingerprint,
63-
"severity": error_levels_table.get(match["severity"], Severity.unknown),
90+
"severity": error_levels_table.get(error_level, Severity.unknown),
6491
"location": {
65-
"path": match["file"],
92+
"path": path,
6693
"lines": {
67-
"begin": int(match["line"]),
94+
"begin": line_number,
6895
},
6996
},
7097
}
@@ -91,7 +118,8 @@ def append_or_extend(issues: list[GitlabIssue], new: GitlabIssue) -> list[Gitlab
91118

92119

93120
def generate_report(lines: Iterable[str]) -> list[GitlabIssue]:
94-
issues = filter(None, map(parse_issue, lines))
121+
cache: dict[str, Any] = {}
122+
issues = filter(None, (parse_issue(line, cache) for line in lines))
95123
return reduce(append_or_extend, issues, [])
96124

97125

tests.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_line_number(self):
1414

1515
def test_fingerprint(self):
1616
issue = parse_issue("module.py:2: error: Description")
17-
self.assertEqual("a19285c6cdf4dafe237cc5d2de6c0308", issue["fingerprint"])
17+
self.assertEqual("28f9bd9c79566d41c2edb373a3a06fa6", issue["fingerprint"])
1818

1919
def test_error_level_error(self):
2020
issue = parse_issue("module.py:2: error: Description")
@@ -80,6 +80,14 @@ def test_line_number(self):
8080
self.assertEqual(2, issue["location"]["lines"]["begin"])
8181

8282
def test_fingerprint(self):
83+
cache = {
84+
"path": "module.py",
85+
"contents": [
86+
"foo",
87+
"bar",
88+
"baz",
89+
],
90+
}
8391
issue = parse_issue(
8492
r"""{
8593
"file": "module.py",
@@ -89,9 +97,10 @@ def test_fingerprint(self):
8997
"hint": null,
9098
"code": "error-code",
9199
"severity": "error"
92-
}"""
100+
}""",
101+
cache,
93102
)
94-
self.assertEqual("4455bb04f307121aa95a7b3725996837", issue["fingerprint"])
103+
self.assertEqual("0a8a778330d2d25373c68fe1c7a381c1", issue["fingerprint"])
95104

96105
def test_error_level_error(self):
97106
issue = parse_issue(

0 commit comments

Comments
 (0)