Skip to content

Commit e772d83

Browse files
Sarif reporter merge branch (#46)
Co-authored-by: DANIDEVOLP <danibruno34@outlook.it>
2 parents 4a94461 + a51ba63 commit e772d83

1 file changed

Lines changed: 192 additions & 54 deletions

File tree

src/pyspector/reporting.py

Lines changed: 192 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,111 @@
11
import json
22
import html as html_module
3-
# Added 'Region' to imports for better SARIF compliance
4-
from sarif_om import SarifLog, Tool, Run, ReportingDescriptor, Result, ArtifactLocation, Location, PhysicalLocation, Region
5-
# Removed 'asdict' from imports as it is not needed for sarif_om
6-
from dataclasses import asdict, is_dataclass
3+
import importlib.metadata
4+
5+
from sarif_om import (
6+
SarifLog,
7+
Tool,
8+
ToolComponent,
9+
Run,
10+
ReportingDescriptor,
11+
ReportingConfiguration,
12+
MultiformatMessageString,
13+
Result,
14+
ArtifactLocation,
15+
Location,
16+
PhysicalLocation,
17+
Region,
18+
Message,
19+
)
20+
21+
22+
# Lookup table from the scanner's internal severity names to the value used
# for the SARIF ``level`` property.  CRITICAL and HIGH share "error".
_SEVERITY_TO_SARIF_LEVEL = dict(
    CRITICAL="error",
    HIGH="error",
    MEDIUM="warning",
    LOW="note",
)
29+
30+
31+
def _get_version():
    """Look up the installed ``pyspector`` distribution version.

    Returns:
        The version string reported by ``importlib.metadata``, or ``"dev"``
        when no ``pyspector`` distribution is installed.
    """
    try:
        installed = importlib.metadata.version("pyspector")
    except importlib.metadata.PackageNotFoundError:
        return "dev"
    else:
        return installed


# Resolved once at import time and reused for every generated report.
_PYSPECTOR_VERSION = _get_version()
40+
41+
42+
def _severity_key(issue) -> str:
    """Return the issue's severity as a bare upper-case name.

    The severity is stringified first, so enum-like values such as
    ``Severity.High`` and plain strings such as ``"high"`` both yield
    ``"HIGH"``: only the text after the last dot is kept.
    """
    _, _, tail = str(issue.severity).rpartition(".")
    return tail.upper()
45+
46+
47+
def _clean(obj):
    """Recursively convert an object graph into JSON-serializable data.

    Lists are cleaned element by element.  Dicts and objects become dicts
    with every ``None``-valued entry dropped, so unset optional properties
    do not appear in the output.

    For attrs-based models (anything whose class exposes
    ``__attrs_attrs__``, which is how the ``sarif_om`` classes are built)
    the output key is taken from the field's ``schema_property_name``
    metadata.  This is what turns Python attribute names such as
    ``rule_id`` or ``schema_uri`` into the property names the SARIF schema
    requires (``ruleId``, ``$schema``).  Dumping ``__dict__`` directly
    would emit the snake_case attribute names instead.

    Args:
        obj: A list, dict, model object, or plain JSON scalar.

    Returns:
        A structure made only of lists, dicts and the leaf values found in
        ``obj``.
    """
    if isinstance(obj, list):
        return [_clean(item) for item in obj]

    if isinstance(obj, dict):
        return {k: _clean(v) for k, v in obj.items() if v is not None}

    # Read the attrs field table off the class rather than importing attrs,
    # so this module gains no new dependency.
    attrs_fields = getattr(type(obj), "__attrs_attrs__", None)
    if attrs_fields is not None:
        cleaned = {}
        for field in attrs_fields:
            value = getattr(obj, field.name, None)
            if value is None:
                continue
            metadata = getattr(field, "metadata", None) or {}
            # Fall back to the attribute name when no schema name is recorded.
            key = metadata.get("schema_property_name", field.name)
            cleaned[key] = _clean(value)
        return cleaned

    # Plain (non-attrs) objects keep the previous behaviour.
    if hasattr(obj, "__dict__"):
        return {k: _clean(v) for k, v in obj.__dict__.items() if v is not None}

    return obj
67+
768

869
class Reporter:
970
def __init__(self, issues: list, report_format: str):
    """Store the issues to report and the name of the output format."""
    self.issues, self.format = issues, report_format
1273

1374
def generate(self) -> str:
    """Render the report in the format chosen at construction time.

    ``"json"``, ``"sarif"`` and ``"html"`` select the matching ``to_*``
    method; any other value falls through to the console rendering.
    """
    known_formats = (
        ("json", "to_json"),
        ("sarif", "to_sarif"),
        ("html", "to_html"),
    )
    for format_name, method_name in known_formats:
        if self.format == format_name:
            return getattr(self, method_name)()
    return self.to_console()
2182

83+
2284
def to_console(self) -> str:
2385
if not self.issues:
2486
return "\nNo issues found."
2587

2688
output = []
89+
severity_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
2790

28-
# Define severity order (highest to lowest priority)
29-
severity_order = ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']
30-
31-
# Group issues by severity
32-
issues_by_severity = {}
91+
issues_by_severity: dict[str, list] = {}
3392
for issue in self.issues:
34-
severity = str(issue.severity).split('.')[-1].upper()
35-
if severity not in issues_by_severity:
36-
issues_by_severity[severity] = []
37-
issues_by_severity[severity].append(issue)
93+
severity = _severity_key(issue)
94+
issues_by_severity.setdefault(severity, []).append(issue)
3895

39-
# Output grouped by severity (in priority order)
4096
for severity in severity_order:
4197
if severity not in issues_by_severity:
4298
continue
4399

44-
issues = issues_by_severity[severity]
45-
# Sort issues within each severity group by file path and line number
46-
sorted_issues = sorted(issues, key=lambda i: (i.file_path, i.line_number))
100+
sorted_issues = sorted(
101+
issues_by_severity[severity],
102+
key=lambda i: (i.file_path, i.line_number),
103+
)
47104

48-
# Add severity header
49105
output.append(f"\n{'='*60}")
50-
output.append(f" {severity} ({len(sorted_issues)} issue{'s' if len(sorted_issues) != 1 else ''})")
106+
output.append(
107+
f" {severity} ({len(sorted_issues)} issue{'s' if len(sorted_issues) != 1 else ''})"
108+
)
51109
output.append(f"{'='*60}")
52110

53111
for issue in sorted_issues:
@@ -60,6 +118,10 @@ def to_console(self) -> str:
60118

61119
return "\n".join(output)
62120

121+
# ------------------------------------------------------------------ #
122+
# JSON #
123+
# ------------------------------------------------------------------ #
124+
63125
def to_json(self) -> str:
64126
report = {
65127
"summary": {"issue_count": len(self.issues)},
@@ -70,47 +132,120 @@ def to_json(self) -> str:
70132
"file_path": issue.file_path,
71133
"line_number": issue.line_number,
72134
"code": issue.code,
73-
"severity": str(issue.severity).split('.')[-1],
135+
"severity": _severity_key(issue),
74136
"remediation": issue.remediation,
75-
} for issue in self.issues
76-
]
137+
}
138+
for issue in self.issues
139+
],
77140
}
141+
78142
return json.dumps(report, indent=2)
79143

144+
# ------------------------------------------------------------------ #
145+
# SARIF #
146+
# ------------------------------------------------------------------ #
147+
80148
def to_sarif(self) -> str:
    """Render the collected issues as a SARIF 2.1.0 log serialized to JSON.

    One rule descriptor is registered per distinct ``rule_id``, built from
    the first issue that carries it.  Every issue then becomes one result
    that refers back to its rule by id and by index into the rule list.
    """

    def sarif_level(finding) -> str:
        # Severities missing from the table are reported as "warning".
        return _SEVERITY_TO_SARIF_LEVEL.get(_severity_key(finding), "warning")

    # Pass 1: rule catalogue, remembering where each rule sits in the list.
    position_of: dict[str, int] = {}
    descriptors: list[ReportingDescriptor] = []
    for finding in self.issues:
        if finding.rule_id in position_of:
            continue

        default_level = sarif_level(finding)

        remediation_markdown = None
        if finding.remediation:
            remediation_markdown = f"**Remediation:** {finding.remediation}"

        descriptor = ReportingDescriptor(
            id=finding.rule_id,
            name=finding.rule_id,
            short_description=MultiformatMessageString(text=finding.description),
            help=MultiformatMessageString(
                # Without remediation text, fall back to the description.
                text=finding.remediation or finding.description,
                markdown=remediation_markdown,
            ),
            default_configuration=ReportingConfiguration(level=default_level),
        )

        position_of[finding.rule_id] = len(descriptors)
        descriptors.append(descriptor)

    tool = Tool(
        driver=ToolComponent(
            name="PySpector",
            version=_PYSPECTOR_VERSION,
            information_uri="https://github.com/your-org/pyspector",
            rules=descriptors,
        )
    )

    # Pass 2: one result per issue, in the original issue order.
    results: list[Result] = []
    for finding in self.issues:
        level = sarif_level(finding)

        region = Region(
            start_line=finding.line_number,
            snippet=MultiformatMessageString(text=finding.code.strip()),
        )
        artifact = ArtifactLocation(
            uri=finding.file_path,
            uri_base_id="%SRCROOT%",
        )
        location = Location(
            physical_location=PhysicalLocation(
                artifact_location=artifact,
                region=region,
            )
        )

        results.append(
            Result(
                rule_id=finding.rule_id,
                rule_index=position_of[finding.rule_id],
                level=level,
                message=Message(text=finding.description),
                locations=[location],
            )
        )

    schema_location = (
        "https://raw.githubusercontent.com/oasis-tcs/"
        "sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
    )
    log = SarifLog(
        version="2.1.0",
        schema_uri=schema_location,
        runs=[Run(tool=tool, results=results)],
    )

    # _clean turns the model objects into plain data json.dumps can handle.
    return json.dumps(_clean(log), indent=2)
243+
244+
# ------------------------------------------------------------------ #
245+
# HTML #
246+
# ------------------------------------------------------------------ #
247+
112248
def to_html(self) -> str:
113-
# A simple HTML report
114249
html = f"""
115250
<html>
116251
<head><title>PySpector Scan Report</title></head>
@@ -119,13 +254,14 @@ def to_html(self) -> str:
119254
<h2>Found {len(self.issues)} issues.</h2>
120255
<table border='1' style='border-collapse: collapse; width: 100%;'>
121256
<tr style='background-color: #f2f2f2;'>
122-
<th style='padding: 8px; text-align: left;'>File</th>
123-
<th style='padding: 8px; text-align: left;'>Line</th>
124-
<th style='padding: 8px; text-align: left;'>Severity</th>
125-
<th style='padding: 8px; text-align: left;'>Description</th>
126-
<th style='padding: 8px; text-align: left;'>Code</th>
257+
<th style='padding: 8px;'>File</th>
258+
<th style='padding: 8px;'>Line</th>
259+
<th style='padding: 8px;'>Severity</th>
260+
<th style='padding: 8px;'>Description</th>
261+
<th style='padding: 8px;'>Code</th>
127262
</tr>
128263
"""
264+
129265
for issue in self.issues:
130266
html += f"""
131267
<tr>
@@ -136,5 +272,7 @@ def to_html(self) -> str:
136272
<td style='padding: 8px;'><pre><code>{html_module.escape(issue.code)}</code></pre></td>
137273
</tr>
138274
"""
275+
139276
html += "</table></body></html>"
277+
140278
return html

0 commit comments

Comments
 (0)