11import json
22import html as html_module
3- # Added 'Region' to imports for better SARIF compliance
4- from sarif_om import SarifLog , Tool , Run , ReportingDescriptor , Result , ArtifactLocation , Location , PhysicalLocation , Region
5- # Removed 'asdict' from imports as it is not needed for sarif_om
6- from dataclasses import asdict , is_dataclass
3+ import importlib .metadata
4+
5+ from sarif_om import (
6+ SarifLog ,
7+ Tool ,
8+ ToolComponent ,
9+ Run ,
10+ ReportingDescriptor ,
11+ ReportingConfiguration ,
12+ MultiformatMessageString ,
13+ Result ,
14+ ArtifactLocation ,
15+ Location ,
16+ PhysicalLocation ,
17+ Region ,
18+ Message ,
19+ )
20+
21+
# SARIF ``level`` string used for each internal severity name.
# Severities missing from this table are handled by the callers, which
# pass their own fallback to ``.get()``.
_SEVERITY_TO_SARIF_LEVEL = dict(
    CRITICAL="error",
    HIGH="error",
    MEDIUM="warning",
    LOW="note",
)
29+
30+
def _get_version():
    """Look up the installed ``pyspector`` distribution version.

    Returns:
        The version string reported by ``importlib.metadata``, or the
        literal ``"dev"`` when no such distribution is installed.
    """
    metadata = importlib.metadata
    try:
        installed = metadata.version("pyspector")
    except metadata.PackageNotFoundError:
        installed = "dev"
    return installed


# Resolved once at import time and reused wherever the version is needed.
_PYSPECTOR_VERSION = _get_version()
40+
41+
def _severity_key(issue) -> str:
    """Return the upper-cased severity name of *issue*.

    ``issue.severity`` is converted with ``str()``; only the text after the
    last ``"."`` is kept (the whole text when there is no dot), so an
    enum-style ``"Severity.High"`` and a plain ``"high"`` both give ``"HIGH"``.
    """
    rendered = str(issue.severity)
    # rpartition yields ("", "", rendered) when no dot is present, so the
    # last element is always the piece we want.
    _, _, tail = rendered.rpartition(".")
    return tail.upper()
45+
46+
def _clean(obj):
    """Convert *obj* into plain JSON-serializable data, dropping ``None`` values.

    * Lists are converted element by element (``None`` elements are kept).
    * Dicts are copied without the entries whose value is ``None``.
    * Any other object that has a ``__dict__`` becomes a dict of its instance
      attributes, again without ``None`` values.
    * Everything else is returned unchanged.

    For attrs-generated classes the output keys are the serialized property
    names rather than the Python attribute names. sarif_om's object model
    stores the SARIF property name of every field under the
    ``schema_property_name`` metadata key (for example ``start_line`` ->
    ``startLine`` and ``schema_uri`` -> ``$schema``); dumping ``__dict__``
    verbatim would emit the snake_case names, which SARIF consumers do not
    recognise.

    Args:
        obj: Value to convert; may be arbitrarily nested.

    Returns:
        A structure made of dicts, lists and leaf values.
    """
    if isinstance(obj, list):
        return [_clean(item) for item in obj]

    if isinstance(obj, dict):
        return {
            key: _clean(value)
            for key, value in obj.items()
            if value is not None
        }

    if hasattr(obj, "__dict__"):
        # attrs publishes a class's field definitions as ``__attrs_attrs__``;
        # reading that attribute avoids importing attrs here. Classes without
        # it (or fields without the metadata key) keep their attribute names.
        schema_names = {}
        for field in getattr(type(obj), "__attrs_attrs__", ()):
            metadata = getattr(field, "metadata", None) or {}
            schema_names[field.name] = metadata.get(
                "schema_property_name", field.name
            )

        return {
            schema_names.get(name, name): _clean(value)
            for name, value in vars(obj).items()
            if value is not None
        }

    return obj
67+
768
869class Reporter :
def __init__(self, issues: list, report_format: str):
    """Remember the findings and the requested output format.

    Args:
        issues: Issue objects to report; stored by reference, not copied.
        report_format: Name of the output format, read later by ``generate``.
    """
    self.issues, self.format = issues, report_format
1273
def generate(self) -> str:
    """Render the report in the format selected at construction time.

    The requested format is compared case-insensitively with surrounding
    whitespace ignored. ``json``, ``sarif`` and ``html`` select
    ``to_json``, ``to_sarif`` and ``to_html`` respectively; any other
    value (including a non-string) falls back to ``to_console``.

    Returns:
        The rendered report as a string.
    """
    requested = self.format
    # Normalise before comparing: the previous literals carried a leading
    # space, so a plain "json" / "sarif" / "html" silently fell through to
    # console output. Stripping accepts both spellings.
    name = requested.strip().lower() if isinstance(requested, str) else ""

    if name == "json":
        return self.to_json()
    if name == "sarif":
        return self.to_sarif()
    if name == "html":
        return self.to_html()
    return self.to_console()
2182
83+
2284 def to_console (self ) -> str :
2385 if not self .issues :
2486 return "\n No issues found."
2587
2688 output = []
89+ severity_order = ["CRITICAL" , "HIGH" , "MEDIUM" , "LOW" ]
2790
28- # Define severity order (highest to lowest priority)
29- severity_order = ['CRITICAL' , 'HIGH' , 'MEDIUM' , 'LOW' ]
30-
31- # Group issues by severity
32- issues_by_severity = {}
91+ issues_by_severity : dict [str , list ] = {}
3392 for issue in self .issues :
34- severity = str (issue .severity ).split ('.' )[- 1 ].upper ()
35- if severity not in issues_by_severity :
36- issues_by_severity [severity ] = []
37- issues_by_severity [severity ].append (issue )
93+ severity = _severity_key (issue )
94+ issues_by_severity .setdefault (severity , []).append (issue )
3895
39- # Output grouped by severity (in priority order)
4096 for severity in severity_order :
4197 if severity not in issues_by_severity :
4298 continue
4399
44- issues = issues_by_severity [severity ]
45- # Sort issues within each severity group by file path and line number
46- sorted_issues = sorted (issues , key = lambda i : (i .file_path , i .line_number ))
100+ sorted_issues = sorted (
101+ issues_by_severity [severity ],
102+ key = lambda i : (i .file_path , i .line_number ),
103+ )
47104
48- # Add severity header
49105 output .append (f"\n { '=' * 60 } " )
50- output .append (f" { severity } ({ len (sorted_issues )} issue{ 's' if len (sorted_issues ) != 1 else '' } )" )
106+ output .append (
107+ f" { severity } ({ len (sorted_issues )} issue{ 's' if len (sorted_issues ) != 1 else '' } )"
108+ )
51109 output .append (f"{ '=' * 60 } " )
52110
53111 for issue in sorted_issues :
@@ -60,6 +118,10 @@ def to_console(self) -> str:
60118
61119 return "\n " .join (output )
62120
121+ # ------------------------------------------------------------------ #
122+ # JSON #
123+ # ------------------------------------------------------------------ #
124+
63125 def to_json (self ) -> str :
64126 report = {
65127 "summary" : {"issue_count" : len (self .issues )},
@@ -70,47 +132,120 @@ def to_json(self) -> str:
70132 "file_path" : issue .file_path ,
71133 "line_number" : issue .line_number ,
72134 "code" : issue .code ,
73- "severity" : str (issue . severity ). split ( '.' )[ - 1 ] ,
135+ "severity" : _severity_key (issue ) ,
74136 "remediation" : issue .remediation ,
75- } for issue in self .issues
76- ]
137+ }
138+ for issue in self .issues
139+ ],
77140 }
141+
78142 return json .dumps (report , indent = 2 )
79143
144+ # ------------------------------------------------------------------ #
145+ # SARIF #
146+ # ------------------------------------------------------------------ #
147+
def to_sarif(self) -> str:
    """Serialize the issues as a SARIF 2.1.0 log and return it as JSON text.

    One rule descriptor is created per distinct ``rule_id`` (taken from the
    first issue that carries it), and one result per issue. Each result
    refers back to its rule by id and by index into the rule list. Severity
    names without an entry in ``_SEVERITY_TO_SARIF_LEVEL`` are reported as
    ``"warning"``.

    Returns:
        The log, passed through ``_clean`` and dumped with 2-space indent.
    """
    fallback_level = "warning"

    # Pass 1: rule descriptors in first-seen order, remembering where each
    # rule_id landed so results can carry a rule_index.
    descriptors: list[ReportingDescriptor] = []
    position_of: dict[str, int] = {}
    for issue in self.issues:
        if issue.rule_id in position_of:
            continue

        default_level = _SEVERITY_TO_SARIF_LEVEL.get(
            _severity_key(issue), fallback_level
        )
        # Markdown help is only supplied when there is remediation text.
        # The literal's trailing space is reproduced as it appears.
        remediation_md = (
            f"**Remediation:** {issue.remediation} "
            if issue.remediation
            else None
        )
        descriptor = ReportingDescriptor(
            id=issue.rule_id,
            name=issue.rule_id,
            short_description=MultiformatMessageString(text=issue.description),
            help=MultiformatMessageString(
                text=issue.remediation or issue.description,
                markdown=remediation_md,
            ),
            default_configuration=ReportingConfiguration(level=default_level),
        )

        position_of[issue.rule_id] = len(descriptors)
        descriptors.append(descriptor)

    # NOTE(review): information_uri looks like a placeholder ("your-org");
    # confirm the real project URL.
    tool = Tool(
        driver=ToolComponent(
            name="PySpector",
            version=_PYSPECTOR_VERSION,
            information_uri="https://github.com/your-org/pyspector",
            rules=descriptors,
        )
    )

    # Pass 2: one result per issue, in the order the issues were given.
    findings: list[Result] = []
    for issue in self.issues:
        level = _SEVERITY_TO_SARIF_LEVEL.get(_severity_key(issue), fallback_level)

        snippet = MultiformatMessageString(text=issue.code.strip())
        where = Location(
            physical_location=PhysicalLocation(
                artifact_location=ArtifactLocation(
                    uri=issue.file_path,
                    uri_base_id="%SRCROOT%",
                ),
                region=Region(start_line=issue.line_number, snippet=snippet),
            )
        )

        findings.append(
            Result(
                rule_id=issue.rule_id,
                rule_index=position_of[issue.rule_id],
                level=level,
                message=Message(text=issue.description),
                locations=[where],
            )
        )

    sarif_log = SarifLog(
        version="2.1.0",
        schema_uri=(
            "https://raw.githubusercontent.com/oasis-tcs/"
            "sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
        ),
        runs=[Run(tool=tool, results=findings)],
    )

    return json.dumps(_clean(sarif_log), indent=2)
243+
244+ # ------------------------------------------------------------------ #
245+ # HTML #
246+ # ------------------------------------------------------------------ #
247+
112248 def to_html (self ) -> str :
113- # A simple HTML report
114249 html = f"""
115250 <html>
116251 <head><title>PySpector Scan Report</title></head>
@@ -119,13 +254,14 @@ def to_html(self) -> str:
119254 <h2>Found { len (self .issues )} issues.</h2>
120255 <table border='1' style='border-collapse: collapse; width: 100%;'>
121256 <tr style='background-color: #f2f2f2;'>
122- <th style='padding: 8px; text-align: left; '>File</th>
123- <th style='padding: 8px; text-align: left; '>Line</th>
124- <th style='padding: 8px; text-align: left; '>Severity</th>
125- <th style='padding: 8px; text-align: left; '>Description</th>
126- <th style='padding: 8px; text-align: left; '>Code</th>
257+ <th style='padding: 8px;'>File</th>
258+ <th style='padding: 8px;'>Line</th>
259+ <th style='padding: 8px;'>Severity</th>
260+ <th style='padding: 8px;'>Description</th>
261+ <th style='padding: 8px;'>Code</th>
127262 </tr>
128263 """
264+
129265 for issue in self .issues :
130266 html += f"""
131267 <tr>
@@ -136,5 +272,7 @@ def to_html(self) -> str:
136272 <td style='padding: 8px;'><pre><code>{ html_module .escape (issue .code )} </code></pre></td>
137273 </tr>
138274 """
275+
139276 html += "</table></body></html>"
277+
140278 return html
0 commit comments