Skip to content

Commit 1aab58c

Browse files
committed
perf(core): add lazy suppression/report fast-paths and streamline extractor binding
1 parent f7ebd78 commit 1aab58c

5 files changed

Lines changed: 329 additions & 78 deletions

File tree

codeclone/_cli_reports.py

Lines changed: 37 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -22,38 +22,14 @@ class _QuietArgs(Protocol):
2222
quiet: bool
2323

2424

25-
class _OutputPaths(Protocol):
26-
@property
27-
def html(self) -> Path | None: ...
25+
def _path_attr(obj: object, name: str) -> Path | None:
26+
value = getattr(obj, name, None)
27+
return value if isinstance(value, Path) else None
2828

29-
@property
30-
def json(self) -> Path | None: ...
3129

32-
@property
33-
def md(self) -> Path | None: ...
34-
35-
@property
36-
def sarif(self) -> Path | None: ...
37-
38-
@property
39-
def text(self) -> Path | None: ...
40-
41-
42-
class _ReportArtifacts(Protocol):
43-
@property
44-
def html(self) -> str | None: ...
45-
46-
@property
47-
def json(self) -> str | None: ...
48-
49-
@property
50-
def md(self) -> str | None: ...
51-
52-
@property
53-
def sarif(self) -> str | None: ...
54-
55-
@property
56-
def text(self) -> str | None: ...
30+
def _text_attr(obj: object, name: str) -> str | None:
31+
value = getattr(obj, name, None)
32+
return value if isinstance(value, str) else None
5733

5834

5935
def _write_report_output(
@@ -83,60 +59,70 @@ def _open_html_report_in_browser(*, path: Path) -> None:
8359
def write_report_outputs(
8460
*,
8561
args: _QuietArgs,
86-
output_paths: _OutputPaths,
87-
report_artifacts: _ReportArtifacts,
62+
output_paths: object,
63+
report_artifacts: object,
8864
console: _PrinterLike,
8965
open_html_report: bool = False,
9066
) -> str | None:
9167
html_report_path: str | None = None
9268
saved_reports: list[tuple[str, Path]] = []
93-
94-
if output_paths.html and report_artifacts.html is not None:
95-
out = output_paths.html
69+
html_path = _path_attr(output_paths, "html")
70+
json_path = _path_attr(output_paths, "json")
71+
md_path = _path_attr(output_paths, "md")
72+
sarif_path = _path_attr(output_paths, "sarif")
73+
text_path = _path_attr(output_paths, "text")
74+
html_report = _text_attr(report_artifacts, "html")
75+
json_report = _text_attr(report_artifacts, "json")
76+
md_report = _text_attr(report_artifacts, "md")
77+
sarif_report = _text_attr(report_artifacts, "sarif")
78+
text_report = _text_attr(report_artifacts, "text")
79+
80+
if html_path and html_report is not None:
81+
out = html_path
9682
_write_report_output(
9783
out=out,
98-
content=report_artifacts.html,
84+
content=html_report,
9985
label="HTML",
10086
console=console,
10187
)
10288
html_report_path = str(out)
10389
saved_reports.append(("HTML", out))
10490

105-
if output_paths.json and report_artifacts.json is not None:
106-
out = output_paths.json
91+
if json_path and json_report is not None:
92+
out = json_path
10793
_write_report_output(
10894
out=out,
109-
content=report_artifacts.json,
95+
content=json_report,
11096
label="JSON",
11197
console=console,
11298
)
11399
saved_reports.append(("JSON", out))
114100

115-
if output_paths.md and report_artifacts.md is not None:
116-
out = output_paths.md
101+
if md_path and md_report is not None:
102+
out = md_path
117103
_write_report_output(
118104
out=out,
119-
content=report_artifacts.md,
105+
content=md_report,
120106
label="Markdown",
121107
console=console,
122108
)
123109
saved_reports.append(("Markdown", out))
124110

125-
if output_paths.sarif and report_artifacts.sarif is not None:
126-
out = output_paths.sarif
111+
if sarif_path and sarif_report is not None:
112+
out = sarif_path
127113
_write_report_output(
128114
out=out,
129-
content=report_artifacts.sarif,
115+
content=sarif_report,
130116
label="SARIF",
131117
console=console,
132118
)
133119
saved_reports.append(("SARIF", out))
134120

135-
if output_paths.text and report_artifacts.text is not None:
136-
out = output_paths.text
121+
if text_path and text_report is not None:
122+
out = text_path
137123
_write_report_output(
138124
out=out,
139-
content=report_artifacts.text,
125+
content=text_report,
140126
label="text",
141127
console=console,
142128
)
@@ -152,12 +138,10 @@ def write_report_outputs(
152138
display = path
153139
console.print(f" [bold]{label} report saved:[/bold] [dim]{display}[/dim]")
154140

155-
if open_html_report and output_paths.html is not None:
141+
if open_html_report and html_path is not None:
156142
try:
157-
_open_html_report_in_browser(path=output_paths.html)
143+
_open_html_report_in_browser(path=html_path)
158144
except Exception as exc:
159-
console.print(
160-
ui.fmt_html_report_open_failed(path=output_paths.html, error=exc)
161-
)
145+
console.print(ui.fmt_html_report_open_failed(path=html_path, error=exc))
162146

163147
return html_report_path

codeclone/extractor.py

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class _ParseTimeoutError(Exception):
7777

7878
FunctionNode = ast.FunctionDef | ast.AsyncFunctionDef
7979
_NamedDeclarationNode = FunctionNode | ast.ClassDef
80+
_DeclarationTokenIndexKey = tuple[int, int, str]
8081

8182

8283
def _consumed_cpu_seconds(resource_module: object) -> float:
@@ -177,7 +178,10 @@ def _declaration_token_index(
177178
start_line: int,
178179
start_col: int,
179180
declaration_token: str,
181+
source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None,
180182
) -> int | None:
183+
if source_token_index is not None:
184+
return source_token_index.get((start_line, start_col, declaration_token))
181185
for idx, token in enumerate(source_tokens):
182186
if token.start != (start_line, start_col):
183187
continue
@@ -186,6 +190,19 @@ def _declaration_token_index(
186190
return None
187191

188192

193+
def _build_declaration_token_index(
194+
source_tokens: tuple[tokenize.TokenInfo, ...],
195+
) -> Mapping[_DeclarationTokenIndexKey, int]:
196+
indexed: dict[_DeclarationTokenIndexKey, int] = {}
197+
for idx, token in enumerate(source_tokens):
198+
if token.type != tokenize.NAME:
199+
continue
200+
if token.string not in {"def", "async", "class"}:
201+
continue
202+
indexed[(token.start[0], token.start[1], token.string)] = idx
203+
return indexed
204+
205+
189206
def _scan_declaration_colon_line(
190207
*,
191208
source_tokens: tuple[tokenize.TokenInfo, ...],
@@ -223,6 +240,7 @@ def _declaration_end_line(
223240
node: ast.AST,
224241
*,
225242
source_tokens: tuple[tokenize.TokenInfo, ...],
243+
source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None,
226244
) -> int:
227245
start_line = int(getattr(node, "lineno", 0))
228246
start_col = int(getattr(node, "col_offset", 0))
@@ -235,6 +253,7 @@ def _declaration_end_line(
235253
start_line=start_line,
236254
start_col=start_col,
237255
declaration_token=declaration_token,
256+
source_token_index=source_token_index,
238257
)
239258
if start_index is None:
240259
return _fallback_declaration_end_line(node, start_line=start_line)
@@ -788,7 +807,9 @@ def _collect_declaration_targets(
788807
filepath: str,
789808
module_name: str,
790809
collector: _QualnameCollector,
791-
source_tokens: tuple[tokenize.TokenInfo, ...],
810+
source_tokens: tuple[tokenize.TokenInfo, ...] = (),
811+
source_token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None,
812+
include_inline_lines: bool = False,
792813
) -> tuple[DeclarationTarget, ...]:
793814
declarations: list[DeclarationTarget] = []
794815

@@ -797,9 +818,14 @@ def _collect_declaration_targets(
797818
end = int(getattr(node, "end_lineno", 0))
798819
if start <= 0 or end <= 0:
799820
continue
800-
declaration_end_line = _declaration_end_line(
801-
node,
802-
source_tokens=source_tokens,
821+
declaration_end_line = (
822+
_declaration_end_line(
823+
node,
824+
source_tokens=source_tokens,
825+
source_token_index=source_token_index,
826+
)
827+
if include_inline_lines
828+
else None
803829
)
804830
kind: Literal["function", "method"] = (
805831
"method" if "." in local_name else "function"
@@ -820,9 +846,14 @@ def _collect_declaration_targets(
820846
end = int(getattr(class_node, "end_lineno", 0))
821847
if start <= 0 or end <= 0:
822848
continue
823-
declaration_end_line = _declaration_end_line(
824-
class_node,
825-
source_tokens=source_tokens,
849+
declaration_end_line = (
850+
_declaration_end_line(
851+
class_node,
852+
source_tokens=source_tokens,
853+
source_token_index=source_token_index,
854+
)
855+
if include_inline_lines
856+
else None
826857
)
827858
declarations.append(
828859
DeclarationTarget(
@@ -849,6 +880,42 @@ def _collect_declaration_targets(
849880
)
850881

851882

883+
def _build_suppression_index_for_source(
    *,
    source: str,
    filepath: str,
    module_name: str,
    collector: _QualnameCollector,
) -> Mapping[SuppressionTargetKey, tuple[str, ...]]:
    """Build the suppression index for one source file.

    Returns an empty mapping straight away when ``source`` yields no
    suppression directives. The source is tokenized, and a declaration-token
    index built, only when at least one directive has ``binding == "inline"``;
    otherwise declaration targets are collected with empty tokens and
    ``include_inline_lines=False``.
    """
    directives = extract_suppression_directives(source)
    if not directives:
        return {}

    # Tokenizing is only needed when some directive is bound inline.
    inline_requested = any(item.binding == "inline" for item in directives)

    tokens: tuple[tokenize.TokenInfo, ...] = ()
    token_index: Mapping[_DeclarationTokenIndexKey, int] | None = None
    if inline_requested:
        tokens = _source_tokens(source)
        # With no tokens the index stays None and the linear scan is used.
        token_index = _build_declaration_token_index(tokens) if tokens else None

    targets = _collect_declaration_targets(
        filepath=filepath,
        module_name=module_name,
        collector=collector,
        source_tokens=tokens,
        source_token_index=token_index,
        include_inline_lines=inline_requested,
    )
    bindings = bind_suppressions_to_declarations(
        directives=directives,
        declarations=targets,
    )
    return build_suppression_index(bindings)
917+
918+
852919
# =========================
853920
# Public API
854921
# =========================
@@ -883,7 +950,6 @@ def extract_units_and_stats_from_source(
883950
collector = _QualnameCollector()
884951
collector.visit(tree)
885952
source_lines = source.splitlines()
886-
source_tokens = _source_tokens(source)
887953
source_line_count = len(source_lines)
888954

889955
is_test_file = is_test_filepath(filepath)
@@ -902,18 +968,12 @@ def extract_units_and_stats_from_source(
902968
protocol_symbol_aliases = _walk.protocol_symbol_aliases
903969
protocol_module_aliases = _walk.protocol_module_aliases
904970

905-
suppression_directives = extract_suppression_directives(source)
906-
declaration_targets = _collect_declaration_targets(
971+
suppression_index = _build_suppression_index_for_source(
972+
source=source,
907973
filepath=filepath,
908974
module_name=module_name,
909975
collector=collector,
910-
source_tokens=source_tokens,
911-
)
912-
suppression_bindings = bind_suppressions_to_declarations(
913-
directives=suppression_directives,
914-
declarations=declaration_targets,
915976
)
916-
suppression_index = build_suppression_index(suppression_bindings)
917977
class_names = frozenset(class_node.name for _, class_node in collector.class_nodes)
918978
module_import_names = set(import_names)
919979
module_class_names = set(class_names)

codeclone/pipeline.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,11 @@
5656
)
5757
from .normalize import NormalizationConfig
5858
from .paths import is_test_filepath
59-
from .report import (
60-
build_block_group_facts,
61-
prepare_block_report_groups,
62-
prepare_segment_report_groups,
63-
render_json_report_document,
64-
render_text_report_document,
65-
to_markdown_report,
66-
to_sarif_report,
67-
)
59+
from .report.blocks import prepare_block_report_groups
60+
from .report.explain import build_block_group_facts
6861
from .report.json_contract import build_report_document
62+
from .report.segments import prepare_segment_report_groups
63+
from .report.serialize import render_json_report_document, render_text_report_document
6964
from .report.suggestions import generate_suggestions
7065
from .scanner import iter_py_files, module_name_from_path
7166
from .structural_findings import build_clone_cohort_structural_findings
@@ -1531,6 +1526,8 @@ def report(
15311526
contents["json"] = render_json_report_document(report_document)
15321527

15331528
if boot.output_paths.md and report_document is not None:
1529+
from .report.markdown import to_markdown_report
1530+
15341531
contents["md"] = to_markdown_report(
15351532
report_document=report_document,
15361533
meta=report_meta,
@@ -1548,6 +1545,8 @@ def report(
15481545
)
15491546

15501547
if boot.output_paths.sarif and report_document is not None:
1548+
from .report.sarif import to_sarif_report
1549+
15511550
contents["sarif"] = to_sarif_report(
15521551
report_document=report_document,
15531552
meta=report_meta,

0 commit comments

Comments
 (0)