Skip to content

Commit e33a6c0

Browse files
committed
feat(report): add baseline provenance metadata to html/text/json and raise test coverage to 100%
1 parent c0187f2 commit e33a6c0

10 files changed

Lines changed: 1010 additions & 103 deletions

File tree

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,11 @@ codeclone . \
140140
--text .cache/codeclone/report.txt
141141
```
142142

143+
All report formats include provenance metadata for auditability:
`codeclone_version`, `python_version`, `baseline_path`, `baseline_version`,
`baseline_schema_version`, `baseline_python_version`, `baseline_loaded`, and
`baseline_status`, plus the cache metadata (`cache_path`, `cache_used`) when available.
147+
143148
Generate an HTML report:
144149

145150
```bash

codeclone/cli.py

Lines changed: 109 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
build_segment_groups,
3434
prepare_segment_report_groups,
3535
to_json_report,
36-
to_text,
36+
to_text_report,
3737
)
3838
from .scanner import iter_py_files, module_name_from_path
3939

@@ -181,6 +181,33 @@ def _validate_output_path(path: str, *, expected_suffix: str, label: str) -> Pat
181181
return out.resolve()
182182

183183

184+
def _current_python_version() -> str:
185+
return f"{sys.version_info.major}.{sys.version_info.minor}"
186+
187+
188+
def _build_report_meta(
    *,
    baseline_path: Path,
    baseline: Baseline,
    baseline_loaded: bool,
    baseline_status: str,
    cache_path: Path,
    cache_used: bool,
) -> dict[str, Any]:
    """Assemble the provenance metadata attached to generated reports.

    All arguments are keyword-only.

    Args:
        baseline_path: Location of the baseline file; stored as a string.
        baseline: Baseline object whose ``baseline_version``,
            ``schema_version`` and ``python_version`` attributes are copied
            into the result unchanged.
        baseline_loaded: Whether the baseline was loaded successfully.
        baseline_status: Short status label for the baseline.
        cache_path: Location of the cache file; stored as a string.
        cache_used: Whether the cache was used for this run.

    Returns:
        A dict with the keys ``codeclone_version``, ``python_version``,
        ``baseline_path``, ``baseline_version``, ``baseline_schema_version``,
        ``baseline_python_version``, ``baseline_loaded``, ``baseline_status``,
        ``cache_path`` and ``cache_used``, in that order.
    """
    return {
        # Tool and interpreter that produced the report.
        "codeclone_version": __version__,
        "python_version": _current_python_version(),
        # Baseline provenance; paths are stringified before being stored.
        "baseline_path": str(baseline_path),
        "baseline_version": baseline.baseline_version,
        "baseline_schema_version": baseline.schema_version,
        "baseline_python_version": baseline.python_version,
        "baseline_loaded": baseline_loaded,
        "baseline_status": baseline_status,
        # Cache provenance.
        "cache_path": str(cache_path),
        "cache_used": cache_used,
    }
209+
210+
184211
def main() -> None:
185212
ap = argparse.ArgumentParser(
186213
prog="codeclone",
@@ -664,53 +691,74 @@ def process_sequential(with_progress: bool) -> None:
664691

665692
baseline = Baseline(baseline_path)
666693
baseline_exists = baseline_path.exists()
694+
baseline_loaded = False
695+
baseline_status = "missing"
696+
baseline_failure_code: int | None = None
667697

668698
if baseline_exists:
669-
baseline.load()
670-
if not args.update_baseline:
671-
if baseline.baseline_version != __version__:
672-
if baseline.baseline_version is None:
673-
console.print(
674-
"[error]Baseline version mismatch.[/error]\n"
675-
"Baseline version missing (legacy baseline format).\n"
676-
f"Current version: {__version__}.\n"
677-
"Please regenerate the baseline with --update-baseline."
678-
)
679-
else:
680-
console.print(
681-
"[error]Baseline version mismatch.[/error]\n"
682-
"Baseline was generated with CodeClone "
683-
f"{baseline.baseline_version}.\n"
684-
f"Current version: {__version__}.\n"
685-
"Please regenerate the baseline with --update-baseline."
686-
)
687-
sys.exit(2)
688-
if (
689-
baseline.schema_version is not None
690-
and baseline.schema_version != BASELINE_SCHEMA_VERSION
691-
):
699+
try:
700+
baseline.load()
701+
except ValueError as e:
702+
baseline_status = "invalid"
703+
if not args.update_baseline:
692704
console.print(
693-
"[error]Baseline schema version mismatch.[/error]\n"
694-
f"Baseline schema: {baseline.schema_version}. "
695-
f"Current schema: {BASELINE_SCHEMA_VERSION}.\n"
705+
"[error]Invalid baseline file.[/error]\n"
706+
f"{e}\n"
696707
"Please regenerate the baseline with --update-baseline."
697708
)
698-
sys.exit(2)
699-
if not args.update_baseline and baseline.python_version:
700-
current_version = f"{sys.version_info.major}.{sys.version_info.minor}"
701-
if baseline.python_version != current_version:
702-
console.print(
703-
"[warning]Baseline Python version mismatch.[/warning]\n"
704-
f"Baseline was generated with Python {baseline.python_version}.\n"
705-
f"Current interpreter: Python {current_version}."
706-
)
707-
if args.fail_on_new:
709+
baseline_failure_code = 2
710+
else:
711+
baseline_loaded = True
712+
baseline_status = "ok"
713+
if not args.update_baseline:
714+
if baseline.baseline_version != __version__:
715+
baseline_status = "mismatch"
716+
if baseline.baseline_version is None:
717+
console.print(
718+
"[error]Baseline version mismatch.[/error]\n"
719+
"Baseline version missing (legacy baseline format).\n"
720+
f"Current version: {__version__}.\n"
721+
"Please regenerate the baseline with --update-baseline."
722+
)
723+
else:
724+
console.print(
725+
"[error]Baseline version mismatch.[/error]\n"
726+
"Baseline was generated with CodeClone "
727+
f"{baseline.baseline_version}.\n"
728+
f"Current version: {__version__}.\n"
729+
"Please regenerate the baseline with --update-baseline."
730+
)
731+
baseline_failure_code = 2
732+
if (
733+
baseline.schema_version is not None
734+
and baseline.schema_version != BASELINE_SCHEMA_VERSION
735+
):
736+
baseline_status = "mismatch"
708737
console.print(
709-
"[error]Baseline checks require the same Python version to "
710-
"ensure deterministic results. Please regenerate the baseline "
711-
"using the current interpreter.[/error]"
738+
"[error]Baseline schema version mismatch.[/error]\n"
739+
f"Baseline schema: {baseline.schema_version}. "
740+
f"Current schema: {BASELINE_SCHEMA_VERSION}.\n"
741+
"Please regenerate the baseline with --update-baseline."
742+
)
743+
baseline_failure_code = 2
744+
if not args.update_baseline and baseline.python_version:
745+
current_version = _current_python_version()
746+
if baseline.python_version != current_version:
747+
baseline_status = "mismatch"
748+
console.print(
749+
"[warning]Baseline Python version mismatch.[/warning]\n"
750+
"Baseline was generated with Python "
751+
f"{baseline.python_version}.\n"
752+
f"Current interpreter: Python {current_version}."
712753
)
713-
sys.exit(2)
754+
if args.fail_on_new:
755+
console.print(
756+
"[error]Baseline checks require the same Python version to "
757+
"ensure deterministic results. Please regenerate the "
758+
"baseline "
759+
"using the current interpreter.[/error]"
760+
)
761+
baseline_failure_code = 2
714762
else:
715763
if not args.update_baseline:
716764
console.print(
@@ -735,6 +783,15 @@ def process_sequential(with_progress: bool) -> None:
735783
# When updating, we don't fail on new, we just saved the new state.
736784
# But we might still want to print the summary.
737785

786+
report_meta = _build_report_meta(
787+
baseline_path=baseline_path,
788+
baseline=baseline,
789+
baseline_loaded=baseline_loaded,
790+
baseline_status=baseline_status,
791+
cache_path=cache_path.resolve(),
792+
cache_used=cache.load_warning is None,
793+
)
794+
738795
# Diff
739796
new_func, new_block = baseline.diff(func_groups, block_groups)
740797
new_clones_count = len(new_func) + len(new_block)
@@ -770,6 +827,7 @@ def process_sequential(with_progress: bool) -> None:
770827
func_groups=func_groups,
771828
block_groups=block_groups,
772829
segment_groups=segment_groups,
830+
report_meta=report_meta,
773831
title="CodeClone Report",
774832
context_lines=3,
775833
max_snippet_lines=220,
@@ -784,7 +842,7 @@ def process_sequential(with_progress: bool) -> None:
784842
out = json_out_path
785843
out.parent.mkdir(parents=True, exist_ok=True)
786844
out.write_text(
787-
to_json_report(func_groups, block_groups, segment_groups),
845+
to_json_report(func_groups, block_groups, segment_groups, report_meta),
788846
"utf-8",
789847
)
790848
if not args.quiet:
@@ -794,17 +852,20 @@ def process_sequential(with_progress: bool) -> None:
794852
out = text_out_path
795853
out.parent.mkdir(parents=True, exist_ok=True)
796854
out.write_text(
797-
"FUNCTION CLONES\n"
798-
+ to_text(func_groups)
799-
+ "\nBLOCK CLONES\n"
800-
+ to_text(block_groups)
801-
+ "\nSEGMENT CLONES\n"
802-
+ to_text(segment_groups),
855+
to_text_report(
856+
meta=report_meta,
857+
func_groups=func_groups,
858+
block_groups=block_groups,
859+
segment_groups=segment_groups,
860+
),
803861
"utf-8",
804862
)
805863
if not args.quiet:
806864
console.print(f"[info]Text report saved:[/info] {out}")
807865

866+
if baseline_failure_code is not None:
867+
sys.exit(baseline_failure_code)
868+
808869
# Exit Codes
809870
if args.fail_on_new and (new_func or new_block):
810871
default_report = Path(".cache/codeclone/report.html")

codeclone/html_report.py

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,15 @@ def _escape(v: Any) -> str:
229229
return html.escape("" if v is None else str(v))
230230

231231

232+
def _meta_display(v: Any) -> str:
233+
if isinstance(v, bool):
234+
return "true" if v else "false"
235+
if v is None:
236+
return "n/a"
237+
text = str(v).strip()
238+
return text if text else "n/a"
239+
240+
232241
def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]:
233242
return (
234243
-len(items),
@@ -241,6 +250,7 @@ def build_html_report(
241250
func_groups: dict[str, list[dict[str, Any]]],
242251
block_groups: dict[str, list[dict[str, Any]]],
243252
segment_groups: dict[str, list[dict[str, Any]]],
253+
report_meta: dict[str, Any] | None = None,
244254
title: str = "CodeClone Report",
245255
context_lines: int = 3,
246256
max_snippet_lines: int = 220,
@@ -404,6 +414,8 @@ def render_section(
404414

405415
out.append(
406416
f'<div class="group" data-group="{section_id}" '
417+
f'data-group-index="{idx}" '
418+
f'data-group-key="{_escape(gkey)}" '
407419
f'data-search="{search_blob_escaped}">'
408420
)
409421

@@ -416,7 +428,7 @@ def render_section(
416428
f'<span class="pill small {pill_cls}">{len(items)} items</span>'
417429
"</div>"
418430
'<div class="group-right">'
419-
f'<code class="gkey">{_escape(gkey)}</code>'
431+
f'<code class="gkey" title="{_escape(gkey)}">{_escape(gkey)}</code>'
420432
"</div>"
421433
"</div>"
422434
)
@@ -437,16 +449,18 @@ def render_section(
437449
max_lines=max_snippet_lines,
438450
)
439451

452+
qualname = _escape(item["qualname"])
453+
filepath = _escape(item["filepath"])
454+
start_line = int(item["start_line"])
455+
end_line = int(item["end_line"])
440456
out.append(
441-
'<div class="item">'
442-
f'<div class="item-head" title="{_escape(item["qualname"])}">'
443-
f"{_escape(item['qualname'])}"
444-
"</div>"
457+
f'<div class="item" data-qualname="{qualname}" '
458+
f'data-filepath="{filepath}" data-start-line="{start_line}" '
459+
f'data-end-line="{end_line}">'
460+
f'<div class="item-head" title="{qualname}">{qualname}</div>'
445461
f'<div class="item-file" '
446-
f'title="{_escape(item["filepath"])}:'
447-
f'{item["start_line"]}-{item["end_line"]}">'
448-
f"{_escape(item['filepath'])}:"
449-
f"{item['start_line']}-{item['end_line']}"
462+
f'title="{filepath}:{start_line}-{end_line}">'
463+
f"{filepath}:{start_line}-{end_line}"
450464
f"</div>"
451465
f"{snippet.code_html}"
452466
"</div>"
@@ -489,11 +503,61 @@ def render_section(
489503
"segments", "Segment clones", segment_sorted, "pill-segment"
490504
)
491505

506+
meta = dict(report_meta or {})
507+
meta_rows: list[tuple[str, Any]] = [
508+
("CodeClone", meta.get("codeclone_version", __version__)),
509+
("Python", meta.get("python_version")),
510+
("Baseline", meta.get("baseline_path")),
511+
("Baseline version", meta.get("baseline_version")),
512+
("Baseline schema", meta.get("baseline_schema_version")),
513+
("Baseline Python", meta.get("baseline_python_version")),
514+
("Baseline loaded", meta.get("baseline_loaded")),
515+
("Baseline status", meta.get("baseline_status")),
516+
]
517+
if "cache_path" in meta:
518+
meta_rows.append(("Cache path", meta.get("cache_path")))
519+
if "cache_used" in meta:
520+
meta_rows.append(("Cache used", meta.get("cache_used")))
521+
522+
meta_attrs = " ".join(
523+
[
524+
(
525+
'data-codeclone-version="'
526+
f'{_escape(meta.get("codeclone_version", __version__))}"'
527+
),
528+
f'data-python-version="{_escape(meta.get("python_version"))}"',
529+
f'data-baseline-path="{_escape(meta.get("baseline_path"))}"',
530+
f'data-baseline-version="{_escape(meta.get("baseline_version"))}"',
531+
f'data-baseline-schema-version="{_escape(meta.get("baseline_schema_version"))}"',
532+
f'data-baseline-python-version="{_escape(meta.get("baseline_python_version"))}"',
533+
f'data-baseline-loaded="{_escape(_meta_display(meta.get("baseline_loaded")))}"',
534+
f'data-baseline-status="{_escape(meta.get("baseline_status"))}"',
535+
f'data-cache-path="{_escape(meta.get("cache_path"))}"',
536+
f'data-cache-used="{_escape(_meta_display(meta.get("cache_used")))}"',
537+
]
538+
)
539+
meta_rows_html = "".join(
540+
(
541+
'<div class="meta-row">'
542+
f"<dt>{_escape(label)}</dt>"
543+
f"<dd>{_escape(_meta_display(value))}</dd>"
544+
"</div>"
545+
)
546+
for label, value in meta_rows
547+
)
548+
report_meta_html = (
549+
f'<section class="meta-panel" id="report-meta" {meta_attrs}>'
550+
'<div class="meta-title">Report Provenance</div>'
551+
f'<dl class="meta-grid">{meta_rows_html}</dl>'
552+
"</section>"
553+
)
554+
492555
return REPORT_TEMPLATE.substitute(
493556
title=_escape(title),
494557
version=__version__,
495558
pyg_dark=pyg_dark,
496559
pyg_light=pyg_light,
560+
report_meta_html=report_meta_html,
497561
empty_state_html=empty_state_html,
498562
func_section=func_section,
499563
block_section=block_section,

0 commit comments

Comments
 (0)