Skip to content

Commit 59c395d

Browse files
committed
fix: resolve all SonarCloud issues and security hotspots
1 parent fa631d2 commit 59c395d

9 files changed

Lines changed: 424 additions & 297 deletions

File tree

src/treemapper/cli.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def _validate_budget(budget: int | None) -> None:
4242

4343

4444
def _validate_alpha(alpha: float) -> None:
45-
if alpha <= 0 or alpha >= 1:
45+
if not (0 < alpha < 1):
4646
_exit_error(f"--alpha must be between 0 and 1 (exclusive), got {alpha}")
4747

4848

@@ -106,9 +106,9 @@ def _warn_diff_only_flags(args: argparse.Namespace) -> None:
106106
used = []
107107
if args.budget is not None:
108108
used.append("--budget")
109-
if args.alpha != 0.60:
109+
if abs(args.alpha - 0.60) > 1e-9:
110110
used.append("--alpha/--context-precision")
111-
if args.tau != 0.08:
111+
if abs(args.tau - 0.08) > 1e-9:
112112
used.append("--tau/--min-relevance")
113113
if args.full:
114114
used.append("--full")

src/treemapper/diffctx/__init__.py

Lines changed: 148 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,27 @@ def _is_generated_file(path: Path, content: str) -> bool:
193193
return False
194194

195195

196+
def _truncate_generated_fragments(file_frags: list[Fragment]) -> list[Fragment]:
    """Cap every fragment of a generated file at ``_MAX_GENERATED_LINES`` lines.

    Fragments whose ``line_count`` is within the limit are passed through
    unchanged (the same object is reused).  A longer fragment is replaced by
    a new ``Fragment`` that keeps the original path, start line and kind but
    holds only the first ``_MAX_GENERATED_LINES`` lines of content followed by
    a ``# ... [N more lines]`` marker; its identifiers are re-extracted from
    the shortened content.

    Args:
        file_frags: Fragments belonging to one generated file.

    Returns:
        A new list with one entry per input fragment, in the same order.
    """
    truncated: list[Fragment] = []
    for frag in file_frags:
        if frag.line_count <= _MAX_GENERATED_LINES:
            truncated.append(frag)
            continue

        lines = frag.content.splitlines()
        remaining = len(lines) - _MAX_GENERATED_LINES
        if remaining <= 0:
            # ``line_count`` and the actual content disagree: there is nothing
            # to cut, so keep the fragment instead of appending a misleading
            # "[0 more lines]" (or negative) marker.
            truncated.append(frag)
            continue

        kept = lines[:_MAX_GENERATED_LINES]
        truncated_content = "\n".join(kept) + f"\n# ... [{remaining} more lines]"
        truncated.append(
            Fragment(
                # The end line covers the kept source lines only, not the marker.
                id=FragmentId(frag.path, frag.start_line, frag.start_line + len(kept) - 1),
                kind=frag.kind,
                content=truncated_content,
                identifiers=extract_identifiers(truncated_content),
            )
        )
    return truncated
215+
216+
196217
def _process_files_for_fragments(
197218
files: list[Path],
198219
root_dir: Path,
@@ -221,24 +242,7 @@ def _process_files_for_fragments(
221242
)
222243

223244
if is_generated:
224-
truncated: list[Fragment] = []
225-
for frag in file_frags:
226-
if frag.line_count > _MAX_GENERATED_LINES:
227-
lines = frag.content.splitlines()
228-
remaining = len(lines) - _MAX_GENERATED_LINES
229-
lines = lines[:_MAX_GENERATED_LINES]
230-
truncated_content = "\n".join(lines) + f"\n# ... [{remaining} more lines]"
231-
truncated.append(
232-
Fragment(
233-
id=FragmentId(frag.path, frag.start_line, frag.start_line + len(lines) - 1),
234-
kind=frag.kind,
235-
content=truncated_content,
236-
identifiers=extract_identifiers(truncated_content),
237-
)
238-
)
239-
else:
240-
truncated.append(frag)
241-
file_frags = truncated
245+
file_frags = _truncate_generated_fragments(file_frags)
242246

243247
for frag in file_frags:
244248
fragments.append(frag)
@@ -303,10 +307,10 @@ def _apply_same_file_floor(
303307
_HUB_REVERSE_THRESHOLD = 3
304308

305309

306-
def _find_hub_noise_paths(
310+
def _classify_semantic_edges(
307311
graph: Graph,
308312
changed_paths: set[Path],
309-
) -> set[Path]:
313+
) -> tuple[dict[Path, set[Path]], set[Path]]:
310314
reverse_deps: dict[Path, set[Path]] = defaultdict(set)
311315
direct_edge_paths: set[Path] = set()
312316
for (src, dst), category in graph.edge_categories.items():
@@ -327,6 +331,14 @@ def _find_hub_noise_paths(
327331
reverse_deps[changed_frag.path].add(other_frag.path)
328332
else:
329333
direct_edge_paths.add(other_frag.path)
334+
return reverse_deps, direct_edge_paths
335+
336+
337+
def _find_hub_noise_paths(
338+
graph: Graph,
339+
changed_paths: set[Path],
340+
) -> set[Path]:
341+
reverse_deps, direct_edge_paths = _classify_semantic_edges(graph, changed_paths)
330342

331343
noise_counts: dict[Path, int] = {}
332344
for deps in reverse_deps.values():
@@ -440,6 +452,44 @@ def _filter_low_relevance_fragments(
440452
return kept
441453

442454

455+
def _create_whole_file_fragment(
    path: Path,
    root_dir: Path,
    preferred_revs: list[str],
) -> Fragment | None:
    """Build a single ``"chunk"`` fragment spanning the whole of *path*.

    The content is obtained from ``_read_file_content``.  When the file is
    classified as generated and is longer than ``_MAX_GENERATED_LINES`` lines,
    only the first ``_MAX_GENERATED_LINES`` lines are kept and a
    ``# ... [N more lines]`` marker line is appended.

    Args:
        path: File to represent.
        root_dir: Passed through to ``_read_file_content``.
        preferred_revs: Passed through to ``_read_file_content``.

    Returns:
        The fragment, with ``token_count`` set to the token count of its
        content plus ``_OVERHEAD_PER_FRAGMENT``; ``None`` when the content is
        missing, empty or whitespace-only.
    """
    content = _read_file_content(path, root_dir, preferred_revs)
    if not content or not content.strip():
        return None

    if _is_generated_file(path, content):
        lines = content.splitlines()
        overflow = len(lines) - _MAX_GENERATED_LINES
        if overflow > 0:
            content = "\n".join(lines[:_MAX_GENERATED_LINES]) + f"\n# ... [{overflow} more lines]"

    # Counted on the final content for every file, so for a truncated file the
    # marker line is included in the span.
    end_line = len(content.splitlines())
    frag = Fragment(
        id=FragmentId(path=path, start_line=1, end_line=end_line),
        kind="chunk",
        content=content,
        identifiers=extract_identifiers(content),
    )
    frag.token_count = count_tokens(content).count + _OVERHEAD_PER_FRAGMENT
    return frag
477+
478+
479+
def _pick_smallest_fitting(
    candidates: list[Fragment],
    selected_ids: set[FragmentId],
    budget_left: int,
) -> Fragment | None:
    """Return the cheapest not-yet-selected candidate that fits the budget.

    A candidate is eligible when its ``token_count`` is positive, does not
    exceed *budget_left*, and its ``id`` is not in *selected_ids*.  Among
    eligible candidates the one with the smallest ``token_count`` wins; ties
    go to the candidate that appears first in *candidates*.

    Args:
        candidates: Fragments to choose from (may be empty).
        selected_ids: Ids of fragments that were already selected.
        budget_left: Maximum ``token_count`` that can still be spent.

    Returns:
        The chosen fragment, or ``None`` when no candidate is eligible.
    """
    eligible = [
        cand
        for cand in candidates
        if 0 < cand.token_count <= budget_left and cand.id not in selected_ids
    ]
    # ``min`` returns the first minimal element, which matches picking the
    # first fitting entry of a stable ascending sort without sorting at all.
    return min(eligible, key=lambda f: f.token_count, default=None)
491+
492+
443493
def _ensure_changed_files_represented(
444494
selected: list[Fragment],
445495
all_fragments: list[Fragment],
@@ -449,8 +499,7 @@ def _ensure_changed_files_represented(
449499
preferred_revs: list[str],
450500
) -> list[Fragment]:
451501
selected_paths = {f.path for f in selected}
452-
changed_paths = set(changed_files)
453-
missing_paths = changed_paths - selected_paths
502+
missing_paths = set(changed_files) - selected_paths
454503

455504
if not missing_paths:
456505
return selected
@@ -466,37 +515,11 @@ def _ensure_changed_files_represented(
466515

467516
for path in sorted(missing_paths):
468517
candidates = frags_by_path.get(path, [])
469-
470518
if not candidates:
471-
content = _read_file_content(path, root_dir, preferred_revs)
472-
if content and content.strip():
473-
if _is_generated_file(path, content):
474-
lines = content.splitlines()
475-
if len(lines) > _MAX_GENERATED_LINES:
476-
remaining = len(lines) - _MAX_GENERATED_LINES
477-
content = "\n".join(lines[:_MAX_GENERATED_LINES]) + f"\n# ... [{remaining} more lines]"
478-
lines = content.splitlines()
479-
else:
480-
lines = content.splitlines()
481-
frag = Fragment(
482-
id=FragmentId(path=path, start_line=1, end_line=len(lines)),
483-
kind="chunk",
484-
content=content,
485-
identifiers=extract_identifiers(content),
486-
)
487-
frag.token_count = count_tokens(content).count + _OVERHEAD_PER_FRAGMENT
488-
candidates = [frag]
519+
fallback = _create_whole_file_fragment(path, root_dir, preferred_revs)
520+
candidates = [fallback] if fallback else []
489521

490-
if not candidates:
491-
continue
492-
ranked = sorted(candidates, key=lambda f: f.token_count)
493-
picked = None
494-
for cand in ranked:
495-
if cand.token_count <= 0 or cand.id in selected_ids:
496-
continue
497-
if cand.token_count <= budget_left:
498-
picked = cand
499-
break
522+
picked = _pick_smallest_fitting(candidates, selected_ids, budget_left)
500523
if picked is not None:
501524
added.append(picked)
502525
selected_ids.add(picked.id)
@@ -546,6 +569,25 @@ def _select_with_ppr(
546569
return selected.selected, selected
547570

548571

572+
def _resolve_changed_files(
    root_dir: Path,
    diff_range: str,
    untracked: list[Path],
    combined_spec: pathspec.PathSpec,
    wl_spec: pathspec.PathSpec | None,
) -> list[Path]:
    """Assemble the list of changed files that should be considered for context.

    Steps, in order:
      1. take the files reported by ``get_changed_files`` for *diff_range* and
         normalize each with ``_normalize_path``;
      2. append *untracked*;
      3. apply ``_filter_ignored`` with *combined_spec*, then
         ``_filter_whitelist`` with *wl_spec*;
      4. drop every file whose resolved path is among the deleted files or the
         old names of renamed files for *diff_range*.

    Args:
        root_dir: Repository root handed to every helper.
        diff_range: Diff range handed to the git helpers.
        untracked: Extra paths to treat as changed (may be empty).
        combined_spec: Ignore patterns.
        wl_spec: Whitelist patterns, or ``None``.

    Returns:
        The filtered list of changed file paths.
    """
    changed_files = [_normalize_path(p, root_dir) for p in get_changed_files(root_dir, diff_range)]
    changed_files.extend(untracked)
    changed_files = _filter_ignored(changed_files, root_dir, combined_spec)
    changed_files = _filter_whitelist(changed_files, root_dir, wl_spec)

    excluded_paths = get_deleted_files(root_dir, diff_range) | get_renamed_old_paths(root_dir, diff_range)
    if not excluded_paths:
        return changed_files
    # NOTE(review): membership is tested on ``f.resolve()``, so the excluded
    # sets presumably hold resolved paths -- confirm against the git helpers.
    return [f for f in changed_files if f.resolve() not in excluded_paths]
589+
590+
549591
def build_diff_context(
550592
root_dir: Path,
551593
diff_range: str,
@@ -568,9 +610,8 @@ def build_diff_context(
568610
combined_spec = get_ignore_specs(root_dir, ignore_file, no_default_ignores, None)
569611
wl_spec = get_whitelist_spec(whitelist_file, root_dir)
570612

571-
untracked: list[Path] = []
572-
if is_working_tree_diff:
573-
untracked = _discover_untracked_files(root_dir, combined_spec)
613+
untracked = _discover_untracked_files(root_dir, combined_spec) if is_working_tree_diff else []
614+
if untracked:
574615
hunks.extend(_synthetic_hunks(untracked))
575616

576617
if not hunks:
@@ -581,15 +622,7 @@ def build_diff_context(
581622
if untracked:
582623
expansion_concepts = _enrich_concepts(expansion_concepts, untracked)
583624

584-
changed_files = get_changed_files(root_dir, diff_range)
585-
changed_files = [_normalize_path(p, root_dir) for p in changed_files]
586-
changed_files.extend(untracked)
587-
changed_files = _filter_ignored(changed_files, root_dir, combined_spec)
588-
changed_files = _filter_whitelist(changed_files, root_dir, wl_spec)
589-
590-
excluded_paths = get_deleted_files(root_dir, diff_range) | get_renamed_old_paths(root_dir, diff_range)
591-
if excluded_paths:
592-
changed_files = [f for f in changed_files if f.resolve() not in excluded_paths]
625+
changed_files = _resolve_changed_files(root_dir, diff_range, untracked, combined_spec, wl_spec)
593626

594627
preferred_revs = _build_preferred_revs(base_rev, head_rev)
595628

@@ -736,44 +769,75 @@ def _coherence_post_pass(
736769
)
737770

738771

739-
def _compute_seed_weights(
772+
def _map_hunks_to_fragments(
740773
hunks: list[DiffHunk],
741774
core_ids: set[FragmentId],
742775
all_fragments: list[Fragment],
743776
) -> dict[FragmentId, float]:
744-
frag_hunk_lines: dict[FragmentId, float] = {}
777+
result: dict[FragmentId, float] = {}
745778
for h in hunks:
746779
h_start, h_end = h.core_selection_range
747780
hunk_size = max(1, h_end - h_start + 1)
748781
for frag in all_fragments:
749782
if frag.id not in core_ids or frag.path != h.path:
750783
continue
751784
if frag.start_line <= h_end and frag.end_line >= h_start:
752-
frag_hunk_lines[frag.id] = frag_hunk_lines.get(frag.id, 0) + hunk_size
753-
if not frag_hunk_lines:
754-
return {}
785+
result[frag.id] = result.get(frag.id, 0) + hunk_size
786+
return result
787+
755788

789+
def _add_container_weights(
790+
frag_hunk_lines: dict[FragmentId, float],
791+
core_ids: set[FragmentId],
792+
all_fragments: list[Fragment],
793+
) -> None:
756794
for frag in all_fragments:
757795
if frag.id not in core_ids or frag.id in frag_hunk_lines:
758796
continue
759-
if frag.kind in _CONTAINER_FRAGMENT_KINDS:
760-
contained_weight = sum(
761-
w
762-
for fid, w in frag_hunk_lines.items()
763-
if fid.path == frag.path and frag.start_line <= fid.start_line and fid.end_line <= frag.end_line
764-
)
765-
if contained_weight > 0:
766-
frag_hunk_lines[frag.id] = contained_weight
797+
if frag.kind not in _CONTAINER_FRAGMENT_KINDS:
798+
continue
799+
contained_weight = sum(
800+
w
801+
for fid, w in frag_hunk_lines.items()
802+
if fid.path == frag.path and frag.start_line <= fid.start_line and fid.end_line <= frag.end_line
803+
)
804+
if contained_weight > 0:
805+
frag_hunk_lines[frag.id] = contained_weight
767806

807+
808+
def _best_hunk_size_for_path(hunks: list[DiffHunk], path: Path) -> int:
    """Return the size in lines of the largest hunk that touches *path*.

    A hunk's size is ``end - start + 1`` of its ``core_selection_range``.

    Args:
        hunks: Hunks to inspect; only those whose ``path`` equals *path* count.
        path: File path to match.

    Returns:
        The largest size found, or ``0`` when no hunk matches (the result is
        never negative, even for an inverted range).
    """
    spans = [h.core_selection_range for h in hunks if h.path == path]
    # The leading 0 keeps the original floor for "no match" and inverted ranges.
    return max([0, *(end - start + 1 for start, end in spans)])
815+
816+
817+
def _fill_missing_core_weights(
818+
frag_hunk_lines: dict[FragmentId, float],
819+
core_ids: set[FragmentId],
820+
hunks: list[DiffHunk],
821+
) -> None:
768822
for fid in core_ids:
769-
if fid not in frag_hunk_lines:
770-
best_hunk_size = 0
771-
for h in hunks:
772-
if h.path == fid.path:
773-
h_start, h_end = h.core_selection_range
774-
best_hunk_size = max(best_hunk_size, h_end - h_start + 1)
775-
if best_hunk_size > 0:
776-
frag_hunk_lines[fid] = best_hunk_size
823+
if fid in frag_hunk_lines:
824+
continue
825+
best = _best_hunk_size_for_path(hunks, fid.path)
826+
if best > 0:
827+
frag_hunk_lines[fid] = best
828+
829+
830+
def _compute_seed_weights(
    hunks: list[DiffHunk],
    core_ids: set[FragmentId],
    all_fragments: list[Fragment],
) -> dict[FragmentId, float]:
    """Compute a weight for each core fragment from the diff hunks.

    The weights are built in three passes:
      1. ``_map_hunks_to_fragments`` gives each core fragment the summed size
         of the hunks that overlap it;
      2. ``_add_container_weights`` gives still-unweighted container fragments
         the sum of the weights of the fragments they enclose;
      3. ``_fill_missing_core_weights`` gives any remaining core id the size
         of the largest hunk on its path.

    Args:
        hunks: Diff hunks of the change.
        core_ids: Ids of the fragments considered core.
        all_fragments: Every known fragment.

    Returns:
        Mapping from fragment id to weight; an empty dict when no hunk
        overlaps any core fragment (passes 2 and 3 are skipped in that case).
    """
    frag_hunk_lines = _map_hunks_to_fragments(hunks, core_ids, all_fragments)
    if not frag_hunk_lines:
        return {}

    # Both helpers mutate ``frag_hunk_lines`` in place.
    _add_container_weights(frag_hunk_lines, core_ids, all_fragments)
    _fill_missing_core_weights(frag_hunk_lines, core_ids, hunks)

    return frag_hunk_lines
779843

src/treemapper/diffctx/edges/semantic/javascript.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,23 @@
2626
re.MULTILINE,
2727
)
2828

29+
30+
def _add_name_if_valid(name: str, target: set[str]) -> None:
    """Add the lower-cased *name* to *target* if it is at least two characters long.

    Falsy values (empty string, ``None``) and single-character names are
    ignored; *target* is mutated in place.
    """
    if not name or len(name) < 2:
        return
    target.add(name.lower())
33+
34+
35+
def _extract_exports_from_content(content: str, exported: set[str]) -> None:
    """Collect exported names found in *content* into *exported* (in place).

    Three patterns are scanned:
      * ``_EXPORT_DECL_RE`` and ``_EXPORT_DEFAULT_NAME_RE`` -- group 1 is taken
        as the exported name;
      * ``_EXPORT_LIST_RE`` -- group 1 is split on commas and, for each entry,
        the text before ``" as "`` is taken as the name.

    Every candidate goes through ``_add_name_if_valid``, which decides whether
    it is stored.

    Args:
        content: Source text to scan.
        exported: Set that receives the names.
    """
    for pattern in (_EXPORT_DECL_RE, _EXPORT_DEFAULT_NAME_RE):
        for m in pattern.finditer(content):
            _add_name_if_valid(m.group(1), exported)

    for m in _EXPORT_LIST_RE.finditer(content):
        for part in m.group(1).split(","):
            # Keep the text before any ``as`` alias; use a separate local
            # rather than rebinding the loop variable.
            local_name = part.strip().split(" as ")[0].strip()
            _add_name_if_valid(local_name, exported)
44+
45+
2946
_JS_WEIGHTS = LANG_WEIGHTS["javascript"]
3047
_TS_WEIGHTS = LANG_WEIGHTS["typescript"]
3148

@@ -253,19 +270,7 @@ def _collect_exported_names(self, js_changed: list[Path]) -> set[str]:
253270
content = f.read_text(encoding="utf-8")
254271
except (OSError, UnicodeDecodeError):
255272
continue
256-
for m in _EXPORT_DECL_RE.finditer(content):
257-
name = m.group(1)
258-
if name and len(name) >= 2:
259-
exported.add(name.lower())
260-
for m in _EXPORT_DEFAULT_NAME_RE.finditer(content):
261-
name = m.group(1)
262-
if name and len(name) >= 2:
263-
exported.add(name.lower())
264-
for m in _EXPORT_LIST_RE.finditer(content):
265-
for part in m.group(1).split(","):
266-
part = part.strip().split(" as ")[0].strip()
267-
if part and len(part) >= 2:
268-
exported.add(part.lower())
273+
_extract_exports_from_content(content, exported)
269274
return exported
270275

271276
def _find_files_importing_names(

0 commit comments

Comments
 (0)