Skip to content

Commit 577fa99

Browse files
committed
fix(qa): SonarCloud cleanup
- whitelist_vulture: bare names -> _ = expr (fixes 16 S905 BUG false-positives)
- contextbench_diffctx: drop unused 'ok' params from breakdown helpers
- common.py run_parallel: extract _run_pool/_run_serial/_collect_result (S3776 17->under 15)
- forensic_contextbench evaluate_one: extract header/dump helpers (S3776 19->under 15)
- graph.py: use np.divide(out=) instead of /= so SonarCloud sees mutation
1 parent 461d9a5 commit 577fa99

5 files changed

Lines changed: 110 additions & 106 deletions

File tree

benchmarks/common.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -298,29 +298,35 @@ def worker_dir(base: Path) -> Path:
298298
return d
299299

300300

301-
def run_parallel(worker_fn, run_args: list, workers: int, collect: str = "append") -> list:
301+
def _collect_result(results: list, r, collect: str) -> None:
302+
if collect == "extend":
303+
results.extend(r)
304+
elif r:
305+
results.append(r)
306+
307+
308+
def _run_serial(worker_fn, run_args: list, collect: str) -> list:
302309
results: list = []
303-
if workers > 1:
304-
from concurrent.futures import ProcessPoolExecutor, as_completed
305-
306-
with ProcessPoolExecutor(max_workers=workers, initializer=_init_worker) as pool:
307-
futures = {pool.submit(worker_fn, a): a[0] for a in run_args}
308-
for future in as_completed(futures):
309-
try:
310-
r = future.result()
311-
except Exception as e:
312-
idx = futures[future]
313-
print(f" WORKER CRASH [{idx}]: {type(e).__name__}: {e}", flush=True)
314-
continue
315-
if collect == "extend":
316-
results.extend(r)
317-
elif r:
318-
results.append(r)
319-
else:
320-
for a in run_args:
321-
r = worker_fn(a)
322-
if collect == "extend":
323-
results.extend(r)
324-
elif r:
325-
results.append(r)
310+
for a in run_args:
311+
_collect_result(results, worker_fn(a), collect)
326312
return results
313+
314+
315+
def _run_pool(worker_fn, run_args: list, workers: int, collect: str) -> list:
316+
from concurrent.futures import ProcessPoolExecutor, as_completed
317+
318+
results: list = []
319+
with ProcessPoolExecutor(max_workers=workers, initializer=_init_worker) as pool:
320+
futures = {pool.submit(worker_fn, a): a[0] for a in run_args}
321+
for future in as_completed(futures):
322+
try:
323+
_collect_result(results, future.result(), collect)
324+
except Exception as e:
325+
print(f" WORKER CRASH [{futures[future]}]: {type(e).__name__}: {e}", flush=True)
326+
return results
327+
328+
329+
def run_parallel(worker_fn, run_args: list, workers: int, collect: str = "append") -> list:
330+
if workers > 1:
331+
return _run_pool(worker_fn, run_args, workers, collect)
332+
return _run_serial(worker_fn, run_args, collect)

benchmarks/contextbench_diffctx.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ def evaluate_instance(
338338
return result
339339

340340

341-
def _print_per_language_breakdown(ok: list[dict], by_lang: dict) -> None:
341+
def _print_per_language_breakdown(by_lang: dict) -> None:
342342
if len(by_lang) <= 1:
343343
return
344344
print("\nPer-language breakdown:")
@@ -350,7 +350,7 @@ def _print_per_language_breakdown(ok: list[dict], by_lang: dict) -> None:
350350
print(f" {lang:12s} (n={len(lr):3d}): file_recall={avg_fr:.3f} nontrivial={avg_ntr:.3f} line_recall={avg_lr:.3f}")
351351

352352

353-
def _print_per_repo_breakdown(ok: list[dict], by_repo: dict) -> None:
353+
def _print_per_repo_breakdown(by_repo: dict) -> None:
354354
if len(by_repo) <= 1:
355355
return
356356
print("\nPer-repo breakdown:")
@@ -405,12 +405,12 @@ def aggregate(results: list[dict]) -> None:
405405
by_lang: dict[str, list[dict]] = defaultdict(list)
406406
for r in ok:
407407
by_lang[r["language"]].append(r)
408-
_print_per_language_breakdown(ok, by_lang)
408+
_print_per_language_breakdown(by_lang)
409409

410410
by_repo: dict[str, list[dict]] = defaultdict(list)
411411
for r in ok:
412412
by_repo[r["repo"]].append(r)
413-
_print_per_repo_breakdown(ok, by_repo)
413+
_print_per_repo_breakdown(by_repo)
414414

415415
zero_frag = sum(1 for r in ok if r["fragments"] == 0)
416416
zero_line = sum(1 for r in ok if r["line_recall"] < 1e-9 and r["fragments"] > 0)

benchmarks/forensic_contextbench.py

Lines changed: 45 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -235,82 +235,79 @@ def _classify_nontrivial_stages(
235235
return stage_per_file
236236

237237

238-
def evaluate_one(inst: dict, budget: int) -> dict:
239-
iid = inst["instance_id"]
238+
def _print_instance_header(
239+
inst: dict, gold_blocks: list, gold_set: set, p_set: set, added: set, deleted: set, modified: set, nontrivial: set
240+
) -> None:
240241
print("\n" + "=" * 78)
241-
print(f"INSTANCE: {iid}")
242+
print(f"INSTANCE: {inst['instance_id']}")
242243
print(f"Repo: {inst['repo']} Lang: {inst['language']}")
243244
print(f"Base: {inst['base_commit'][:12]}")
244-
245-
gold_blocks = json.loads(inst["gold_context"]) if isinstance(inst["gold_context"], str) else inst["gold_context"]
246-
for g in gold_blocks:
247-
g["file"] = normalize_gold_path(g["file"])
248-
gold_set = {g["file"] for g in gold_blocks}
249-
added, deleted, modified = patch_files_detailed(inst["patch"])
250-
p_set = added | deleted | modified
251-
nontrivial = gold_set - p_set
252-
253245
print(f"\n[GOLD] {len(gold_set):3d} files, {len(gold_blocks):3d} blocks")
254246
for f in sorted(gold_set)[:8]:
255247
marker = " (in patch)" if f in p_set else " (NONTRIVIAL)"
256248
print(f" {f}{marker}")
257249
if len(gold_set) > 8:
258250
print(f" ... and {len(gold_set) - 8} more")
259-
260251
print(f"\n[PATCH] {len(p_set):3d} files (added={len(added)}, deleted={len(deleted)}, modified={len(modified)})")
261252
if deleted:
262253
print(f" DELETED FILES: {sorted(deleted)}")
263-
264254
print(f"\n[NONTRIVIAL GOLD] {len(nontrivial):3d} files")
265255

266-
repo_dir = ensure_repo(inst["repo_url"], inst["repo"], inst["base_commit"], REPOS_DIR)
267-
if not repo_dir:
268-
return {"id": iid, "status": "clone_fail"}
269-
270-
if not apply_as_commit(repo_dir, inst["patch"]):
271-
run_cmd(["git", "-C", str(repo_dir), "checkout", "--force", inst["base_commit"]], check=False)
272-
return {"id": iid, "status": "apply_fail"}
273256

274-
t0 = time.time()
275-
output, err = run_diffctx(repo_dir, budget)
276-
elapsed = time.time() - t0
277-
278-
if not output:
279-
run_cmd(["git", "-C", str(repo_dir), "reset", "--hard", "HEAD~1"], check=False)
280-
print(f" DIFFCTX FAIL: {err}")
281-
return {"id": iid, "status": "diffctx_fail"}
282-
283-
selected = {f["path"] for f in output["fragments"]}
257+
def _print_pipeline_dump(
258+
output: dict, elapsed: float, p_set: set, deleted: set, repo_dir: Path, nontrivial: set, gold_set: set, selected: set
259+
) -> tuple[set, set, set]:
284260
print(f"\n[DIFFCTX] {len(selected):3d} files, {output['fragment_count']:3d} fragments, {elapsed:.1f}s")
285-
286261
_print_patch_coverage(p_set, selected, deleted, repo_dir)
287-
288-
nontrivial_hits = nontrivial & selected
289-
nontrivial_missed = nontrivial - selected
290262
universe = read_dump_set("universe.txt")
291263
fragmented = read_dump_set("fragmented.txt")
292264
sel_dump = read_dump_set("selected.txt")
293265
candidates_info = (DUMP_DIR / "candidates.txt").read_text().strip() if (DUMP_DIR / "candidates.txt").exists() else ""
294-
295266
print(f"\n[PIPELINE STAGES]\n {candidates_info}")
296267
print(f" universe: {len(universe)} fragmented: {len(fragmented)} selected: {len(sel_dump)}")
297-
298-
_print_nontrivial_report(nontrivial, nontrivial_hits, nontrivial_missed, fragmented, universe, sel_dump)
299-
268+
nontrivial_hits = nontrivial & selected
269+
_print_nontrivial_report(nontrivial, nontrivial_hits, nontrivial - selected, fragmented, universe, sel_dump)
300270
extra = selected - gold_set
301271
if extra:
302272
print(f"\n[DIFFCTX EXTRA] {len(extra)} files not in gold")
303273
for f in sorted(extra)[:5]:
304274
print(f" {f} ({'patch' if f in p_set else 'discovered'})")
275+
return universe, fragmented, sel_dump
305276

306-
run_cmd(["git", "-C", str(repo_dir), "reset", "--hard", "HEAD~1"], check=False)
307277

308-
file_recall = len(gold_set & selected) / len(gold_set)
309-
nt_recall = len(nontrivial_hits) / len(nontrivial) if nontrivial else 0.0
310-
patch_coverage = len(p_set & selected) / len(p_set) if p_set else 0.0
278+
def evaluate_one(inst: dict, budget: int) -> dict:
279+
iid = inst["instance_id"]
280+
gold_blocks = json.loads(inst["gold_context"]) if isinstance(inst["gold_context"], str) else inst["gold_context"]
281+
for g in gold_blocks:
282+
g["file"] = normalize_gold_path(g["file"])
283+
gold_set = {g["file"] for g in gold_blocks}
284+
added, deleted, modified = patch_files_detailed(inst["patch"])
285+
p_set = added | deleted | modified
286+
nontrivial = gold_set - p_set
287+
_print_instance_header(inst, gold_blocks, gold_set, p_set, added, deleted, modified, nontrivial)
311288

312-
stage_per_file = _classify_nontrivial_stages(nontrivial, selected, sel_dump, fragmented, universe)
289+
repo_dir = ensure_repo(inst["repo_url"], inst["repo"], inst["base_commit"], REPOS_DIR)
290+
if not repo_dir:
291+
return {"id": iid, "status": "clone_fail"}
292+
if not apply_as_commit(repo_dir, inst["patch"]):
293+
run_cmd(["git", "-C", str(repo_dir), "checkout", "--force", inst["base_commit"]], check=False)
294+
return {"id": iid, "status": "apply_fail"}
295+
296+
t0 = time.time()
297+
output, err = run_diffctx(repo_dir, budget)
298+
elapsed = time.time() - t0
299+
if not output:
300+
run_cmd(["git", "-C", str(repo_dir), "reset", "--hard", "HEAD~1"], check=False)
301+
print(f" DIFFCTX FAIL: {err}")
302+
return {"id": iid, "status": "diffctx_fail"}
303+
304+
selected = {f["path"] for f in output["fragments"]}
305+
universe, fragmented, sel_dump = _print_pipeline_dump(
306+
output, elapsed, p_set, deleted, repo_dir, nontrivial, gold_set, selected
307+
)
308+
run_cmd(["git", "-C", str(repo_dir), "reset", "--hard", "HEAD~1"], check=False)
313309

310+
nontrivial_hits = nontrivial & selected
314311
return {
315312
"id": iid,
316313
"status": "ok",
@@ -319,10 +316,10 @@ def evaluate_one(inst: dict, budget: int) -> dict:
319316
"n_patch": len(p_set),
320317
"n_nontrivial": len(nontrivial),
321318
"n_deleted_in_patch": len(deleted),
322-
"patch_coverage": round(patch_coverage, 3),
323-
"file_recall": round(file_recall, 3),
324-
"nt_recall": round(nt_recall, 3),
325-
"stage_per_file": stage_per_file,
319+
"patch_coverage": round(len(p_set & selected) / len(p_set) if p_set else 0.0, 3),
320+
"file_recall": round(len(gold_set & selected) / len(gold_set), 3),
321+
"nt_recall": round(len(nontrivial_hits) / len(nontrivial) if nontrivial else 0.0, 3),
322+
"stage_per_file": _classify_nontrivial_stages(nontrivial, selected, sel_dump, fragmented, universe),
326323
}
327324

328325

src/treemapper/diffctx/graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ def _suppress_semantic_hubs(
283283
sem_file_deg[si] = len(files)
284284
src_sem_deg = sem_file_deg[src_idx]
285285
sem_hub_mask = is_semantic & (src_sem_deg >= _HUB_OUT_DEGREE_THRESHOLD)
286-
weights /= np.where(sem_hub_mask, np.sqrt(src_sem_deg.astype(np.float64)), 1.0)
286+
divisor = np.where(sem_hub_mask, np.sqrt(src_sem_deg.astype(np.float64)), 1.0)
287+
np.divide(weights, divisor, out=weights)
287288

288289

289290
def _apply_hub_suppression(

whitelist_vulture.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,34 +7,34 @@
77
from treemapper.diffctx.tokenizer import detect_profile, is_nlp_available
88
from treemapper.mcp.server import get_diff_context, get_file_context, get_tree_map, run_server
99

10-
clipboard_available
11-
detect_profile
12-
is_nlp_available
13-
Graph.add_node
14-
Graph.to_csr
15-
Graph.ego_graph
16-
QuotientNode.fragment_count
10+
_ = clipboard_available
11+
_ = detect_profile
12+
_ = is_nlp_available
13+
_ = Graph.add_node
14+
_ = Graph.to_csr
15+
_ = Graph.ego_graph
16+
_ = QuotientNode.fragment_count
1717
_ = ProjectGraph.edges_of_type
1818
_ = ProjectGraph.subgraph
19-
get_diff_context
20-
get_tree_map
21-
get_file_context
22-
run_server
23-
DiffContextTimeoutError
24-
git.is_git_repo
25-
git.get_diff_text
26-
git.parse_diff
27-
git.split_diff_range
28-
git.get_commit_message
29-
universe._expand_universe_by_rare_identifiers
30-
universe._resolve_changed_files
31-
universe._discover_untracked_files
32-
universe._synthetic_hunks
33-
universe._enrich_concepts
34-
types.DiffHunk.core_selection_range
35-
types.extract_identifier_list
36-
file_importance.compute_file_importance
37-
fragmentation._create_whole_file_fragment
38-
graph_analytics.blast_radius
39-
CSRGraph.out_weight_sum
40-
CSRGraph.idx_to_node
19+
_ = get_diff_context
20+
_ = get_tree_map
21+
_ = get_file_context
22+
_ = run_server
23+
_ = DiffContextTimeoutError
24+
_ = git.is_git_repo
25+
_ = git.get_diff_text
26+
_ = git.parse_diff
27+
_ = git.split_diff_range
28+
_ = git.get_commit_message
29+
_ = universe._expand_universe_by_rare_identifiers
30+
_ = universe._resolve_changed_files
31+
_ = universe._discover_untracked_files
32+
_ = universe._synthetic_hunks
33+
_ = universe._enrich_concepts
34+
_ = types.DiffHunk.core_selection_range
35+
_ = types.extract_identifier_list
36+
_ = file_importance.compute_file_importance
37+
_ = fragmentation._create_whole_file_fragment
38+
_ = graph_analytics.blast_radius
39+
_ = CSRGraph.out_weight_sum
40+
_ = CSRGraph.idx_to_node

0 commit comments

Comments (0)