Skip to content

Commit 96f5ff4

Browse files
committed
fix(ci): seal 11th fix-forward — Path import + polish-loop wave-1 bundle
CI on 8547599 failed: ruff F821 "Undefined name Path" at tests/test_sarif_tag_coverage.py:192 — W588 fragile-path migration in the 10th fix-forward dropped `from pathlib import Path` but `Path` remained as a type annotation on `_collect_emitter_funcs(source_path: Path, ...)`. Re-add the import; ruff src/ tests/ now clean.

Bundle in the polish-loop wave-1 accumulated work for CI validation:

Pattern-2 detector sweep (W805 family — 5 real bugs sealed):
- cmd_n1: empty_corpus / no_models disclosure on 0-ORM-models path
- cmd_over_fetch: empty_corpus / no_php_models via detector_state field (legacy `state` enum preserved for back-compat)
- cmd_dark_matter: text branch + real git_cochange query in JSON branch (was inferring no-cochange from empty pairs)
- cmd_duplicates: empty_corpus / no_candidates / insufficient_candidates on len(candidates) < 2 short-circuit
- cmd_laws (mine): empty_corpus / no_laws_passed_thresholds (was hardcoded partial_success: False)

Bare-except drift-guard (W666):
- critique/ added to _GUARDED_DIRS; zero new violations
- runs/ already covered (W746)

W931/W932/W937:
- mypy>=1.10 added to [typecheck] extras
- detectors._finding: evidence param narrowed to Mapping[str,Any]|None
- 28 cp1253-mojibake em-dashes scrubbed across 5 src files
- test_w937_no_mojibake_em_dashes.py drift-guard added
1 parent 8547599 commit 96f5ff4

16 files changed

Lines changed: 357 additions & 59 deletions

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,15 @@ learned = [
8787
# Optional — current `--rerank fast` blend stays the default.
8888
"lightgbm>=4.0",
8989
]
90+
typecheck = [
91+
# W931: mypy convenience extras. Discovered during W919 / W925 type-annotation
92+
# validation that there was no first-class way to install a typechecker into
93+
# the `.venv` (`pip install -e .[typecheck]`). mypy is intentionally NOT a
94+
# `dev` dependency because the test suite does not require it; treat it as
95+
# an opt-in tool for type-stamping work. Lower-bound only (not pinned); tightening
96+
# the lower bound is fine when annotations need a newer mypy feature.
97+
"mypy>=1.10",
98+
]
9099
dev = [
91100
"pytest>=7.0",
92101
"pytest-xdist>=3.0",

src/roam/catalog/detectors.py

Lines changed: 33 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ def _finding(
217217
reason: str,
218218
confidence: str = "medium",
219219
*,
220-
evidence: Mapping[str, Any] | Any | None = None,
220+
evidence: Mapping[str, Any] | None = None,
221221
fix: str | Mapping[str, Any] | None = None,
222222
match_line: int | None = None,
223223
snippet: str | None = None,
@@ -253,6 +253,16 @@ def _finding(
253253
that contributed to the verdict (e.g. ``["nested-loop", "sort+slice"]``
254254
for sort-to-select). Surfaces in evidence so users can see WHY a
255255
finding fired without grepping the detector source.
256+
257+
W932 (W925 follow-up): ``evidence`` is annotated as
258+
``Mapping[str, Any] | None``. Audited all 4 ``evidence=`` keyword
259+
call-sites in this module — every one passes a dict literal; there
260+
are no positional or non-dict callers (the ``*`` makes ``evidence``
261+
keyword-only). The runtime defensive branch below
262+
(``not isinstance(evidence, dict)``) is kept as belt-and-braces for
263+
forward compatibility with future plugin detectors that may pass
264+
non-Mapping shapes, but the static type stays narrow so misuse
265+
surfaces at mypy time rather than at runtime as a silent rewrap.
256266
"""
257267
bw = best_way(task_id)
258268
sym_line = sym["line_start"]
@@ -810,12 +820,12 @@ def detect_linear_search(conn: sqlite3.Connection) -> list[dict]:
810820
query_cost=QUERY_COST_MEDIUM,
811821
)
812822
def detect_list_membership(conn: sqlite3.Connection) -> list[dict]:
813-
"""Nested loops with equality comparisons β€” structural pattern for
823+
"""Nested loops with equality comparisons structural pattern for
814824
O(n^2) membership testing regardless of function name.
815825
816826
Note on the LIKE patterns: ``_`` is a single-char wildcard in SQL
817827
LIKE, so ``LIKE '%in_%'`` matches *any* identifier with "in" followed
818-
by another char (``find_x``, ``intent``, ``something_else`` β€” all
828+
by another char (``find_x``, ``intent``, ``something_else`` all
819829
spurious hits). We use ``ESCAPE '\\'`` and double-write the literal
820830
``\\_`` so we only match the intended idiomatic prefixes
821831
(``has_x``, ``is_in_y``, ``contains_z``).
@@ -970,7 +980,7 @@ def detect_manual_dedup(conn: sqlite3.Connection) -> list[dict]:
970980
def detect_manual_maxmin(conn: sqlite3.Connection) -> list[dict]:
971981
"""Loops with comparisons in max/min-named functions.
972982
973-
Same Big-O (both O(n)) β€” this is an idiom improvement, flagged at low
983+
Same Big-O (both O(n)) this is an idiom improvement, flagged at low
974984
confidence.
975985
"""
976986
rows = conn.execute(
@@ -1019,7 +1029,7 @@ def detect_manual_maxmin(conn: sqlite3.Connection) -> list[dict]:
10191029
def detect_manual_accumulation(conn: sqlite3.Connection) -> list[dict]:
10201030
"""Loops with accumulator in sum/total-named functions.
10211031
1022-
Same Big-O (both O(n)) β€” idiom improvement, flagged at low confidence.
1032+
Same Big-O (both O(n)) idiom improvement, flagged at low confidence.
10231033
"""
10241034
rows = conn.execute(
10251035
"SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
@@ -1294,7 +1304,7 @@ def detect_matrix_mult(conn: sqlite3.Connection) -> list[dict]:
12941304
def detect_naive_fibonacci(conn: sqlite3.Connection) -> list[dict]:
12951305
"""Recursive functions named *fib* without memoization.
12961306
1297-
O(2^n) -> O(n) β€” one of the strongest algorithmic improvements.
1307+
O(2^n) -> O(n) one of the strongest algorithmic improvements.
12981308
"""
12991309
rows = conn.execute(
13001310
"SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
@@ -1478,7 +1488,7 @@ def detect_nested_lookup(conn: sqlite3.Connection) -> list[dict]:
14781488
def detect_manual_groupby(conn: sqlite3.Connection) -> list[dict]:
14791489
"""Loops in group/categorize-named functions without defaultdict/groupby.
14801490
1481-
Same Big-O (both O(n)) β€” idiom improvement.
1491+
Same Big-O (both O(n)) idiom improvement.
14821492
"""
14831493
rows = conn.execute(
14841494
"SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
@@ -1523,10 +1533,10 @@ def detect_manual_groupby(conn: sqlite3.Connection) -> list[dict]:
15231533
query_cost=QUERY_COST_LOW,
15241534
)
15251535
def detect_busy_wait(conn: sqlite3.Connection) -> list[dict]:
1526-
"""Loops that call sleep β€” polling / busy-wait pattern.
1536+
"""Loops that call sleep polling / busy-wait pattern.
15271537
15281538
Suppresses intentional polling: functions named *poll*, *retry*,
1529-
*health_check*, *monitor*, *wait_for* β€” these are legitimate patterns.
1539+
*health_check*, *monitor*, *wait_for* these are legitimate patterns.
15301540
"""
15311541
rows = conn.execute(
15321542
"SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
@@ -1538,7 +1548,7 @@ def detect_busy_wait(conn: sqlite3.Connection) -> list[dict]:
15381548
"AND ms.loop_depth >= 1"
15391549
).fetchall()
15401550

1541-
# Intentional polling patterns β€” suppress these
1551+
# Intentional polling patterns suppress these
15421552
_POLL_NAMES = {
15431553
"poll",
15441554
"retry",
@@ -1632,7 +1642,7 @@ def _max_sleep_arg_seconds(snippet: str) -> float | None:
16321642
query_cost=QUERY_COST_LOW,
16331643
)
16341644
def detect_regex_in_loop(conn: sqlite3.Connection) -> list[dict]:
1635-
"""Regex compilation inside a loop β€” recompiles on every iteration.
1645+
"""Regex compilation inside a loop recompiles on every iteration.
16361646
16371647
O(n*p) wasted compilation when O(p + n*m) is achievable by compiling
16381648
once outside the loop. Applies to all languages with regex engines.
@@ -1666,7 +1676,7 @@ def detect_regex_in_loop(conn: sqlite3.Connection) -> list[dict]:
16661676
"MatchString",
16671677
}
16681678

1669-
# Module-level regex prefixes β€” calls like `re.match()` recompile each time,
1679+
# Module-level regex prefixes calls like `re.match()` recompile each time,
16701680
# but `compiled_pattern.match()` does not. Only flag the former.
16711681
_REGEX_MODULE_PREFIXES = {"re.", "regexp.", "regex.", "Pattern."}
16721682

@@ -1677,7 +1687,7 @@ def detect_regex_in_loop(conn: sqlite3.Connection) -> list[dict]:
16771687
calls = _iter_loop_calls(r)
16781688
# Direct compile in loop is always bad
16791689
compile_calls = _call_in(calls, _REGEX_COMPILE_CALLS)
1680-
# Convenience regex calls β€” only flag when called via the regex module
1690+
# Convenience regex calls only flag when called via the regex module
16811691
# (e.g. `re.findall`), NOT when called on a pre-compiled pattern object
16821692
# (e.g. `_MY_RE.findall`). Check qualified names for module prefix.
16831693
qcalls = _json_list(_row_value(r, "calls_in_loops_qualified", ""))
@@ -2492,7 +2502,7 @@ def _io_emit_finding(
24922502
query_cost=QUERY_COST_HIGH,
24932503
)
24942504
def detect_io_in_loop(conn: sqlite3.Connection) -> list[dict]:
2495-
"""Database query, HTTP request, or file I/O inside a loop β€” N+1 pattern.
2505+
"""Database query, HTTP request, or file I/O inside a loop N+1 pattern.
24962506
24972507
One of the most impactful performance anti-patterns in web applications.
24982508
Each iteration incurs a full I/O round trip.
@@ -2612,7 +2622,7 @@ def detect_io_in_loop(conn: sqlite3.Connection) -> list[dict]:
26122622
query_cost=QUERY_COST_MEDIUM,
26132623
)
26142624
def detect_list_prepend(conn: sqlite3.Connection) -> list[dict]:
2615-
"""insert(0, x), unshift(), or pop(0) inside a loop β€” O(n) per op
2625+
"""insert(0, x), unshift(), or pop(0) inside a loop O(n) per op
26162626
due to array shifting, O(n^2) total."""
26172627
try:
26182628
rows = conn.execute(
@@ -2637,7 +2647,7 @@ def detect_list_prepend(conn: sqlite3.Connection) -> list[dict]:
26372647
"AND ms.loop_depth >= 1"
26382648
).fetchall()
26392649

2640-
# deque operations are O(1) β€” suppress when popleft/appendleft are the ops
2650+
# deque operations are O(1) suppress when popleft/appendleft are the ops
26412651
_DEQUE_OPS = {"popleft", "appendleft", "extendleft"}
26422652

26432653
results = []
@@ -2648,7 +2658,7 @@ def detect_list_prepend(conn: sqlite3.Connection) -> list[dict]:
26482658
# If the only front-ops are deque methods, this is already optimal
26492659
front_calls = _call_in(calls, {"insert", "unshift", "shift", "popleft", "appendleft", "extendleft"})
26502660
if front_calls and all(_call_leaf(c) in _DEQUE_OPS for c in front_calls):
2651-
continue # Already using deque β€” no issue
2661+
continue # Already using deque no issue
26522662
# New indexes precompute the exact front-op signal.
26532663
if _row_value(r, "front_ops_in_loop", None) == 1:
26542664
results.append(
@@ -2662,7 +2672,7 @@ def detect_list_prepend(conn: sqlite3.Connection) -> list[dict]:
26622672
)
26632673
continue
26642674
# Fallback heuristic (conservative): only list front APIs.
2665-
# Note: appendleft/popleft are deque O(1) ops β€” do NOT flag those.
2675+
# Note: appendleft/popleft are deque O(1) ops do NOT flag those.
26662676
if _call_in(calls, {"insert", "unshift", "shift"}):
26672677
results.append(
26682678
_finding(
@@ -3693,7 +3703,7 @@ def detect_branching_recursion(conn: sqlite3.Connection) -> list[dict]:
36933703
Generalizes fibonacci to any branching recursion: tree traversals,
36943704
divide-and-conquer, DP problems. O(2^n) -> O(n) with memoization.
36953705
"""
3696-
# self_call_count column may not exist in older DBs β€” fall back safely
3706+
# self_call_count column may not exist in older DBs fall back safely
36973707
try:
36983708
rows = conn.execute(
36993709
"SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
@@ -3780,11 +3790,11 @@ def _has_memo_collection(snippet: str) -> bool:
37803790
for r in rows:
37813791
if _is_test_path(r["file_path"]):
37823792
continue
3783-
# Skip fibonacci β€” already covered by detect_naive_fibonacci
3793+
# Skip fibonacci already covered by detect_naive_fibonacci
37843794
name_lower = (r["name"] or "").lower()
37853795
if "fib" in name_lower:
37863796
continue
3787-
# Skip tree/AST walkers β€” recursive traversal of children is
3797+
# Skip tree/AST walkers recursive traversal of children is
37883798
# intentional and doesn't have overlapping subproblems
37893799
_WALKER_NAMES = {
37903800
"walk",
@@ -3857,7 +3867,7 @@ def _has_memo_collection(snippet: str) -> bool:
38573867
query_cost=QUERY_COST_LOW,
38583868
)
38593869
def detect_quadratic_string(conn: sqlite3.Connection) -> list[dict]:
3860-
"""String concatenation via += inside a loop β€” O(n^2) due to
3870+
"""String concatenation via += inside a loop O(n^2) due to
38613871
immutable string reallocation in Python/Java/Go.
38623872
"""
38633873
try:
@@ -4650,7 +4660,7 @@ def _iter_registered_detectors():
46504660
for det in _MATH_DETECTORS:
46514661
yield det
46524662

4653-
# Python pivot v12.4 β€” language-specific idiom detectors. Wrapped
4663+
# Python pivot v12.4 language-specific idiom detectors. Wrapped
46544664
# in try/except so a regex bug in one detector can't block the
46554665
# algorithm pass.
46564666
try:

src/roam/commands/cmd_dark_matter.py

Lines changed: 51 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -297,22 +297,42 @@ def dark_matter(ctx, limit, min_npmi, min_cochanges, explain, category, persist)
297297
# W805-followup-D: empty-state disclosure (Pattern 2 silent-
298298
# fallback fix). Zero dark-matter pairs can mean two things:
299299
# (a) the co-change graph was analyzed cleanly and produced
300-
# no hidden couplings (real success), OR (b) the corpus had
301-
# no co-change history to analyze (degraded — needs index
302-
# populated). Distinguish via partial_success + state.
300+
# no hidden couplings (real success — no `state` stamp), OR
301+
# (b) the corpus had no co-change history to analyze
302+
# (degraded — `state=no_cochange` + partial_success=True).
303+
# Distinguish by actually querying git_cochange instead of
304+
# inferring from the empty pairs result; the previous code
305+
# always claimed (b) on zero pairs which is itself a silent
306+
# Pattern-2 — a clean populated graph would be labelled
307+
# "no co-change history" incorrectly.
308+
cochange_count: int
309+
if total == 0:
310+
try:
311+
cochange_count = conn.execute(
312+
"SELECT COUNT(*) FROM git_cochange"
313+
).fetchone()[0]
314+
except sqlite3.OperationalError:
315+
cochange_count = 0
316+
else:
317+
cochange_count = -1 # unused when total > 0
318+
if total > 0:
319+
verdict_str = f"{total} dark-matter coupling{'s' if total != 1 else ''} found"
320+
elif cochange_count == 0:
321+
verdict_str = (
322+
"no co-change history to analyze (corpus has 0 cochange records — "
323+
"run `roam index --force` to populate)"
324+
)
325+
else:
326+
verdict_str = "0 dark-matter couplings found"
303327
_summary: dict = {
304-
"verdict": (
305-
f"{total} dark-matter coupling{'s' if total != 1 else ''} found"
306-
if total > 0
307-
else "no co-change history to analyze (corpus has 0 cochange records — run `roam index --force` to populate)"
308-
),
328+
"verdict": verdict_str,
309329
"total_dark_matter_edges": total,
310330
"by_category": dict(by_cat),
311331
}
312-
if total == 0:
332+
if total == 0 and cochange_count == 0:
313333
_summary["partial_success"] = True
314334
_summary["state"] = "no_cochange"
315-
elif parts:
335+
elif total > 0 and parts:
316336
_summary["verdict"] += f" ({', '.join(parts)})"
317337

318338
click.echo(
@@ -341,7 +361,27 @@ def dark_matter(ctx, limit, min_npmi, min_cochanges, explain, category, persist)
341361
total = len(pairs)
342362

343363
if not pairs:
344-
click.echo("VERDICT: 0 dark-matter couplings found")
364+
# W805 (Pattern 2 propagation to text branch): the JSON branch
365+
# above already distinguishes "0 pairs from a populated
366+
# co-change graph" from "no co-change history to analyze".
367+
# Mirror that disclosure on the text branch so agents reading
368+
# the verdict line alone get the same lineage signal.
369+
try:
370+
cochange_count = conn.execute(
371+
"SELECT COUNT(*) FROM git_cochange"
372+
).fetchone()[0]
373+
except sqlite3.OperationalError:
374+
# git_cochange table missing (older schema) — treat as
375+
# no-cochange state per the loud-fallback rule.
376+
cochange_count = 0
377+
if cochange_count == 0:
378+
click.echo(
379+
"VERDICT: no co-change history to analyze "
380+
"(corpus has 0 cochange records — "
381+
"run `roam index --force` to populate)"
382+
)
383+
else:
384+
click.echo("VERDICT: 0 dark-matter couplings found")
345385
return
346386

347387
# Build verdict with category breakdown if hypotheses available

src/roam/commands/cmd_duplicates.py

Lines changed: 34 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -605,7 +605,37 @@ def duplicates(
605605
).fetchall()
606606

607607
if len(candidates) < 2:
608-
verdict = "No duplicate candidates found"
608+
# W805 (Pattern 2: silent fallbacks) — fewer than 2 candidate
609+
# functions means the duplicate-detection algorithm CANNOT
610+
# produce findings (it needs pairs to compare). The previous
611+
# verdict "No duplicate candidates found" was a silent SAFE
612+
# indistinguishable from "scan ran cleanly across 1000 funcs
613+
# and found no clusters". Disclose the absent input state so
614+
# agents see that the detector ran in a degraded mode.
615+
symbol_count = conn.execute(
616+
"SELECT COUNT(*) FROM symbols WHERE kind IN ('function', 'method')"
617+
).fetchone()[0]
618+
if symbol_count == 0:
619+
w805_state = "empty_corpus"
620+
verdict = (
621+
"no symbols to analyze (corpus has 0 functions/methods; "
622+
"run `roam index --force` to populate the graph "
623+
"before duplicate detection)"
624+
)
625+
elif len(candidates) == 0:
626+
w805_state = "no_candidates"
627+
verdict = (
628+
f"no candidates above min-lines threshold ({min_lines}; "
629+
f"all {symbol_count} functions are smaller — "
630+
f"detector had no input to analyze)"
631+
)
632+
else:
633+
# Exactly 1 candidate — the algorithm needs pairs.
634+
w805_state = "insufficient_candidates"
635+
verdict = (
636+
"only 1 candidate function above min-lines threshold "
637+
"(duplicate detection requires at least 2 to form a pair)"
638+
)
609639
if json_mode:
610640
click.echo(
611641
to_json(
@@ -616,6 +646,9 @@ def duplicates(
616646
"total_clusters": 0,
617647
"total_functions": 0,
618648
"estimated_reducible_lines": 0,
649+
"state": w805_state,
650+
"partial_success": True,
651+
"candidates_scanned": len(candidates),
619652
},
620653
clusters=[],
621654
)

0 commit comments

Comments
 (0)