Skip to content

Commit aa26362

Browse files
author
stoa-automerge
committed
fix: restore 4 more detectors autopilot falsely removed as graph-dead (registry 30->34)
Second recurrence of the decorator-dispatch false-positive: the dead-code lens removed detect_async_fire_and_forget/manual_maxmin/spread_accumulator/string_concat_loop (all @algorithm_detector-registered). Restored verbatim from origin + their _RE_* constants. The producer's dead-code lens is now RETIRED (7f5d728) so this can't recur.
1 parent 1200e67 commit aa26362

1 file changed

Lines changed: 258 additions & 0 deletions

File tree

src/roam/catalog/detectors.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,29 @@ def _guard_hints_from_source(language: str | None, snippet: str) -> list[str]:
796796
# ---------------------------------------------------------------------------
797797

798798

799+
_RE_FIRE_AND_FORGET_TASK = re.compile(
800+
r"^\s*(?:asyncio\.)?create_task\s*\(",
801+
re.MULTILINE,
802+
)
803+
_RE_STORED_TASK = re.compile(
804+
r"^\s*(?:\w+\s*[+\-*/]?=\s*|\w+\s*\.\s*append\s*\(\s*|\w+\s*\.\s*add\s*\(\s*|return\s+|await\s+)"
805+
r"(?:asyncio\.)?create_task\s*\(",
806+
re.MULTILINE,
807+
)
808+
_RE_SPREAD_ACC = re.compile(
809+
r"\b(\w+)\s*=\s*\[\s*\.\.\.\s*\1\s*,", # name = [...name,
810+
)
811+
_RE_SPREAD_OBJ_ACC = re.compile(
812+
r"\b(\w+)\s*=\s*\{\s*\.\.\.\s*\1\s*[,}]", # name = {...name,
813+
)
814+
_RE_REDUCE_SPREAD = re.compile(
815+
r"\.\s*reduce\s*\(\s*\(\s*(\w+)[^)]*\)\s*=>\s*\[\s*\.\.\.\s*\1\s*,",
816+
)
817+
_RE_REDUCE_SPREAD_OBJ = re.compile(
818+
r"\.\s*reduce\s*\(\s*\(\s*(\w+)[^)]*\)\s*=>\s*\{\s*\.\.\.\s*\1\s*[,}]",
819+
)
820+
821+
799822
@algorithm_detector(
800823
task_id="sorting",
801824
languages=(),
@@ -3994,6 +4017,241 @@ def detect_defer_in_loop(conn: sqlite3.Connection) -> list[dict]:
39944017
return results
39954018

39964019

4020+
@algorithm_detector(
    task_id="async-fire-and-forget-task",
    languages=("python",),
    confidence_basis="structural",
    query_cost=QUERY_COST_MEDIUM,
)
def detect_async_fire_and_forget(conn: sqlite3.Connection) -> list[dict]:
    """Flag ``asyncio.create_task(...)`` calls whose return value is discarded.

    A task that is not held by a live reference can be garbage-collected
    before it finishes; the asyncio documentation explicitly warns about
    this. The fix is to keep the task somewhere that survives until it is
    awaited, or to ``await`` it directly.

    Conservative: Python files only, test paths skipped, and a call is only
    counted as leaked when its line *starts* with ``create_task(`` /
    ``asyncio.create_task(`` (a bare expression statement).

    Args:
        conn: SQLite connection exposing the ``symbols`` and ``files``
            tables. Rows are read by column name, so the connection is
            assumed to use a mapping-style row factory (e.g. ``sqlite3.Row``).

    Returns:
        One finding per function/method containing at least one bare
        ``create_task`` statement, each carrying an extra ``"fix"`` entry.
        An empty list when the query raises ``sqlite3.Error``.
    """
    try:
        rows = conn.execute(
            "SELECT s.id, s.name, s.qualified_name, s.kind, f.path AS file_path, "
            "s.line_start, s.line_end "
            "FROM symbols s "
            "JOIN files f ON s.file_id = f.id "
            "WHERE s.kind IN ('function', 'method') "
            "AND f.language = 'python'"
        ).fetchall()
    except sqlite3.Error:
        return []
    results = []
    for r in rows:
        if _is_test_path(r["file_path"]):
            continue
        snippet = _read_symbol_source(r["file_path"], r["line_start"], r["line_end"]) or ""
        # Cheap substring pre-filter before running the regexes.
        if "create_task" not in snippet:
            continue
        # _RE_FIRE_AND_FORGET_TASK is anchored to the start of a line, so every
        # match is already an unstored call. _RE_STORED_TASK matches a disjoint
        # set of lines (assignment / append / add / return / await prefixes).
        # Subtracting one from the other would hide real leaks whenever the
        # same function also stores some other task, so the bare-statement
        # count *is* the leak count.
        leaked = len(_RE_FIRE_AND_FORGET_TASK.findall(snippet))
        if not leaked:
            continue
        stored = len(_RE_STORED_TASK.findall(snippet))
        total = leaked + stored
        finding = _finding(
            "async-fire-and-forget-task",
            "leaked-asyncio-task",
            r,
            f"{leaked} asyncio.create_task call(s) whose return value isn't stored — gc may discard the task before it completes",
            "high",
            snippet=snippet,
            matched_patterns=[
                f"create_task occurrences: {total}",
                f"stored: {stored}, leaked: {leaked}",
            ],
        )
        finding["fix"] = (
            "Store the task: `tasks.append(asyncio.create_task(coro()))` and await it later, or `await asyncio.create_task(coro())` directly."
        )
        results.append(finding)
    return results
4079+
4080+
4081+
@algorithm_detector(
    task_id="max-min",
    languages=(),
    confidence_basis="heuristic",
    query_cost=QUERY_COST_LOW,
)
def detect_manual_maxmin(conn: sqlite3.Connection) -> list[dict]:
    """Report max/min-named functions that loop and compare by hand.

    The hand-written loop and the library call are both O(n), so this is an
    idiom suggestion only and every finding is emitted at "low" confidence.

    Args:
        conn: SQLite connection exposing ``symbols``, ``files`` and
            ``math_signals``. Rows are read by column name, so a
            mapping-style row factory is assumed.

    Returns:
        One finding per matching function/method outside test paths whose
        loop calls do not already include a library max/min helper.
    """
    query = (
        "SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
        "s.line_start, ms.loop_depth, ms.loop_with_compare, "
        "ms.loop_with_accumulator, ms.calls_in_loops "
        "FROM symbols s "
        "JOIN files f ON s.file_id = f.id "
        "JOIN math_signals ms ON ms.symbol_id = s.id "
        "WHERE (s.name LIKE '%find\\_max%' ESCAPE '\\' OR s.name LIKE '%find\\_min%' ESCAPE '\\' "
        "  OR s.name LIKE '%findMax%' OR s.name LIKE '%findMin%' "
        "  OR s.name LIKE '%get\\_max%' ESCAPE '\\' OR s.name LIKE '%get\\_min%' ESCAPE '\\' "
        "  OR s.name LIKE '%getMax%' OR s.name LIKE '%getMin%' "
        "  OR s.name LIKE '%find\\_largest%' ESCAPE '\\' OR s.name LIKE '%find\\_smallest%' ESCAPE '\\' "
        "  OR s.name LIKE '%findLargest%' OR s.name LIKE '%findSmallest%') "
        "AND s.kind IN ('function', 'method') "
        "AND ms.loop_depth >= 1 "
        "AND ms.loop_with_compare = 1"
    )
    candidates = conn.execute(query).fetchall()

    # A loop that already delegates to one of these is not a manual scan.
    library_helpers = {"max", "min", "Math.max", "Math.min", "Collections.max", "Collections.min"}
    return [
        _finding(
            "max-min",
            "manual-loop",
            row,
            "Manual loop with comparisons in max/min function (idiomatic improvement)",
            "low",
        )
        for row in candidates
        if not _is_test_path(row["file_path"])
        and not _call_in(_iter_loop_calls(row), library_helpers)
    ]
4128+
4129+
4130+
@algorithm_detector(
    task_id="spread-accumulator",
    languages=JS_FAMILY_LANGUAGES,
    confidence_basis="structural",
    query_cost=QUERY_COST_MEDIUM,
)
def detect_spread_accumulator(conn: sqlite3.Connection) -> list[dict]:
    """JS/TS: flag accumulators rebuilt by spreading themselves.

    Covers `acc = [...acc, x]`, `acc = {...acc, k: v}` and the same shapes
    inside a `.reduce((acc, x) => ...)` callback; each copy makes the overall
    accumulation O(n²).

    Args:
        conn: SQLite connection exposing ``symbols`` and ``files``. Rows are
            read by column name, so a mapping-style row factory is assumed.

    Returns:
        One finding per function/method outside test paths whose source
        matches at least one spread pattern; ``match_line`` points at the
        earliest match. An empty list when the query raises ``sqlite3.Error``.
    """
    query = (
        "SELECT s.id, s.name, s.qualified_name, s.kind, f.path AS file_path, "
        "f.language AS language, s.line_start, s.line_end "
        "FROM symbols s "
        "JOIN files f ON s.file_id = f.id "
        "WHERE s.kind IN ('function', 'method') "
        "AND f.language IN " + _JS_FAMILY_SQL_TUPLE
    )
    try:
        symbols = conn.execute(query).fetchall()
    except sqlite3.Error:
        return []

    # Order matters: the first label that matches is the one quoted in the message.
    labelled_patterns = (
        (_RE_REDUCE_SPREAD, "reduce array spread accumulator"),
        (_RE_REDUCE_SPREAD_OBJ, "reduce object spread accumulator"),
        (_RE_SPREAD_ACC, "in-place array spread re-bind"),
        (_RE_SPREAD_OBJ_ACC, "in-place object spread re-bind"),
    )

    findings = []
    for row in symbols:
        if _is_test_path(row["file_path"]):
            continue
        source = _read_symbol_source(row["file_path"], row["line_start"], row["line_end"])
        if not source:
            continue

        # (offset, label) for the first occurrence of every pattern that hits.
        hits = []
        for pattern, label in labelled_patterns:
            found = pattern.search(source)
            if found is not None:
                hits.append((found.start(), label))
        if not hits:
            continue

        labels = [label for _, label in hits]
        earliest = min(offset for offset, _ in hits)
        # Translate the character offset into an absolute line number.
        match_line = (row["line_start"] or 1) + source[:earliest].count("\n")
        findings.append(
            _finding(
                "spread-accumulator",
                "spread-rebind",
                row,
                f"Spread accumulator ({labels[0]}) is O(n^2) — use .push() / Object.assign()",
                "high",
                match_line=match_line,
                snippet=source,
                matched_patterns=labels,
            )
        )
    return findings
4189+
4190+
4191+
@algorithm_detector(
    task_id="string-concat",
    languages=(),
    confidence_basis="structural",
    query_cost=QUERY_COST_MEDIUM,
)
def detect_string_concat_loop(conn: sqlite3.Connection) -> list[dict]:
    """Report accumulating loops that look like string building.

    A candidate is any function/method with a loop and an accumulator. It is
    reported when either its loop calls include a concat/append-style method
    or its name contains a string-building keyword.

    Args:
        conn: SQLite connection exposing ``symbols``, ``files`` and
            ``math_signals``. Rows are read by column name, so a
            mapping-style row factory is assumed.

    Returns:
        One "medium" confidence finding per qualifying symbol outside test
        paths.
    """
    query = (
        "SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
        "s.line_start, ms.loop_depth, ms.calls_in_loops, ms.loop_with_accumulator "
        "FROM symbols s "
        "JOIN files f ON s.file_id = f.id "
        "JOIN math_signals ms ON ms.symbol_id = s.id "
        "WHERE s.kind IN ('function', 'method') "
        "AND ms.loop_depth >= 1 "
        "AND ms.loop_with_accumulator = 1"
    )
    candidates = conn.execute(query).fetchall()

    # Substrings of a lower-cased function name that suggest string building.
    name_keywords = (
        "concat",
        "build_str",
        "build_string",
        "format",
        "render",
        "serialize",
        "to_string",
        "tostring",
        "stringify",
        "to_csv",
        "to_html",
        "to_xml",
        "generate_report",
        "join",
    )
    concat_methods = {"concat", "strcat", "append", "push"}

    findings = []
    for row in candidates:
        if _is_test_path(row["file_path"]):
            continue
        # Structural signal: the loop calls a concat/append-style method.
        calls_concat = bool(_call_in(_iter_loop_calls(row), concat_methods))
        # Name signal: the function name hints at string building.
        lowered = (row["name"] or "").lower()
        name_suggests = any(keyword in lowered for keyword in name_keywords)
        if not (calls_concat or name_suggests):
            continue
        findings.append(
            _finding(
                "string-concat",
                "loop-concat",
                row,
                "Loop accumulation in string-building function",
                "medium",
            )
        )
    return findings
4253+
4254+
39974255
# ---------------------------------------------------------------------------
39984256
# Confidence calibration
39994257
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)