@@ -796,6 +796,29 @@ def _guard_hints_from_source(language: str | None, snippet: str) -> list[str]:
796796# ---------------------------------------------------------------------------
797797
798798
799+ _RE_FIRE_AND_FORGET_TASK = re .compile (
800+ r"^\s*(?:asyncio\.)?create_task\s*\(" ,
801+ re .MULTILINE ,
802+ )
803+ _RE_STORED_TASK = re .compile (
804+ r"^\s*(?:\w+\s*[+\-*/]?=\s*|\w+\s*\.\s*append\s*\(\s*|\w+\s*\.\s*add\s*\(\s*|return\s+|await\s+)"
805+ r"(?:asyncio\.)?create_task\s*\(" ,
806+ re .MULTILINE ,
807+ )
808+ _RE_SPREAD_ACC = re .compile (
809+ r"\b(\w+)\s*=\s*\[\s*\.\.\.\s*\1\s*," , # name = [...name,
810+ )
811+ _RE_SPREAD_OBJ_ACC = re .compile (
812+ r"\b(\w+)\s*=\s*\{\s*\.\.\.\s*\1\s*[,}]" , # name = {...name,
813+ )
814+ _RE_REDUCE_SPREAD = re .compile (
815+ r"\.\s*reduce\s*\(\s*\(\s*(\w+)[^)]*\)\s*=>\s*\[\s*\.\.\.\s*\1\s*," ,
816+ )
817+ _RE_REDUCE_SPREAD_OBJ = re .compile (
818+ r"\.\s*reduce\s*\(\s*\(\s*(\w+)[^)]*\)\s*=>\s*\{\s*\.\.\.\s*\1\s*[,}]" ,
819+ )
820+
821+
799822@algorithm_detector (
800823 task_id = "sorting" ,
801824 languages = (),
@@ -3994,6 +4017,241 @@ def detect_defer_in_loop(conn: sqlite3.Connection) -> list[dict]:
39944017 return results
39954018
39964019
@algorithm_detector(
    task_id="async-fire-and-forget-task",
    languages=("python",),
    confidence_basis="structural",
    query_cost=QUERY_COST_MEDIUM,
)
def detect_async_fire_and_forget(conn: sqlite3.Connection) -> list[dict]:
    """Flag ``asyncio.create_task(...)`` statements whose result is discarded.

    Background tasks that aren't held in a long-lived reference can be
    garbage-collected before they finish; the asyncio documentation for
    ``create_task`` explicitly warns about this footgun. The fix is to store
    the task somewhere that survives until ``await`` time, or just ``await``
    it directly.

    Conservative: Python only, and only lines that *start* with the
    ``create_task`` call (a bare expression statement) count as leaked.

    Args:
        conn: Index database; queried for Python functions and methods.

    Returns:
        One finding per function containing at least one bare ``create_task``
        statement, each carrying a ``"fix"`` hint. Returns an empty list when
        the query raises ``sqlite3.Error``.
    """
    try:
        rows = conn.execute(
            "SELECT s.id, s.name, s.qualified_name, s.kind, f.path AS file_path, "
            "s.line_start, s.line_end "
            "FROM symbols s "
            "JOIN files f ON s.file_id = f.id "
            "WHERE s.kind IN ('function', 'method') "
            "AND f.language = 'python'"
        ).fetchall()
    except sqlite3.Error:
        return []

    results = []
    for r in rows:
        if _is_test_path(r["file_path"]):
            continue
        snippet = _read_symbol_source(r["file_path"], r["line_start"], r["line_end"]) or ""
        # Cheap substring pre-filter before running the regexes.
        if "create_task" not in snippet:
            continue
        # _RE_FIRE_AND_FORGET_TASK only matches lines that begin with the call,
        # while _RE_STORED_TASK only matches lines with a binding prefix
        # (``x =``, ``.append(``, ``return``, ``await`` ...). The two sets are
        # disjoint, so the bare-statement count already *is* the leak count;
        # subtracting the stored count would hide real leaks in functions that
        # also store some of their tasks.
        leaked = len(_RE_FIRE_AND_FORGET_TASK.findall(snippet))
        if leaked <= 0:
            continue
        stored = len(_RE_STORED_TASK.findall(snippet))
        total = leaked + stored
        finding = _finding(
            "async-fire-and-forget-task",
            "leaked-asyncio-task",
            r,
            f"{leaked} asyncio.create_task call(s) whose return value isn't stored — gc may discard the task before it completes",
            "high",
            snippet=snippet,
            matched_patterns=[
                f"create_task occurrences: {total}",
                f"stored: {stored}, leaked: {leaked}",
            ],
        )
        finding["fix"] = (
            "Store the task: `tasks.append(asyncio.create_task(coro()))` and await it later, or `await asyncio.create_task(coro())` directly."
        )
        results.append(finding)
    return results
4079+
4080+
@algorithm_detector(
    task_id="max-min",
    languages=(),
    confidence_basis="heuristic",
    query_cost=QUERY_COST_LOW,
)
def detect_manual_maxmin(conn: sqlite3.Connection) -> list[dict]:
    """Loops with comparisons in max/min-named functions.

    Same Big-O (both O(n)) — this is an idiom improvement, flagged at low
    confidence.

    Candidates are functions/methods whose name contains a find/get
    max/min/largest/smallest fragment (snake_case or camelCase) and whose
    ``math_signals`` row reports a loop containing a comparison. Test files
    and functions whose in-loop calls already include a max/min helper are
    skipped.

    Args:
        conn: Index database with ``symbols``, ``files`` and ``math_signals``.

    Returns:
        One low-confidence ``manual-loop`` finding per remaining candidate.
    """
    # ``_`` is a single-character wildcard in LIKE, so the snake_case patterns
    # escape it. The ESCAPE expression must be exactly one character, hence a
    # lone backslash (written ``\\`` in the Python literal).
    rows = conn.execute(
        "SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
        "s.line_start, ms.loop_depth, ms.loop_with_compare, "
        "ms.loop_with_accumulator, ms.calls_in_loops "
        "FROM symbols s "
        "JOIN files f ON s.file_id = f.id "
        "JOIN math_signals ms ON ms.symbol_id = s.id "
        "WHERE (s.name LIKE '%find\\_max%' ESCAPE '\\' OR s.name LIKE '%find\\_min%' ESCAPE '\\' "
        "  OR s.name LIKE '%findMax%' OR s.name LIKE '%findMin%' "
        "  OR s.name LIKE '%get\\_max%' ESCAPE '\\' OR s.name LIKE '%get\\_min%' ESCAPE '\\' "
        "  OR s.name LIKE '%getMax%' OR s.name LIKE '%getMin%' "
        "  OR s.name LIKE '%find\\_largest%' ESCAPE '\\' OR s.name LIKE '%find\\_smallest%' ESCAPE '\\' "
        "  OR s.name LIKE '%findLargest%' OR s.name LIKE '%findSmallest%') "
        "AND s.kind IN ('function', 'method') "
        "AND ms.loop_depth >= 1 "
        "AND ms.loop_with_compare = 1"
    ).fetchall()

    results = []
    for r in rows:
        if _is_test_path(r["file_path"]):
            continue
        calls = _iter_loop_calls(r)
        # A max/min helper called inside the loop means the loop is doing
        # something other than re-implementing that helper.
        if _call_in(calls, {"max", "min", "Math.max", "Math.min", "Collections.max", "Collections.min"}):
            continue
        results.append(
            _finding(
                "max-min",
                "manual-loop",
                r,
                "Manual loop with comparisons in max/min function (idiomatic improvement)",
                "low",
            )
        )
    return results
4128+
4129+
@algorithm_detector(
    task_id="spread-accumulator",
    languages=JS_FAMILY_LANGUAGES,
    confidence_basis="structural",
    query_cost=QUERY_COST_MEDIUM,
)
def detect_spread_accumulator(conn: sqlite3.Connection) -> list[dict]:
    """JS/TS: `acc = [...acc, x]` or `.reduce((acc, x) => [...acc, x])` is O(n²).

    Each JS-family function/method body is scanned with four regexes (array
    and object variants of the reduce form and of the in-place re-bind form).

    Args:
        conn: Index database; queried for JS-family functions and methods.

    Returns:
        One high-confidence finding per function with at least one match.
        ``match_line`` points at the earliest match in the function and
        ``matched_patterns`` lists the label of every pattern that matched.
        Returns an empty list when the query raises ``sqlite3.Error``.
    """
    try:
        rows = conn.execute(
            "SELECT s.id, s.name, s.qualified_name, s.kind, f.path AS file_path, "
            "f.language AS language, s.line_start, s.line_end "
            "FROM symbols s "
            "JOIN files f ON s.file_id = f.id "
            "WHERE s.kind IN ('function', 'method') "
            "AND f.language IN " + _JS_FAMILY_SQL_TUPLE
        ).fetchall()
    except sqlite3.Error:
        return []

    # Order matters: the first matching label is the one quoted in the message.
    labelled_patterns = (
        (_RE_REDUCE_SPREAD, "reduce array spread accumulator"),
        (_RE_REDUCE_SPREAD_OBJ, "reduce object spread accumulator"),
        (_RE_SPREAD_ACC, "in-place array spread re-bind"),
        (_RE_SPREAD_OBJ_ACC, "in-place object spread re-bind"),
    )

    results = []
    for r in rows:
        if _is_test_path(r["file_path"]):
            continue
        snippet = _read_symbol_source(r["file_path"], r["line_start"], r["line_end"])
        if not snippet:
            continue

        matched: list[str] = []
        starts: list[int] = []
        for pat, label in labelled_patterns:
            m = pat.search(snippet)
            if m is None:
                continue
            matched.append(label)
            starts.append(m.start())
        if not matched:
            continue

        # Translate the earliest match offset into a file line number by
        # counting every newline that precedes it in the snippet.
        first_pos = min(starts)
        line_offset = snippet.count("\n", 0, first_pos)
        match_line = (r["line_start"] or 1) + line_offset
        results.append(
            _finding(
                "spread-accumulator",
                "spread-rebind",
                r,
                f"Spread accumulator ({matched[0]}) is O(n^2) — use .push() / Object.assign()",
                "high",
                match_line=match_line,
                snippet=snippet,
                matched_patterns=matched,
            )
        )
    return results
4189+
4190+
@algorithm_detector(
    task_id="string-concat",
    languages=(),
    confidence_basis="structural",
    query_cost=QUERY_COST_MEDIUM,
)
def detect_string_concat_loop(conn: sqlite3.Connection) -> list[dict]:
    """Flag looping accumulators that look like string building.

    Candidates are functions/methods whose ``math_signals`` row reports a loop
    with an accumulator. A candidate is reported when either signal holds:

    * structural: the in-loop calls include a concat/append-style method;
    * naming: the function name contains a string-building keyword.

    Test files are skipped. Findings are emitted at medium confidence.
    """
    name_keywords = (
        "concat",
        "build_str",
        "build_string",
        "format",
        "render",
        "serialize",
        "to_string",
        "tostring",
        "stringify",
        "to_csv",
        "to_html",
        "to_xml",
        "generate_report",
        "join",
    )

    candidates = conn.execute(
        "SELECT s.id, s.name, s.qualified_name, s.kind, f.path as file_path, "
        "s.line_start, ms.loop_depth, ms.calls_in_loops, ms.loop_with_accumulator "
        "FROM symbols s "
        "JOIN files f ON s.file_id = f.id "
        "JOIN math_signals ms ON ms.symbol_id = s.id "
        "WHERE s.kind IN ('function', 'method') "
        "AND ms.loop_depth >= 1 "
        "AND ms.loop_with_accumulator = 1"
    ).fetchall()

    findings = []
    for row in candidates:
        if _is_test_path(row["file_path"]):
            continue

        # Structural signal: concat/append-style calls inside the loop.
        loop_calls = _iter_loop_calls(row)
        concat_called = bool(_call_in(loop_calls, {"concat", "strcat", "append", "push"}))

        # Name signal: the function name hints at string building.
        lowered = (row["name"] or "").lower()
        name_hinted = False
        for keyword in name_keywords:
            if keyword in lowered:
                name_hinted = True
                break

        if not (concat_called or name_hinted):
            continue
        findings.append(
            _finding(
                "string-concat",
                "loop-concat",
                row,
                "Loop accumulation in string-building function",
                "medium",
            )
        )
    return findings
4253+
4254+
39974255# ---------------------------------------------------------------------------
39984256# Confidence calibration
39994257# ---------------------------------------------------------------------------
0 commit comments