Skip to content

Commit edd4684

Browse files
authored
Merge pull request #133 from devdanzin/oom-dedup-faulthandler-and-family-match
oom_dedup: faulthandler-func SEGV matching + clears-exc family catch-all
2 parents bb266cc + 820c76e commit edd4684

2 files changed

Lines changed: 148 additions & 15 deletions

File tree

fusil/python/oom_dedup.py

Lines changed: 66 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def classify(text):
119119
kind="segv", file=None, line=None, func=None, assert_expr=None, fatal_msg=None
120120
)
121121
return dict(
122-
kind="fatal", file=None, line=None, func=None, assert_expr=None, fatal_msg=msg[:60]
122+
kind="fatal", file=None, line=None, func=None, assert_expr=None, fatal_msg=msg
123123
)
124124
if SEGV.search(text):
125125
return dict(kind="segv", file=None, line=None, func=None, assert_expr=None, fatal_msg=None)
@@ -133,7 +133,8 @@ def load_snapshot(lines):
133133
"""Load ``known_sites.tsv`` rows (an iterable of lines) into matcher tables."""
134134
by_func, by_assert, by_line = {}, {}, {}
135135
per_file_lines = collections.defaultdict(list)
136-
by_msg, kind_of = [], {}
136+
by_funcname = {} # bare func name -> oids (faulthandler-only stacks; see fh_match)
137+
by_msg, by_msgfam, kind_of = [], [], {}
137138
for line in lines:
138139
line = line.rstrip("\n")
139140
if not line or line.startswith("#"):
@@ -145,16 +146,22 @@ def load_snapshot(lines):
145146
kind_of[oid] = kind
146147
if kt == "func":
147148
by_func.setdefault(key, set()).add(oid)
149+
fn = key.rsplit(":", 1)[-1] # "file:func" -> "func"
150+
if re.fullmatch(r"\w+", fn): # clean ident only (skip combined "a/b/c(...)" keys)
151+
by_funcname.setdefault(fn, set()).add(oid)
148152
elif kt == "assert":
149153
by_assert.setdefault(key, set()).add(oid)
150154
elif kt == "msg":
151155
by_msg.append((key, oid))
156+
elif kt == "msgfam":
157+
by_msgfam.append((key, oid))
152158
elif kt == "line":
153159
f, ln = key.rsplit(":", 1)
154160
by_line.setdefault((f, int(ln)), set()).add(oid)
155161
per_file_lines[f].append((int(ln), oid))
156162
return dict(
157-
func=by_func, assert_=by_assert, line=by_line, fl=per_file_lines, msg=by_msg, kind=kind_of
163+
func=by_func, assert_=by_assert, line=by_line, fl=per_file_lines, msg=by_msg,
164+
msgfam=by_msgfam, kind=kind_of, funcname=by_funcname,
158165
)
159166

160167

@@ -170,19 +177,23 @@ def match(c, snap):
170177
if hit:
171178
return hit, "assert"
172179
if c.get("fatal_msg"):
180+
cm = c["fatal_msg"]
173181
# Match when the cataloged key is a prefix of the crash message (a key may be a short
174182
# signature, e.g. "_Py_CheckFunctionResult:") OR the (truncation-shortened) crash
175-
# message is a prefix of the key. The second clause must use the FULL crash message,
176-
# not a fixed [:30] slice -- a short slice stops before the discriminating content
177-
# (e.g. "_Py_Dealloc: Deallocator of type '<TYPE>'") and conflates type-specific keys
178-
# (OOM-0007 'Context' vs OOM-0023 '_StoreAction'), mislabelling any new type.
179-
hit = set(
180-
o
181-
for k, o in snap["msg"]
182-
if c["fatal_msg"].startswith(k) or k.startswith(c["fatal_msg"])
183-
)
184-
if hit:
185-
return hit, "msg"
183+
# message is a prefix of the key. Use the FULL crash message, not a fixed [:30] slice --
184+
# a short slice stops before the discriminating content (e.g. "_Py_Dealloc: Deallocator
185+
# of type '<TYPE>'") and conflates type-specific keys (OOM-0007 'Context' vs OOM-0023
186+
# '_StoreAction'). LONGEST match wins so the most specific type key beats a shorter one.
187+
exact = [(k, o) for k, o in snap["msg"] if cm.startswith(k) or k.startswith(cm)]
188+
if exact:
189+
maxlen = max(len(k) for k, _ in exact)
190+
return set(o for k, o in exact if len(k) == maxlen), "msg"
191+
# Family fallback: a substring identifying a whole bug family (e.g. the generic
192+
# subtype_dealloc 'cleared the current exception'), tried ONLY when no type-specific key
193+
# matched -> a new/fuzzer type dedups to the family (OOM-0023) instead of oomNEW.
194+
fam = set(o for sub, o in snap.get("msgfam", ()) if sub in cm)
195+
if fam:
196+
return fam, "msgfam"
186197
if c.get("file") and c.get("func"):
187198
hit = snap["func"].get("%s:%s" % (c["file"], c["func"]))
188199
if hit:
@@ -278,6 +289,39 @@ def extract_native_sites(text):
278289
return out
279290

280291

292+
# A faulthandler "Current thread's C stack trace" frame: '... at <func>+0x...'. On a
# free-threaded debug SEGV this is often the ONLY symbol info (no ASan '#N file.c:line'
# frames), so extract_native_sites comes back empty.
#
# Group 1 is the bare C identifier:
#   * ``\w*`` (not ``\w+``) so a one-character symbol still counts as an identifier;
#   * an optional dotted clone suffix (compilers may emit e.g. 'foo.cold', 'foo.part.0',
#     'foo.isra.0') is consumed but NOT captured, so 'code_dealloc.cold+0x..' yields
#     'code_dealloc' instead of the whole frame being silently dropped.
_SYM = re.compile(r", at ([A-Za-z_]\w*)(?:\.[\w.]+)?\+0x")

# Funcs to skip when matching such a symbol-only stack (innermost first): the asan/dump/eval/
# run plumbing + alloc/free + assert detectors + the dealloc dispatch and refcount macros
# that wrap every dealloc. The first SURVIVING func the catalog keys by name is the site.
# The pattern is anchored at both ends, so only whole names are skipped.
_FH_SKIP = re.compile(
    r"^(___?interceptor\w*|__sanitizer\w*|__asan\w*|_Py_Dump\w*|faulthandler\w*"
    r"|_PyEval_EvalFrameDefault|_PyEval_EvalFrame|_PyEval_Vector|PyEval_EvalCode|_PyEval_Frame\w*"
    r"|Py_RunMain|Py_BytesMain|pymain_\w+|_start|__libc_start\w*|run_mod|run_eval_code_obj"
    r"|pyrun_\w*|_PyRun_\w*|clear_thread_frame|clear_gen_frame"
    r"|fatal_error\w*|_Py_FatalError\w*|_PyObject_AssertFailed|_Py_NegativeRefcount"
    r"|_Py_Dealloc|_Py_MergeZeroLocalRefcount|Py_X?DECREF|Py_X?INCREF|_Py_X?DECREF\w*"
    r"|_PyMem_Debug\w*|PyMem_\w*Free|PyObject_\w*Free|PyMem_\w*Realloc|PyObject_\w*Realloc"
    r"|hook_f\w+|tracemalloc_\w+)$"
)
309+
310+
311+
def fh_match(text, snap):
    """Resolve a symbol-only faulthandler C stack to catalog ids by function name.

    Fallback for a SEGV/generic-fatal whose stdout has a faulthandler C stack (func names)
    but NO ASan ``#N ... file.c:line`` frames and no gdb resolution.  Frames are taken in
    the order they appear in ``text`` (faulthandler prints most-recent-call first); names
    matching ``_FH_SKIP`` are ignored, and the first remaining name found in
    ``snap["funcname"]`` decides the result (e.g. PyList_New -> OOM-0004).

    Returns ``(oids, func)`` -- a fresh set of ids and the matched name -- or
    ``(set(), None)`` when no frame is keyed.
    """
    survivors = (name for name in _SYM.findall(text) if not _FH_SKIP.match(name))
    for name in survivors:
        # A snapshot without a "funcname" table simply never matches.
        oids = snap.get("funcname", {}).get(name)
        if oids:
            # Copy so callers can mutate the result without touching the snapshot.
            return set(oids), name
    return set(), None
323+
324+
281325
def extract_site_from_bt(bt_text):
282326
"""First real CPython frame (back-compat)."""
283327
sites = extract_sites_from_bt(bt_text)
@@ -439,7 +483,7 @@ def decide(self, stdout_text, source_path=None):
439483
]
440484
if fmsg and not generic_fatal and not fmsg.lower().startswith(("segmentation", "aborted")):
441485
candidates.append(
442-
dict(file=None, line=None, func=None, assert_expr=None, fatal_msg=fmsg[:60])
486+
dict(file=None, line=None, func=None, assert_expr=None, fatal_msg=fmsg)
443487
)
444488
# Resolve a crash site when the stdout assertion text is unreliable (pure segv /
445489
# generic-assert fatal) or nothing matched yet. PREFER the native backtrace the
@@ -464,6 +508,13 @@ def decide(self, stdout_text, source_path=None):
464508
matched = set()
465509
for c in candidates:
466510
matched |= match(c, self.snap)[0]
511+
512+
# Faulthandler-only fallback: a SEGV/generic-fatal with no ASan file:line frames and
513+
# no gdb resolution still carries func names in the faulthandler C stack -- match the
514+
# innermost catalog-keyed func by name (e.g. PyList_New -> OOM-0004) before giving up.
515+
if not matched and not chain and (has_segv or generic_fatal):
516+
matched |= fh_match(stdout_text, self.snap)[0]
517+
467518
if matched:
468519
oid = sorted(matched)[0]
469520
self.seen[oid] += 1

tests/python/test_oom_dedup.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,5 +491,87 @@ def test_huge_tail_fatal_still_dedupes(self):
491491
self.assertEqual(label, "OOM-0022")
492492

493493

494+
class TestFaulthandlerMatch(unittest.TestCase):
    """fh_match: SEGVs with only a faulthandler symbol-stack (no ASan file:line frames)
    resolve by innermost catalog-keyed func name instead of falling to oomSEGV."""

    def setUp(self):
        """Write SNAPSHOT to a temporary .tsv and load it as matcher tables."""
        fd, self.path = tempfile.mkstemp(suffix=".tsv")
        # Register removal FIRST so the temp file is deleted even if the write below fails.
        self.addCleanup(os.remove, self.path)
        # fdopen takes ownership of fd: it is closed on exit (also on error), and a
        # buffered write() cannot silently stop short the way a single os.write() may.
        with os.fdopen(fd, "wb") as fh:
            fh.write(SNAPSHOT.encode())
        self.snap = oom_dedup.load_snapshot_file(self.path)

    def _segv(self, *funcs):
        """Build faulthandler-style SEGV stdout with one C-stack frame per name in ``funcs``
        (given innermost first)."""
        frames = "\n".join(f' Binary file "python", at {f}+0x10 [0x55]' for f in funcs)
        return (
            "Fatal Python error: Segmentation fault\n"
            "Current thread's C stack trace (most recent call first):\n" + frames + "\n"
        )

    def test_innermost_keyed_func_wins(self):
        # detector/eval frames skipped -> code_dealloc (OOM-0003) is the innermost keyed func
        oids, fn = oom_dedup.fh_match(
            self._segv("_Py_DumpStack", "_Py_Dealloc", "code_dealloc", "_PyEval_EvalFrameDefault"),
            self.snap,
        )
        self.assertEqual((oids, fn), ({"OOM-0003"}, "code_dealloc"))

    def test_plumbing_and_eval_never_match(self):
        # _PyEval_EvalFrameDefault IS keyed (OOM-0027) but is generic plumbing -> skipped
        oids, _ = oom_dedup.fh_match(
            self._segv("_Py_Dealloc", "Py_DECREF", "_PyEval_EvalFrameDefault", "PyEval_EvalCode"),
            self.snap,
        )
        self.assertEqual(oids, set())

    def test_decide_uses_fh_fallback(self):
        # Only the label matters here; the keep flag is intentionally ignored.
        d = oom_dedup.Deduper(self.path, keep=5)
        _, label = d.decide(self._segv("_Py_Dealloc", "dictiter_dealloc"))
        self.assertEqual(label, "OOM-0006")

    def test_decide_unkeyed_symbol_segv_stays_oomSEGV(self):
        # No frame is catalog-keyed, so the fallback must not invent a match.
        d = oom_dedup.Deduper(self.path, keep=5)
        _, label = d.decide(self._segv("_Py_Dealloc", "some_unkeyed_helper"))
        self.assertEqual(label, "oomSEGV")
537+
538+
539+
class TestMsgFamily(unittest.TestCase):
    """Behaviour of the ``msgfam`` catch-all rows.

    A clears-exc fatal for a new/fuzzer type dedups to the family id (OOM-0023), the
    type-specific ``msg`` keys keep priority (Context->0007, deque->0039), and the other
    invariant wordings ('raised'/'overrode') are NOT absorbed by the family.
    """

    # Snapshot text: a header comment, three type-specific msg rows, one msgfam row.
    SNAP = (
        "# oom_id\tkind\tkeytype\tkey\n"
        "OOM-0007\tfatal\tmsg\t_Py_Dealloc: Deallocator of type 'Context' cleared the curre\n"
        "OOM-0039\tfatal\tmsg\t_Py_Dealloc: Deallocator of type 'collections.deque' cleared\n"
        "OOM-0023\tfatal\tmsg\t_Py_Dealloc: Deallocator of type '_StoreAction' cleared the \n"
        "OOM-0023\tfatal\tmsgfam\tcleared the current exception"
    )

    def setUp(self):
        """Parse SNAP into matcher tables for each test."""
        rows = self.SNAP.splitlines()
        self.snap = oom_dedup.load_snapshot(rows)

    def _m(self, typ, verb="cleared the current exception"):
        """Return the oid set ``match`` yields for a _Py_Dealloc fatal about type ``typ``."""
        candidate = {"fatal_msg": f"_Py_Dealloc: Deallocator of type '{typ}' {verb}"}
        result = oom_dedup.match(candidate, self.snap)
        return result[0]

    def test_type_specific_keys_win_over_family(self):
        expectations = (
            ("Context", "OOM-0007"),
            ("collections.deque", "OOM-0039"),
            ("_StoreAction", "OOM-0023"),
        )
        for typ, oid in expectations:
            self.assertEqual(self._m(typ), {oid})

    def test_new_and_fuzzer_types_fall_back_to_family(self):
        # None of these types has its own msg row, so only the family row can hit.
        for typ in ("Evil", "weird_deque", "UnknownHandler"):
            self.assertEqual(self._m(typ), {"OOM-0023"})

    def test_other_invariant_variants_not_absorbed(self):
        # Different wording -> the family substring is absent -> no match at all.
        for typ, verb in (("Foo", "raised an exception"), ("Bar", "overrode the current exception")):
            self.assertEqual(self._m(typ, verb), set())
574+
575+
494576
if __name__ == "__main__":
495577
unittest.main()

0 commit comments

Comments
 (0)