@@ -119,7 +119,7 @@ def classify(text):
119119 kind = "segv" , file = None , line = None , func = None , assert_expr = None , fatal_msg = None
120120 )
121121 return dict (
122- kind = "fatal" , file = None , line = None , func = None , assert_expr = None , fatal_msg = msg [: 60 ]
122+ kind = "fatal" , file = None , line = None , func = None , assert_expr = None , fatal_msg = msg
123123 )
124124 if SEGV .search (text ):
125125 return dict (kind = "segv" , file = None , line = None , func = None , assert_expr = None , fatal_msg = None )
@@ -133,7 +133,8 @@ def load_snapshot(lines):
133133 """Load ``known_sites.tsv`` rows (an iterable of lines) into matcher tables."""
134134 by_func , by_assert , by_line = {}, {}, {}
135135 per_file_lines = collections .defaultdict (list )
136- by_msg , kind_of = [], {}
136+ by_funcname = {} # bare func name -> oids (faulthandler-only stacks; see fh_match)
137+ by_msg , by_msgfam , kind_of = [], [], {}
137138 for line in lines :
138139 line = line .rstrip ("\n " )
139140 if not line or line .startswith ("#" ):
@@ -145,16 +146,22 @@ def load_snapshot(lines):
145146 kind_of [oid ] = kind
146147 if kt == "func" :
147148 by_func .setdefault (key , set ()).add (oid )
149+ fn = key .rsplit (":" , 1 )[- 1 ] # "file:func" -> "func"
150+ if re .fullmatch (r"\w+" , fn ): # clean ident only (skip combined "a/b/c(...)" keys)
151+ by_funcname .setdefault (fn , set ()).add (oid )
148152 elif kt == "assert" :
149153 by_assert .setdefault (key , set ()).add (oid )
150154 elif kt == "msg" :
151155 by_msg .append ((key , oid ))
156+ elif kt == "msgfam" :
157+ by_msgfam .append ((key , oid ))
152158 elif kt == "line" :
153159 f , ln = key .rsplit (":" , 1 )
154160 by_line .setdefault ((f , int (ln )), set ()).add (oid )
155161 per_file_lines [f ].append ((int (ln ), oid ))
156162 return dict (
157- func = by_func , assert_ = by_assert , line = by_line , fl = per_file_lines , msg = by_msg , kind = kind_of
163+ func = by_func , assert_ = by_assert , line = by_line , fl = per_file_lines , msg = by_msg ,
164+ msgfam = by_msgfam , kind = kind_of , funcname = by_funcname ,
158165 )
159166
160167
@@ -170,19 +177,23 @@ def match(c, snap):
170177 if hit :
171178 return hit , "assert"
172179 if c .get ("fatal_msg" ):
180+ cm = c ["fatal_msg" ]
173181 # Match when the cataloged key is a prefix of the crash message (a key may be a short
174182 # signature, e.g. "_Py_CheckFunctionResult:") OR the (truncation-shortened) crash
175- # message is a prefix of the key. The second clause must use the FULL crash message,
176- # not a fixed [:30] slice -- a short slice stops before the discriminating content
177- # (e.g. "_Py_Dealloc: Deallocator of type '<TYPE>'") and conflates type-specific keys
178- # (OOM-0007 'Context' vs OOM-0023 '_StoreAction'), mislabelling any new type.
179- hit = set (
180- o
181- for k , o in snap ["msg" ]
182- if c ["fatal_msg" ].startswith (k ) or k .startswith (c ["fatal_msg" ])
183- )
184- if hit :
185- return hit , "msg"
183+ # message is a prefix of the key. Use the FULL crash message, not a fixed [:30] slice --
184+ # a short slice stops before the discriminating content (e.g. "_Py_Dealloc: Deallocator
185+ # of type '<TYPE>'") and conflates type-specific keys (OOM-0007 'Context' vs OOM-0023
186+ # '_StoreAction'). LONGEST match wins so the most specific type key beats a shorter one.
187+ exact = [(k , o ) for k , o in snap ["msg" ] if cm .startswith (k ) or k .startswith (cm )]
188+ if exact :
189+ maxlen = max (len (k ) for k , _ in exact )
190+ return set (o for k , o in exact if len (k ) == maxlen ), "msg"
191+ # Family fallback: a substring identifying a whole bug family (e.g. the generic
192+ # subtype_dealloc 'cleared the current exception'), tried ONLY when no type-specific key
193+ # matched -> a new/fuzzer type dedups to the family (OOM-0023) instead of oomNEW.
194+ fam = set (o for sub , o in snap .get ("msgfam" , ()) if sub in cm )
195+ if fam :
196+ return fam , "msgfam"
186197 if c .get ("file" ) and c .get ("func" ):
187198 hit = snap ["func" ].get ("%s:%s" % (c ["file" ], c ["func" ]))
188199 if hit :
@@ -278,6 +289,39 @@ def extract_native_sites(text):
278289 return out
279290
280291
# A faulthandler "Current thread's C stack trace" frame: '... at <func>+0x...'. On a
# free-threaded debug SEGV this is often the ONLY symbol info (no ASan '#N file.c:line'
# frames), so extract_native_sites comes back empty.
# The captured name is one identifier-start character followed by ZERO or more word
# characters, so a single-character symbol is captured too.
_SYM = re.compile(r", at ([A-Za-z_]\w*)\+0x")
# Funcs to skip when matching such a symbol-only stack (innermost first): the asan/dump/eval/
# run plumbing + alloc/free + assert detectors + the dealloc dispatch and refcount macros
# that wrap every dealloc. The first SURVIVING func the catalog keys by name is the site.
_FH_SKIP = re.compile(
    r"^(___?interceptor\w*|__sanitizer\w*|__asan\w*|_Py_Dump\w*|faulthandler\w*"
    r"|_PyEval_EvalFrameDefault|_PyEval_EvalFrame|_PyEval_Vector|PyEval_EvalCode|_PyEval_Frame\w*"
    r"|Py_RunMain|Py_BytesMain|pymain_\w+|_start|__libc_start\w*|run_mod|run_eval_code_obj"
    r"|pyrun_\w*|_PyRun_\w*|clear_thread_frame|clear_gen_frame"
    r"|fatal_error\w*|_Py_FatalError\w*|_PyObject_AssertFailed|_Py_NegativeRefcount"
    r"|_Py_Dealloc|_Py_MergeZeroLocalRefcount|Py_X?DECREF|Py_X?INCREF|_Py_X?DECREF\w*"
    r"|_PyMem_Debug\w*|PyMem_\w*Free|PyObject_\w*Free|PyMem_\w*Realloc|PyObject_\w*Realloc"
    r"|hook_f\w+|tracemalloc_\w+)$"
)


def fh_match(text, snap):
    """Match a symbol-only faulthandler C stack against the catalog by bare function name.

    Fallback for a SEGV/generic-fatal whose stdout has a faulthandler C stack (func names)
    but NO ASan ``#N ... file.c:line`` frames and no gdb resolution.

    Args:
        text: Raw crash output; every ``, at <func>+0x`` occurrence is treated as a frame,
            in the order it appears (faulthandler prints most-recent-call first).
        snap: Matcher tables; only the optional ``"funcname"`` entry (bare function name ->
            set of catalog ids) is consulted. A missing or ``None`` table matches nothing.

    Returns:
        ``(oids, func)`` for the first frame that is not plumbing (``_FH_SKIP``) and whose
        name has a non-empty catalog entry (e.g. PyList_New -> OOM-0004); ``oids`` is a
        fresh set, so callers may mutate it freely. ``(set(), None)`` when no frame matches.
    """
    # Loop-invariant: resolve the name table once rather than once per frame.
    by_name = snap.get("funcname") or {}
    for fn in _SYM.findall(text):
        if _FH_SKIP.match(fn):
            continue
        # Frames the catalog does not key are stepped over, not treated as the site.
        hit = by_name.get(fn)
        if hit:
            return set(hit), fn
    return set(), None
323+
324+
281325def extract_site_from_bt (bt_text ):
282326 """First real CPython frame (back-compat)."""
283327 sites = extract_sites_from_bt (bt_text )
@@ -439,7 +483,7 @@ def decide(self, stdout_text, source_path=None):
439483 ]
440484 if fmsg and not generic_fatal and not fmsg .lower ().startswith (("segmentation" , "aborted" )):
441485 candidates .append (
442- dict (file = None , line = None , func = None , assert_expr = None , fatal_msg = fmsg [: 60 ] )
486+ dict (file = None , line = None , func = None , assert_expr = None , fatal_msg = fmsg )
443487 )
444488 # Resolve a crash site when the stdout assertion text is unreliable (pure segv /
445489 # generic-assert fatal) or nothing matched yet. PREFER the native backtrace the
@@ -464,6 +508,13 @@ def decide(self, stdout_text, source_path=None):
464508 matched = set ()
465509 for c in candidates :
466510 matched |= match (c , self .snap )[0 ]
511+
512+ # Faulthandler-only fallback: a SEGV/generic-fatal with no ASan file:line frames and
513+ # no gdb resolution still carries func names in the faulthandler C stack -- match the
514+ # innermost catalog-keyed func by name (e.g. PyList_New -> OOM-0004) before giving up.
515+ if not matched and not chain and (has_segv or generic_fatal ):
516+ matched |= fh_match (stdout_text , self .snap )[0 ]
517+
467518 if matched :
468519 oid = sorted (matched )[0 ]
469520 self .seen [oid ] += 1
0 commit comments