@@ -316,6 +316,33 @@ def _validate_inputs(root_dir: Path, alpha: float, tau: float, budget_tokens: in
316316 raise ValueError (f"budget_tokens must be > 0, got { budget_tokens } " )
317317
318318
def _find_dangling_semantic_names(
    selected: list[Fragment],
    graph: Graph,
    frag_by_id: dict[FragmentId, Fragment],
    selected_ids: set[FragmentId],
) -> set[str]:
    """Collect lower-cased symbol names referenced semantically but not selected.

    For every fragment in *selected*, each id returned by
    ``graph.neighbors`` is inspected. A neighbor contributes its name when
    all of the following hold:

    * its id is not in *selected_ids*;
    * ``graph.edge_categories`` maps ``(fragment id, neighbor id)`` to
      ``"semantic"`` (a missing entry counts as non-semantic);
    * *frag_by_id* has a truthy fragment for it with a truthy
      ``symbol_name``.

    Returns:
        The set of ``symbol_name.lower()`` values of all such neighbors.
    """
    names: set[str] = set()
    for source in selected:
        source_id = source.id
        for target_id in graph.neighbors(source_id):
            wanted = (
                target_id not in selected_ids
                and graph.edge_categories.get((source_id, target_id), "") == "semantic"
            )
            if not wanted:
                continue
            target = frag_by_id.get(target_id)
            # Unknown ids and unnamed fragments cannot be resolved by name.
            if target and target.symbol_name:
                names.add(target.symbol_name.lower())
    return names
336+
337+
def _pick_best_fragment(candidates: list[Fragment], selected_ids: set[FragmentId]) -> Fragment | None:
    """Choose one fragment from *candidates*, preferring signature kinds.

    Returns ``None`` when any candidate's id is already in *selected_ids*,
    or when *candidates* is empty. Otherwise returns the first candidate
    whose ``kind`` contains ``"_signature"``; if there is none, the first
    candidate in the list.
    """
    for candidate in candidates:
        if candidate.id in selected_ids:
            # One of these fragments is already selected; nothing to pick.
            return None

    signatures: list[Fragment] = []
    others: list[Fragment] = []
    for candidate in candidates:
        bucket = signatures if "_signature" in candidate.kind else others
        bucket.append(candidate)

    preferred = signatures if signatures else others
    return preferred[0] if preferred else None
344+
345+
319346def _coherence_post_pass (
320347 result : SelectionResult ,
321348 all_fragments : list [Fragment ],
@@ -331,32 +358,15 @@ def _coherence_post_pass(
331358 name_to_frags .setdefault (f .symbol_name .lower (), []).append (f )
332359
333360 frag_by_id : dict [FragmentId , Fragment ] = {f .id : f for f in all_fragments }
334-
335- dangling_names : set [str ] = set ()
336- for frag in result .selected :
337- for nbr_id in graph .neighbors (frag .id ):
338- if nbr_id in selected_ids :
339- continue
340- cat = graph .edge_categories .get ((frag .id , nbr_id ), "" )
341- if cat == "semantic" :
342- nbr_frag = frag_by_id .get (nbr_id )
343- if nbr_frag and nbr_frag .symbol_name :
344- dangling_names .add (nbr_frag .symbol_name .lower ())
361+ dangling_names = _find_dangling_semantic_names (result .selected , graph , frag_by_id , selected_ids )
345362
346363 added : list [Fragment ] = []
347364 for name in dangling_names :
348- candidates = name_to_frags .get (name , [])
349- for c in candidates :
350- if c .id in selected_ids :
351- break
352- else :
353- sig_candidates = [f for f in candidates if "_signature" in f .kind ]
354- full_candidates = [f for f in candidates if "_signature" not in f .kind ]
355- pick = next (iter (sig_candidates or full_candidates ), None )
356- if pick and pick .token_count <= remaining and pick .id not in selected_ids :
357- added .append (pick )
358- selected_ids .add (pick .id )
359- remaining -= pick .token_count
365+ pick = _pick_best_fragment (name_to_frags .get (name , []), selected_ids )
366+ if pick and pick .token_count <= remaining and pick .id not in selected_ids :
367+ added .append (pick )
368+ selected_ids .add (pick .id )
369+ remaining -= pick .token_count
360370
361371 if not added :
362372 return result
0 commit comments