@@ -416,44 +416,69 @@ def union(x, y):
def post_clean(unclean_df: gpd.GeoDataFrame,
               clean_df: gpd.GeoDataFrame,
               iou_threshold: float = 0.3,
               field: str = "Confidence_score",
               max_iterations: int = 5,
               verbose: bool = True) -> gpd.GeoDataFrame:
    """Fill in the gaps left by clean_crowns.

    Takes the original (unclean) crowns and the cleaned set, then iteratively adds back crowns
    from the unclean set that do not significantly overlap with any cleaned crown. Each round,
    the combined result is re-cleaned to handle mutual overlaps among the newly added crowns.
    Iteration continues until the number of crowns stops changing or ``max_iterations`` is
    reached.

    Neither input frame is modified; the function works on re-indexed copies.

    Args:
        unclean_df (gpd.GeoDataFrame): Unclean crowns.
        clean_df (gpd.GeoDataFrame): Clean crowns.
        iou_threshold (float, optional): IoU threshold that determines whether predictions are
            considered overlapping. Defaults to 0.3.
        field (str): Field used to prioritise selection of crowns. Defaults to "Confidence_score".
        max_iterations (int, optional): Maximum number of gap-filling rounds. Defaults to 5.
        verbose (bool, optional): Print progress information. Defaults to True.

    Returns:
        gpd.GeoDataFrame: The gap-filled, re-cleaned crowns with a fresh 0..n-1 index.
    """
    # Work on re-indexed copies. A unique, unnamed index guarantees that the label lookups
    # below return a single geometry (duplicate labels would make ``.loc`` return a Series)
    # and that ``sjoin`` names its right-index column "index_right". The input indexes never
    # reach the output, which is always renumbered.
    unclean_df = unclean_df.reset_index(drop=True)
    current_clean = clean_df.reset_index(drop=True)

    # Fix invalid geometries once upfront, not per-pair
    unclean_df["geometry"] = unclean_df.geometry.buffer(0)
    current_clean["geometry"] = current_clean.geometry.buffer(0)

    # The unclean geometries never change between rounds, so fetch the column once
    unclean_geoms = unclean_df["geometry"]

    for iteration in range(1, max_iterations + 1):
        prev_count = len(current_clean)
        clean_geoms = current_clean["geometry"]

        # Spatial join to find candidate pairs whose geometries intersect
        joined_df = gpd.sjoin(unclean_df, current_clean, how="inner", predicate="intersects")

        # Use a set for O(1) lookup; skip further pairs once an unclean crown is marked.
        # Iterating the two label columns directly avoids building a Series per row.
        to_remove = set()
        for idx, clean_idx in zip(joined_df.index, joined_df["index_right"]):
            if idx in to_remove:
                continue  # Already marked for removal, skip remaining pairs

            iou = calc_iou(unclean_geoms.loc[idx], clean_geoms.loc[clean_idx])
            if iou > iou_threshold:
                to_remove.add(idx)

        reduced_unclean_df = unclean_df.drop(index=list(to_remove))

        # Concatenate the reduced unclean dataframe with the clean dataframe;
        # ignore_index=True already yields a fresh 0..n-1 index
        result_df = pd.concat([current_clean, reduced_unclean_df], ignore_index=True)

        # Re-clean the combined set to resolve any mutual overlaps among newly added crowns
        current_clean = clean_crowns(
            result_df, iou_threshold=iou_threshold, field=field, verbose=verbose
        ).reset_index(drop=True)

        new_count = len(current_clean)
        if verbose:
            # ``:+d`` renders the sign itself, so a shrinking set prints "-3" rather than "+-3"
            print(f"post_clean: iteration {iteration} — {prev_count} → {new_count} crowns "
                  f"({new_count - prev_count:+d})")

        if new_count == prev_count:
            if verbose:
                print("post_clean: converged, no new crowns added.")
            break

    return current_clean
458483
459484def load_geopandas_dataframes (folder ):
0 commit comments