Skip to content

Commit a8a19da

Browse files
PatBall1 and cursoragent committed
Optimise post_clean and add iterative gap filling
- Fix geometries once upfront instead of per-row in the loop
- Use a set for O(1) lookup with early skip for already-marked crowns
- Reuse calc_iou helper instead of inline computation
- Add iterative mode: repeats gap-filling rounds until crown count stabilises or max_iterations is reached (default 5)
- Add verbose parameter for progress logging

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent e241c71 commit a8a19da

1 file changed

Lines changed: 48 additions & 23 deletions

File tree

detectree2/models/outputs.py

Lines changed: 48 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -416,44 +416,69 @@ def union(x, y):
416416
def post_clean(unclean_df: gpd.GeoDataFrame,
417417
clean_df: gpd.GeoDataFrame,
418418
iou_threshold: float = 0.3,
419-
field: str = "Confidence_score") -> gpd.GeoDataFrame:
419+
field: str = "Confidence_score",
420+
max_iterations: int = 5,
421+
verbose: bool = True) -> gpd.GeoDataFrame:
420422
"""Fill in the gaps left by clean_crowns.
421423
424+
Takes the original (unclean) crowns and the cleaned set, then iteratively adds back crowns
425+
from the unclean set that do not significantly overlap with any cleaned crown. Each round,
426+
the combined result is re-cleaned to handle mutual overlaps among the newly added crowns.
427+
Iteration continues until no new crowns are added or ``max_iterations`` is reached.
428+
422429
Args:
423430
unclean_df (gpd.GeoDataFrame): Unclean crowns.
424431
clean_df (gpd.GeoDataFrame): Clean crowns.
425-
iou_threshold (float, optional): IoU threshold that determines whether predictions are considered overlapping.
426-
crowns are overlapping. Defaults to 0.3.
432+
iou_threshold (float, optional): IoU threshold that determines whether predictions are
433+
considered overlapping. Defaults to 0.3.
434+
field (str): Field used to prioritise selection of crowns. Defaults to "Confidence_score".
435+
max_iterations (int, optional): Maximum number of gap-filling rounds. Defaults to 5.
436+
verbose (bool, optional): Print progress information. Defaults to True.
427437
"""
428-
# Spatial join between unclean and clean dataframes using the new syntax
429-
joined_df = gpd.sjoin(unclean_df, clean_df, how="inner", predicate="intersects")
438+
# Fix invalid geometries once upfront, not per-row
439+
unclean_df = unclean_df.copy()
440+
unclean_df["geometry"] = unclean_df.geometry.buffer(0)
441+
442+
current_clean = clean_df.copy()
443+
current_clean["geometry"] = current_clean.geometry.buffer(0)
444+
445+
for iteration in range(1, max_iterations + 1):
446+
prev_count = len(current_clean)
430447

431-
to_remove = []
432-
for idx, row in joined_df.iterrows():
433-
# Using the default suffix 'left' for columns from the unclean_df and 'right' for columns from the clean_df
434-
unclean_shape = unclean_df.loc[idx, "geometry"]
435-
clean_shape = clean_df.loc[row["index_right"], "geometry"]
448+
# Spatial join to find candidate overlapping pairs (bbox intersection)
449+
joined_df = gpd.sjoin(unclean_df, current_clean, how="inner", predicate="intersects")
436450

437-
unclean_shape = unclean_shape.buffer(0)
438-
clean_shape = clean_shape.buffer(0)
451+
# Use a set for O(1) lookup; skip further pairs once an unclean crown is marked
452+
to_remove = set()
453+
for idx, row in joined_df.iterrows():
454+
if idx in to_remove:
455+
continue # Already marked for removal, skip remaining pairs
439456

440-
intersection_area = unclean_shape.intersection(clean_shape).area
441-
union_area = unclean_shape.union(clean_shape).area
442-
iou = intersection_area / union_area
457+
iou = calc_iou(unclean_df.loc[idx, "geometry"], current_clean.loc[row["index_right"], "geometry"])
458+
if iou > iou_threshold:
459+
to_remove.add(idx)
443460

444-
if iou > iou_threshold:
445-
to_remove.append(idx)
461+
reduced_unclean_df = unclean_df.drop(index=to_remove)
446462

447-
reduced_unclean_df = unclean_df.drop(index=to_remove)
463+
# Concatenate the reduced unclean dataframe with the clean dataframe
464+
result_df = pd.concat([current_clean, reduced_unclean_df], ignore_index=True)
465+
result_df.reset_index(drop=True, inplace=True)
448466

449-
# Concatenate the reduced unclean dataframe with the clean dataframe
450-
result_df = pd.concat([clean_df, reduced_unclean_df], ignore_index=True)
467+
# Re-clean the combined set to resolve any mutual overlaps among newly added crowns
468+
current_clean = clean_crowns(result_df, iou_threshold=iou_threshold, field=field, verbose=verbose)
469+
current_clean.reset_index(drop=True, inplace=True)
451470

452-
result_df.reset_index(drop=True, inplace=True)
471+
new_count = len(current_clean)
472+
if verbose:
473+
print(f"post_clean: iteration {iteration}{prev_count}{new_count} crowns "
474+
f"(+{new_count - prev_count})")
453475

454-
reclean_df = clean_crowns(result_df, iou_threshold=iou_threshold, field=field)
476+
if new_count == prev_count:
477+
if verbose:
478+
print("post_clean: converged, no new crowns added.")
479+
break
455480

456-
return reclean_df.reset_index(drop=True)
481+
return current_clean
457482

458483

459484
def load_geopandas_dataframes(folder):

0 commit comments

Comments
 (0)