@@ -171,7 +171,7 @@ def log_rss(label: str) -> None:
171171# memory after the chunked matcher returns so the matching phase can
172172# run without the full source GeoDataFrames resident.
173173OSM_MERGE_COLS = [
174- "osm_id" , "name" , "brand" ,
174+ "osm_id" , "osm_type" , "name" , "brand" ,
175175 "conf_mean" , "conf_lower" , "conf_upper" , "geometry" ,
176176]
177177OVERTURE_MERGE_COLS = [
@@ -327,7 +327,29 @@ def _load_gdf(
327327 # indices match the match-phase output.
328328 overture_merge_source_path = OVERTURE_PATH
329329 overture_merge_needs_test_bbox = True
330- if DEDUP_ENABLED :
330+ post_dedup_resume_path = conflation_dir / DEDUP_POST_FILTER_FILE
331+ if DEDUP_ENABLED and post_dedup_resume_path .exists ():
332+ print (
333+ f"\n Reusing post-dedup Overture from "
334+ f"{ post_dedup_resume_path } (skipping dedup pass)."
335+ )
336+ overture_gdf = (
337+ gpd .read_parquet (post_dedup_resume_path )
338+ .reset_index (drop = True )
339+ )
340+ overture_shared_labels , overture_radii = (
341+ assign_overture_shared_label (
342+ overture_gdf , load_overture_crosswalk (), match_radii ,
343+ default_radius_m = DEFAULT_RADIUS_M ,
344+ )
345+ )
346+ overture_l0_bits = compute_overture_l0_bits (
347+ overture_gdf ["taxonomy_l0" ].fillna ("" ).to_numpy (),
348+ )
349+ overture_merge_source_path = post_dedup_resume_path
350+ overture_merge_needs_test_bbox = False
351+ log_rss ("after Overture post-dedup reload" )
352+ elif DEDUP_ENABLED :
331353 dedup_checkpoint_dir = (
332354 conflation_dir / DEDUP_CHECKPOINT_SUBDIR
333355 )
0 commit comments