Skip to content

Commit 784b7d4

Browse files
committed
Update the location hierarchy to include new Overture L2 categories such as Farmers Markets. Ensure that all OSM and Overture Maps (OM) points of interest are categorized, even if only into a catch-all category.
1 parent 1d08c57 commit 784b7d4

12 files changed

Lines changed: 510 additions & 234 deletions

File tree

config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ versions:
55
snapshot_osm: "20260313"
66
snapshot_overture: "20260313"
77
snapshot_foursquare: "20260313"
8-
aws: "20260313"
9-
conflation: "20260316"
8+
aws: "20260318"
9+
conflation: "20260318"
1010

1111
# Settings for downloading data
1212
download:

scripts/build_taxonomy.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,29 @@ def osm_cell(group):
4040

4141

4242
def overture_cell(group):
43-
"""Format Overture categories for one shared_label, grouped by l0."""
44-
by_l0 = {}
43+
"""Format Overture categories for one shared_label."""
44+
parts = []
4545
for _, row in group.iterrows():
4646
l0 = row["overture_l0"]
47-
l1 = row["overture_l1"] if pd.notna(row["overture_l1"]) else None
48-
by_l0.setdefault(l0, set())
49-
if l1:
50-
by_l0[l0].add(l1)
51-
parts = []
52-
for l0, l1s in by_l0.items():
53-
if l1s:
54-
l1_str = ", ".join(sorted(l1s))
55-
parts.append(f'<span class="tx-key">{l0}:</span> {l1_str}')
47+
l1 = row.get("overture_l1", "")
48+
l2 = row.get("overture_l2", "")
49+
if l1 and l2:
50+
parts.append(
51+
f'<span class="tx-key">{l0} &rsaquo;'
52+
f' {l1}:</span> {l2}'
53+
)
54+
elif l1:
55+
parts.append(
56+
f'<span class="tx-key">{l0}:</span> {l1}'
57+
)
58+
elif l2:
59+
parts.append(
60+
f'<span class="tx-key">{l0}:</span> {l2}'
61+
)
5662
else:
57-
parts.append(f'<span class="tx-key">{l0}</span>')
63+
parts.append(
64+
f'<span class="tx-key">{l0}</span>'
65+
)
5866
return "<br>".join(parts)
5967

6068

scripts/conflation/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ OSM POIs are assigned categories using the filter key priority order (shop >
3636
healthcare > leisure > amenity). If the specific tag value is not in the
3737
crosswalk, a wildcard (`*`) fallback for that key is used (default 50m radius).
3838

39-
Overture POIs are matched by (taxonomy_l0, taxonomy_l1), falling back to
40-
l0-only match when l1 is missing.
39+
Overture POIs are matched using a 4-tier cascade from most to least specific:
40+
(L0, L1, L2), then (L0, L2), then (L0, L1), then L0-only.
4141

4242
### 2. Spatial Candidate Search
4343

scripts/conflation/conflate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
"conf_mean", "conf_lower", "conf_upper", "geometry",
9999
]
100100
OVERTURE_MATCH_COLS = [
101-
"overture_id", "taxonomy_l0", "taxonomy_l1",
101+
"overture_id", "taxonomy_l0", "taxonomy_l1", "taxonomy_l2",
102102
"overture_name", "brand_name", "confidence", "geometry",
103103
]
104104

@@ -204,7 +204,7 @@ def _load_gdf(
204204
]:
205205
if col in osm_gdf.columns:
206206
osm_gdf.drop(columns = col, inplace = True)
207-
for col in ["taxonomy_l0", "taxonomy_l1"]:
207+
for col in ["taxonomy_l0", "taxonomy_l1", "taxonomy_l2"]:
208208
if col in overture_gdf.columns:
209209
overture_gdf.drop(columns = col, inplace = True)
210210
gc.collect()

scripts/overture/download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
Output file:
2424
overture_snapshot.parquet — GeoParquet with ~7.2M US POIs
2525
Columns: overture_id, overture_name, taxonomy_l0, taxonomy_l1,
26-
brand_name, confidence, geometry, source
26+
taxonomy_l2, brand_name, confidence, geometry, source
2727
"""
2828
from config_versioned import Config
2929
from openpois.io.overture import download_overture_snapshot

site/src/constants.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ export const CONFLATED_LABELS = [
6666
'Alternative Medicine',
6767
'Arcade',
6868
'Arts Venue',
69-
'Assisted Living',
7069
'Bakery',
7170
'Bank',
7271
'Bar',
@@ -91,8 +90,8 @@ export const CONFLATED_LABELS = [
9190
'Discount Store',
9291
'Dog Park',
9392
'Dry Cleaning',
94-
'Event Venue',
9593
'Eye Care',
94+
'Farmers Market',
9695
'Fast Food',
9796
'Fitness Center',
9897
'Florist',

src/openpois/conflation/data/match_radii.csv

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Alternative Medicine,50
33
Arcade,50
44
Arts Venue,100
5-
Assisted Living,100
65
Bakery,50
76
Bank,50
87
Bar,50
@@ -27,8 +26,8 @@ Dessert Shop,50
2726
Discount Store,50
2827
Dog Park,200
2928
Dry Cleaning,50
30-
Event Venue,50
3129
Eye Care,50
30+
Farmers Market,100
3231
Fast Food,50
3332
Fitness Center,50
3433
Florist,50

src/openpois/conflation/data/taxonomy_crosswalk_openstreetmap.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ amenity,hospital,Clinic
7777
amenity,ice_cream,Dessert Shop
7878
amenity,kindergarten,Kindergarten
7979
amenity,library,Library
80-
amenity,marketplace,Other Shop
80+
amenity,marketplace,Market
8181
amenity,museum,Museum
8282
amenity,nightclub,Nightclub
8383
amenity,pharmacy,Pharmacy

0 commit comments

Comments
 (0)