Skip to content

Commit df6d800

Browse files
committed
Create PMTiles layers for OSM and Conflated datasets.
1 parent 466d85e commit df6d800

25 files changed

Lines changed: 700 additions & 1375 deletions

.claude/skills/conflate-snapshots/SKILL.md

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,29 @@ Taxonomy-aware matching between rated OSM and Overture, then partition and uploa
4444
```
4545
Outputs `conflated_partitioned/` (and OSM-only `osm_snapshot_partitioned/`).
4646

47-
7. **Upload to S3**:
47+
6.5. **Build PMTiles** — single-zoom (z14) archives consumed directly by the
48+
site via `ol-pmtiles`. Intermediate FlatGeobufs are cleaned up on success.
49+
```bash
50+
python -u scripts/osm_snapshot/prepare_pmtiles.py \
51+
2>&1 | tee ~/data/openpois/logs/pmtiles_osm_<version>.log
52+
python -u scripts/conflation/prepare_pmtiles.py \
53+
2>&1 | tee ~/data/openpois/logs/pmtiles_conflated_<version>.log
54+
```
55+
Properties and zoom range are configured under `upload.pmtiles` in
56+
`config.yaml`.
57+
58+
7. **Upload to S3** — pushes partitioned parquet AND the matching `.pmtiles`
59+
(single file at `…/<version>/<name>.pmtiles`) under `versions.aws`.
4860
```bash
49-
python scripts/conflation/upload_to_s3.py
61+
python scripts/osm_snapshot/upload_to_s3.py # OSM parquet + pmtiles
62+
python scripts/conflation/upload_to_s3.py # conflated parquet + pmtiles
63+
```
64+
To upload only the PMTiles (e.g., after regenerating tiles without touching
65+
the parquet), use:
66+
```bash
67+
python scripts/osm_snapshot/upload_pmtiles_to_s3.py [--s3-version YYYYMMDD]
68+
python scripts/conflation/upload_pmtiles_to_s3.py [--s3-version YYYYMMDD]
5069
```
51-
Pushes to `s3://openpois-public/snapshots/...` under `versions.aws`.
5270

5371
8. **Update latest-URL pointers** in `config.yaml`:
5472
```yaml

config.yaml

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ directories:
217217
conflated: conflated.parquet
218218
match_diagnostics: match_diagnostics.parquet
219219
partitioned: conflated_partitioned
220+
pmtiles: conflated.pmtiles
220221
summary_by_label: summary_by_label.csv
221222
testing:
222223
versioned: false
@@ -265,6 +266,35 @@ upload:
265266
latest_url_conflation: "https://openpois-public.s3.us-west-2.amazonaws.com/snapshots/conflated/20260422/conflated_partitioned/"
266267
geohash_precision_partition: 4 # ~39 km x 20 km cells; ~1,000–3,000 cells over CONUS
267268
geohash_precision_sort: 6 # ~1.2 km x 0.6 km; fine-grained sort within each partition
269+
# PMTiles generation — single-zoom archive at z14 for both OSM and conflated.
270+
# Site's View.minZoom is 14; OpenLayers over-zooms past z14 natively so
271+
# z15-20 render as lossless geometric scale-ups of the z14 tile.
268272
pmtiles:
269273
min_zoom: 14
270-
max_zoom: 16
274+
max_zoom: 14
275+
drop_strategy: "drop-densest-as-needed"
276+
osm_layer_name: "osm_pois"
277+
conflated_layer_name: "conflated_pois"
278+
osm_properties:
279+
- osm_id
280+
- source
281+
- name
282+
- conf_mean # drives confidence-based point coloring
283+
- amenity
284+
- shop
285+
- leisure
286+
- healthcare
287+
- craft
288+
- historic
289+
- landuse
290+
- office
291+
- tourism
292+
conflated_properties:
293+
- unified_id
294+
- source
295+
- shared_label
296+
- conf_mean
297+
- name
298+
- brand
299+
- match_score
300+
- match_distance_m

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ dependencies:
8282
- ruamel.yaml.clib=0.2.14=py312h4c3975b_0
8383
- setuptools=80.9.0=pyhff2d567_0
8484
- simdjson=4.2.2=hb700be7_0
85+
- tippecanoe=2.79.0=hbd1d79c_0
8586
- tk=8.6.13=noxft_ha0e22de_103
8687
- tqdm=4.67.1=pyhd8ed1ab_1
8788
- truststore=0.10.3=pyhe01879c_0
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
Build conflated.pmtiles from the conflated POI dataset.
3+
4+
Output is a single-zoom PMTiles archive (z14) configured by the config's
5+
``upload.pmtiles`` block. OpenLayers over-zooms z15-20 natively, and the site
6+
never renders below z14, so tiling extra zoom levels would just waste disk and
7+
wall time.
8+
9+
Intermediate FlatGeobuf is staged next to the output and deleted on success.
10+
"""
11+
from config_versioned import Config
12+
13+
from openpois.io.pmtiles import build_pmtiles
14+
15+
# -----------------------------------------------------------------------------
16+
# Configuration
17+
# -----------------------------------------------------------------------------
18+
19+
config = Config("~/repos/openpois/config.yaml")
20+
21+
INPUT_PATH = config.get_file_path("conflation", "conflated")
22+
OUTPUT_PATH = config.get_file_path("conflation", "pmtiles")
23+
24+
LAYER_NAME = config.get("upload", "pmtiles", "conflated_layer_name")
25+
PROPERTIES = config.get("upload", "pmtiles", "conflated_properties")
26+
MIN_ZOOM = config.get("upload", "pmtiles", "min_zoom")
27+
MAX_ZOOM = config.get("upload", "pmtiles", "max_zoom")
28+
DROP_STRATEGY = config.get("upload", "pmtiles", "drop_strategy")
29+
30+
# -----------------------------------------------------------------------------
31+
# Main workflow
32+
# -----------------------------------------------------------------------------
33+
34+
if __name__ == "__main__":
35+
print(f"Building conflated PMTiles from {INPUT_PATH}")
36+
print(f" layer: {LAYER_NAME}")
37+
print(f" zooms: Z{MIN_ZOOM}-z{MAX_ZOOM}")
38+
print(f" drop: --{DROP_STRATEGY}")
39+
print(f" props: {', '.join(PROPERTIES)}")
40+
print(f" -> {OUTPUT_PATH}")
41+
42+
stats = build_pmtiles(
43+
input_parquet = INPUT_PATH,
44+
output_pmtiles = OUTPUT_PATH,
45+
layer_name = LAYER_NAME,
46+
properties = PROPERTIES,
47+
min_zoom = MIN_ZOOM,
48+
max_zoom = MAX_ZOOM,
49+
drop_strategy = DROP_STRATEGY,
50+
)
51+
52+
print(
53+
f"Done. Wrote {stats['rows_written']:,} features, "
54+
f"{stats['pmtiles_bytes'] / 1e9:.2f} GB PMTiles."
55+
)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""
2+
Upload conflated.pmtiles to the public S3 bucket.
3+
4+
Key layout:
5+
s3://<bucket>/<s3_prefix>/<s3_version>/conflated.pmtiles
6+
7+
``s3_version`` defaults to ``versions.aws`` from config. Override with
8+
``--s3-version YYYYMMDD`` to upload under a different version segment.
9+
10+
See the sibling ``upload_to_s3.py`` for AWS setup prerequisites.
11+
"""
12+
import argparse
13+
14+
from config_versioned import Config
15+
16+
from openpois.io.s3 import upload_single_file
17+
18+
# -----------------------------------------------------------------------------
19+
# Configuration
20+
# -----------------------------------------------------------------------------
21+
22+
config = Config("~/repos/openpois/config.yaml")
23+
24+
PMTILES_PATH = config.get_file_path("conflation", "pmtiles")
25+
S3_BUCKET = config.get("upload", "s3_bucket")
26+
S3_PREFIX = config.get("upload", "s3_prefix_conflation")
27+
S3_REGION = config.get("upload", "s3_region")
28+
DEFAULT_VERSION = config.get("versions", "aws")
29+
30+
31+
# -----------------------------------------------------------------------------
32+
# Main workflow
33+
# -----------------------------------------------------------------------------
34+
35+
if __name__ == "__main__":
36+
parser = argparse.ArgumentParser(description = __doc__)
37+
parser.add_argument(
38+
"--s3-version",
39+
default = DEFAULT_VERSION,
40+
help = (
41+
"Version segment in the S3 key path. Defaults to versions.aws in "
42+
"config.yaml."
43+
),
44+
)
45+
args = parser.parse_args()
46+
47+
if not PMTILES_PATH.exists():
48+
raise FileNotFoundError(
49+
f"{PMTILES_PATH} not found. Run prepare_pmtiles.py first."
50+
)
51+
52+
s3_key = f"{S3_PREFIX}/{args.s3_version}/{PMTILES_PATH.name}"
53+
url = upload_single_file(
54+
local_path = PMTILES_PATH,
55+
bucket = S3_BUCKET,
56+
s3_key = s3_key,
57+
s3_region = S3_REGION,
58+
content_type = "application/octet-stream",
59+
)
60+
print(f"Uploaded conflated PMTiles: {url}")

scripts/conflation/upload_to_s3.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"""
4949
from config_versioned import Config
5050

51-
from openpois.io.s3 import upload_partitioned_dataset
51+
from openpois.io.s3 import upload_partitioned_dataset, upload_single_file
5252

5353
# -----------------------------------------------------------------------------
5454
# Configuration constants
@@ -57,6 +57,7 @@
5757
config = Config("~/repos/openpois/config.yaml")
5858

5959
PARTITIONED_DIR = config.get_file_path("conflation", "partitioned")
60+
PMTILES_PATH = config.get_file_path("conflation", "pmtiles")
6061
AWS_VERSION = config.get("versions", "aws")
6162
S3_BUCKET = config.get("upload", "s3_bucket")
6263
S3_PREFIX = config.get("upload", "s3_prefix_conflation")
@@ -87,3 +88,19 @@
8788
)
8889
print(f"Uploaded {n:,} files.")
8990
print(f"Public base URL: {base_url}")
91+
92+
if PMTILES_PATH.exists():
93+
pmtiles_key = f"{S3_PREFIX}/{AWS_VERSION}/{PMTILES_PATH.name}"
94+
pmtiles_url = upload_single_file(
95+
local_path = PMTILES_PATH,
96+
bucket = S3_BUCKET,
97+
s3_key = pmtiles_key,
98+
s3_region = S3_REGION,
99+
content_type = "application/octet-stream",
100+
)
101+
print(f"Uploaded PMTiles: {pmtiles_url}")
102+
else:
103+
print(
104+
f"No PMTiles at {PMTILES_PATH} — skipping. "
105+
"Run prepare_pmtiles.py to generate."
106+
)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
Build osm_snapshot.pmtiles from the rated OSM snapshot.
3+
4+
Output is a single-zoom PMTiles archive (z14) configured by the config's
5+
``upload.pmtiles`` block. OpenLayers over-zooms z15-20 natively, and the site
6+
never renders below z14, so tiling extra zoom levels would just waste disk and
7+
wall time.
8+
9+
Intermediate FlatGeobuf is staged next to the output and deleted on success.
10+
"""
11+
from config_versioned import Config
12+
13+
from openpois.io.pmtiles import build_pmtiles
14+
15+
# -----------------------------------------------------------------------------
16+
# Configuration
17+
# -----------------------------------------------------------------------------
18+
19+
config = Config("~/repos/openpois/config.yaml")
20+
21+
INPUT_PATH = config.get_file_path("snapshot_osm", "rated_snapshot")
22+
OUTPUT_PATH = config.get_file_path("snapshot_osm", "pmtiles")
23+
24+
LAYER_NAME = config.get("upload", "pmtiles", "osm_layer_name")
25+
PROPERTIES = config.get("upload", "pmtiles", "osm_properties")
26+
MIN_ZOOM = config.get("upload", "pmtiles", "min_zoom")
27+
MAX_ZOOM = config.get("upload", "pmtiles", "max_zoom")
28+
DROP_STRATEGY = config.get("upload", "pmtiles", "drop_strategy")
29+
30+
# -----------------------------------------------------------------------------
31+
# Main workflow
32+
# -----------------------------------------------------------------------------
33+
34+
if __name__ == "__main__":
35+
print(f"Building OSM PMTiles from {INPUT_PATH}")
36+
print(f" layer: {LAYER_NAME}")
37+
print(f" zooms: Z{MIN_ZOOM}-z{MAX_ZOOM}")
38+
print(f" drop: --{DROP_STRATEGY}")
39+
print(f" props: {', '.join(PROPERTIES)}")
40+
print(f" -> {OUTPUT_PATH}")
41+
42+
stats = build_pmtiles(
43+
input_parquet = INPUT_PATH,
44+
output_pmtiles = OUTPUT_PATH,
45+
layer_name = LAYER_NAME,
46+
properties = PROPERTIES,
47+
min_zoom = MIN_ZOOM,
48+
max_zoom = MAX_ZOOM,
49+
drop_strategy = DROP_STRATEGY,
50+
)
51+
52+
print(
53+
f"Done. Wrote {stats['rows_written']:,} features, "
54+
f"{stats['pmtiles_bytes'] / 1e9:.2f} GB PMTiles."
55+
)
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""
2+
Upload osm_snapshot.pmtiles to the public S3 bucket.
3+
4+
Key layout:
5+
s3://<bucket>/<s3_prefix>/<s3_version>/osm_snapshot.pmtiles
6+
7+
``s3_version`` defaults to ``versions.aws`` from config. Override with
8+
``--s3-version YYYYMMDD`` when uploading to a different dataset version (e.g.,
9+
to match the existing OSM parquet path when ``versions.aws`` has been bumped past
10+
it).
11+
12+
See the sibling ``upload_to_s3.py`` for AWS setup prerequisites.
13+
"""
14+
import argparse
15+
16+
from config_versioned import Config
17+
18+
from openpois.io.s3 import upload_single_file
19+
20+
# -----------------------------------------------------------------------------
21+
# Configuration
22+
# -----------------------------------------------------------------------------
23+
24+
config = Config("~/repos/openpois/config.yaml")
25+
26+
PMTILES_PATH = config.get_file_path("snapshot_osm", "pmtiles")
27+
S3_BUCKET = config.get("upload", "s3_bucket")
28+
S3_PREFIX = config.get("upload", "s3_prefix_osm")
29+
S3_REGION = config.get("upload", "s3_region")
30+
DEFAULT_VERSION = config.get("versions", "aws")
31+
32+
33+
# -----------------------------------------------------------------------------
34+
# Main workflow
35+
# -----------------------------------------------------------------------------
36+
37+
if __name__ == "__main__":
38+
parser = argparse.ArgumentParser(description = __doc__)
39+
parser.add_argument(
40+
"--s3-version",
41+
default = DEFAULT_VERSION,
42+
help = (
43+
"Version segment in the S3 key path. Defaults to versions.aws in "
44+
"config.yaml. Pass an explicit date (e.g. 20260417) to land the "
45+
"PMTiles alongside an existing dataset at a different version."
46+
),
47+
)
48+
args = parser.parse_args()
49+
50+
if not PMTILES_PATH.exists():
51+
raise FileNotFoundError(
52+
f"{PMTILES_PATH} not found. Run prepare_pmtiles.py first."
53+
)
54+
55+
s3_key = f"{S3_PREFIX}/{args.s3_version}/{PMTILES_PATH.name}"
56+
url = upload_single_file(
57+
local_path = PMTILES_PATH,
58+
bucket = S3_BUCKET,
59+
s3_key = s3_key,
60+
s3_region = S3_REGION,
61+
content_type = "application/octet-stream",
62+
)
63+
print(f"Uploaded OSM PMTiles: {url}")

scripts/osm_snapshot/upload_to_s3.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
"""
5050
from config_versioned import Config
5151

52-
from openpois.io.s3 import upload_partitioned_dataset
52+
from openpois.io.s3 import upload_partitioned_dataset, upload_single_file
5353

5454
# -----------------------------------------------------------------------------
5555
# Configuration constants
@@ -58,6 +58,7 @@
5858
config = Config("~/repos/openpois/config.yaml")
5959

6060
PARTITIONED_DIR = config.get_file_path("snapshot_osm", "partitioned")
61+
PMTILES_PATH = config.get_file_path("snapshot_osm", "pmtiles")
6162
AWS_VERSION = config.get("versions", "aws")
6263
S3_BUCKET = config.get("upload", "s3_bucket")
6364
S3_PREFIX = config.get("upload", "s3_prefix_osm")
@@ -88,3 +89,19 @@
8889
)
8990
print(f"Uploaded {n:,} files.")
9091
print(f"Public base URL: {base_url}")
92+
93+
if PMTILES_PATH.exists():
94+
pmtiles_key = f"{S3_PREFIX}/{AWS_VERSION}/{PMTILES_PATH.name}"
95+
pmtiles_url = upload_single_file(
96+
local_path = PMTILES_PATH,
97+
bucket = S3_BUCKET,
98+
s3_key = pmtiles_key,
99+
s3_region = S3_REGION,
100+
content_type = "application/octet-stream",
101+
)
102+
print(f"Uploaded PMTiles: {pmtiles_url}")
103+
else:
104+
print(
105+
f"No PMTiles at {PMTILES_PATH} — skipping. "
106+
"Run prepare_pmtiles.py to generate."
107+
)

0 commit comments

Comments
 (0)