@@ -15,9 +15,9 @@ Each refresh writes a new versioned folder. Inside every version:
1515```
1616<YYYY-MM-DD-vN>/
1717├── README.md # version metadata (OSM date, Overture release, model)
18- ├── osm-parquet/ # OSM-only snapshot, geohash -partitioned
18+ ├── osm-parquet/ # OSM-only snapshot, hive-partitioned by primary_tag
1919├── osm-pmtiles/osm.pmtiles # OSM snapshot as a single PMTiles archive
20- ├── conflated-parquet/ # OSM × Overture conflated snapshot, geohash -partitioned
20+ ├── conflated-parquet/ # OSM × Overture conflated snapshot, hive-partitioned by shared_label
2121└── conflated-pmtiles/conflated.pmtiles
2222```
2323
@@ -50,18 +50,29 @@ The `osm-parquet/` files contain the same OSM rows before conflation. This data
5050
5151## Quickstart
5252
53- Read a specific version directly from Source Cooperative (no authentication):
53+ Read directly from Source Cooperative's S3 mirror (no authentication):
54+
55+ - **pyarrow / GeoPandas** use `pyarrow.fs.S3FileSystem(anonymous=True)` and a
56+ bucket-qualified path (no scheme prefix).
57+ - **DuckDB** uses an `s3://` URL plus an anonymous `SECRET` so its glob
58+ expansion works over the bucket listing.
59+
60+ Every example uses `VERSION = "latest"`; swap in a dated folder (e.g.
61+ `"2026-04-23-v0"`) when you need a reproducible pin.
5462
5563### Python: pyarrow
5664
5765``` python
5866import pyarrow.dataset as ds
67+ import pyarrow.fs as pafs
5968
60- BASE = " https://data .source.coop/henryspatialanalysis/openpois"
69+ BASE = " us-west-2.opendata .source.coop/henryspatialanalysis/openpois"
6170VERSION = " latest" # or pin a specific dated folder, e.g. "2026-04-23-v0"
6271
72+ fs = pafs.S3FileSystem(anonymous = True , region = " us-west-2" )
6373pois = ds.dataset(
6474 f " { BASE } / { VERSION } /conflated-parquet/ " ,
75+ filesystem = fs,
6576 format = " parquet" ,
6677 partitioning = " hive" ,
6778)
@@ -74,11 +85,18 @@ print(f"{pois.count_rows():,} POIs")
7485``` python
7586import duckdb
7687
77- BASE = " https ://data .source.coop/henryspatialanalysis/openpois"
88+ BASE = " s3 ://us-west-2.opendata .source.coop/henryspatialanalysis/openpois"
7889VERSION = " latest" # or pin a specific dated folder, e.g. "2026-04-23-v0"
7990
8091con = duckdb.connect()
8192con.execute(" INSTALL httpfs; LOAD httpfs;" )
93+ con.execute("""
94+ CREATE OR REPLACE SECRET srccoop (
95+ TYPE s3, PROVIDER config,
96+ REGION 'us-west-2', URL_STYLE 'path',
97+ KEY_ID '', SECRET ''
98+ );
99+ """ )
82100df = con.execute(f """
83101 SELECT shared_label, COUNT(*) AS n
84102 FROM read_parquet(' { BASE } / { VERSION } /conflated-parquet/**/*.parquet',
@@ -94,13 +112,16 @@ print(df)
94112
95113``` python
96114import geopandas as gpd
115+ import pyarrow.fs as pafs
97116
98- BASE = " https://data .source.coop/henryspatialanalysis/openpois"
117+ BASE = " us-west-2.opendata .source.coop/henryspatialanalysis/openpois"
99118VERSION = " latest" # or pin a specific dated folder, e.g. "2026-04-23-v0"
100119
101- # geohash_prefix=9q is roughly the US West coast
120+ fs = pafs.S3FileSystem(anonymous = True , region = " us-west-2" )
121+ # conflated-parquet is hive-partitioned by shared_label.
102122gdf = gpd.read_parquet(
103- f " { BASE } / { VERSION } /conflated-parquet/geohash_prefix=9q/part-0.parquet "
123+ f " { BASE } / { VERSION } /conflated-parquet/shared_label=Cafe/part-0.parquet " ,
124+ filesystem = fs,
104125)
105126print (gdf.head())
106127```
0 commit comments