Skip to content

Commit d5179e0

Browse files
committed
Add a mirrored latest/ folder to Source.Coop upload.
1 parent 5178a7c commit d5179e0

3 files changed

Lines changed: 134 additions & 4 deletions

File tree

scripts/publish/upload_to_source_coop.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
osm-pmtiles/osm.pmtiles
1515
conflated-parquet/geohash_prefix=*/part-*.parquet
1616
conflated-pmtiles/conflated.pmtiles
17+
latest/
18+
(server-side mirror of the most recently published version)
1719
1820
Credentials
1921
-----------
@@ -41,6 +43,7 @@
4143
from openpois.io.credentials import load_source_coop_credentials
4244
from openpois.io.source_coop import (
4345
make_client,
46+
mirror_prefix,
4447
public_url,
4548
upload_bytes,
4649
upload_directory,
@@ -76,6 +79,13 @@ def parse_args() -> argparse.Namespace:
7679
"default because these rarely change."
7780
),
7881
)
82+
parser.add_argument(
83+
"--skip-latest-mirror", action = "store_true",
84+
help = (
85+
"Skip mirroring the uploaded version to {repo_prefix}/latest/. "
86+
"Enabled by default so /latest always tracks the newest release."
87+
),
88+
)
7989
parser.add_argument(
8090
"--dry-run", action = "store_true",
8191
help = "Print every remote key that would be written and exit.",
@@ -170,6 +180,30 @@ def main() -> None:
170180
dry_run = args.dry_run,
171181
)
172182

183+
# -------------------------------------------------------------------------
184+
# Mirror the published version to {repo_prefix}/latest/
185+
# -------------------------------------------------------------------------
186+
latest_prefix = f"{repo_prefix}/latest"
187+
if not args.skip_latest_mirror:
188+
print()
189+
print(f"Mirroring {version_prefix}/ → {latest_prefix}/ …")
190+
if args.dry_run:
191+
print(
192+
" [dry-run] skipping remote listing — copy/delete counts "
193+
"will only reflect real remote state during a live run."
194+
)
195+
summary = mirror_prefix(
196+
client = client,
197+
bucket = bucket,
198+
src_prefix = f"{version_prefix}/",
199+
dst_prefix = f"{latest_prefix}/",
200+
dry_run = args.dry_run,
201+
)
202+
print(
203+
f" copied {summary['copied']} object(s), "
204+
f"deleted {summary['deleted']} stale object(s)."
205+
)
206+
173207
# -------------------------------------------------------------------------
174208
# Top-level README + LICENSE (opt-in)
175209
# -------------------------------------------------------------------------
@@ -194,6 +228,8 @@ def main() -> None:
194228
print()
195229
print(f"Version landing page: https://source.coop/{repo_prefix}/")
196230
print(f"Version data root: {public_url(f'{version_prefix}/')}")
231+
if not args.skip_latest_mirror:
232+
print(f"Latest data root: {public_url(f'{latest_prefix}/')}")
197233

198234

199235
if __name__ == "__main__":

src/openpois/io/source_coop.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,93 @@ def upload_bytes(
143143
def public_url(key: str) -> str:
144144
"""Compose the public read URL for a given object key."""
145145
return f"{DEFAULT_READ_HOST}/{key.lstrip('/')}"
146+
147+
148+
def list_keys(client, bucket: str, prefix: str) -> list[str]:
    """List the key of every object stored in ``bucket`` beneath ``prefix``.

    Every page produced by the ``list_objects_v2`` paginator is consumed, so
    the result is not limited to a single page of results. Pages that carry
    no ``Contents`` entry (or a null/empty one) contribute nothing.
    """
    pages = client.get_paginator("list_objects_v2").paginate(
        Bucket = bucket,
        Prefix = prefix,
    )
    return [
        entry["Key"]
        for page in pages
        for entry in (page.get("Contents") or [])
    ]
156+
157+
158+
def delete_keys(
    client,
    bucket: str,
    keys: list[str],
    dry_run: bool = False,
) -> None:
    """Delete ``keys`` from ``bucket`` in batches of up to 1000 per request.

    Args:
        client: Object exposing a boto3-style ``delete_objects`` method.
        bucket: Name of the bucket that holds ``keys``.
        keys: Full object keys to remove. An empty list is a silent no-op.
        dry_run: When true, print the summary line and return without
            issuing any delete request.

    Raises:
        RuntimeError: If any batch response lists per-key failures under
            ``Errors``. Every batch is still attempted before raising, so as
            many objects as possible are removed.
    """
    if not keys:
        return
    prefix = "[dry-run] " if dry_run else ""
    print(f"{prefix}deleting {len(keys)} object(s) from s3://{bucket}/")
    if dry_run:
        return

    failures: list[dict] = []
    for i in range(0, len(keys), 1000):
        chunk = keys[i : i + 1000]
        response = client.delete_objects(
            Bucket = bucket,
            Delete = {
                "Objects": [{"Key": k} for k in chunk],
                "Quiet": True,
            },
        )
        # A multi-object delete can succeed at the request level while still
        # failing for individual keys; those failures are only visible in the
        # response body, so collect them instead of discarding the response.
        if isinstance(response, dict):
            failures.extend(response.get("Errors") or [])

    if failures:
        first = failures[0]
        raise RuntimeError(
            f"failed to delete {len(failures)} object(s) from s3://{bucket}/ "
            f"(first failure: {first.get('Key')!r}: "
            f"{first.get('Code')} {first.get('Message')})"
        )
180+
181+
182+
def mirror_prefix(
    client,
    bucket: str,
    src_prefix: str,
    dst_prefix: str,
    dry_run: bool = False,
) -> dict:
    """Server-side copy every object under ``src_prefix`` to ``dst_prefix``.

    Any objects currently under ``dst_prefix`` whose relative path is not
    present in ``src_prefix`` are deleted first, so ``dst_prefix`` becomes a
    faithful mirror of ``src_prefix``.

    Args:
        client: boto3-style S3 client, or ``None``. With ``None`` neither
            side is listed and the summary reports zero copies and deletes.
        bucket: Bucket containing both prefixes.
        src_prefix: Prefix to mirror from; a trailing ``/`` is added if
            missing.
        dst_prefix: Prefix to mirror into; a trailing ``/`` is added if
            missing.
        dry_run: When true, print the planned copies/deletes instead of
            performing them.

    Returns:
        A dict with the normalized ``src_prefix`` and ``dst_prefix``, plus
        ``copied`` (number of source keys listed) and ``deleted`` (number of
        stale destination keys). In a dry run these are the counts that
        *would* be copied/deleted.

    Raises:
        ValueError: If the two prefixes are identical or one is nested
            inside the other.
    """
    src_prefix = src_prefix if src_prefix.endswith("/") else src_prefix + "/"
    dst_prefix = dst_prefix if dst_prefix.endswith("/") else dst_prefix + "/"

    # Overlapping prefixes are destructive: if the source lives under the
    # destination, the stale-object pass below would delete the source
    # objects before they are copied; if the destination lives under the
    # source, every run would re-copy the mirror into itself.
    if src_prefix.startswith(dst_prefix) or dst_prefix.startswith(src_prefix):
        raise ValueError(
            f"src_prefix {src_prefix!r} and dst_prefix {dst_prefix!r} "
            "must not be identical or nested"
        )

    # Without a live client we can't enumerate either side; report zeros.
    if client is None:
        src_keys: list[str] = []
        dst_keys: list[str] = []
    else:
        src_keys = list_keys(client, bucket, src_prefix)
        dst_keys = list_keys(client, bucket, dst_prefix)

    # Compare both sides by their path relative to the respective prefix.
    src_relatives = {k[len(src_prefix) :] for k in src_keys}
    stale = [k for k in dst_keys if k[len(dst_prefix) :] not in src_relatives]

    delete_keys(client, bucket, stale, dry_run = dry_run)

    # Only build a progress bar when there is something to iterate.
    iterator = (
        tqdm(src_keys, desc = f"↪ {dst_prefix}", unit = "obj")
        if src_keys else src_keys
    )
    for src_key in iterator:
        rel = src_key[len(src_prefix) :]
        dst_key = f"{dst_prefix}{rel}"
        if dry_run:
            print(f"[dry-run] copy s3://{bucket}/{src_key} → s3://{bucket}/{dst_key}")
            continue
        # NOTE(review): a single CopyObject request is documented by S3 as
        # limited to 5 GB per object — confirm the published artifacts stay
        # below that, or switch to a multipart copy for larger files.
        client.copy_object(
            Bucket = bucket,
            Key = dst_key,
            CopySource = {"Bucket": bucket, "Key": src_key},
            ACL = "bucket-owner-full-control",
            MetadataDirective = "COPY",
        )

    return {
        "src_prefix": src_prefix,
        "dst_prefix": dst_prefix,
        "copied": len(src_keys),
        "deleted": len(stale),
    }

src/openpois/publish/templates/top_readme.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ Each refresh writes a new versioned folder. Inside every version:
2121
└── conflated-pmtiles/conflated.pmtiles
2222
```
2323

24+
`latest/` is a server-side mirror of the most recently published version —
25+
use it for live demos and tutorials, and pin a dated folder
26+
(e.g. `2026-04-23-v0/`) when you need a stable, reproducible reference.
27+
2428
Browse all versions at
2529
<https://source.coop/henryspatialanalysis/openpois>.
2630

@@ -54,7 +58,7 @@ Read a specific version directly from Source Cooperative (no authentication):
5458
import pyarrow.dataset as ds
5559

5660
BASE = "https://data.source.coop/henryspatialanalysis/openpois"
57-
VERSION = "2026-04-23-v0" # replace with the latest version folder
61+
VERSION = "latest" # or pin a specific dated folder, e.g. "2026-04-23-v0"
5862

5963
pois = ds.dataset(
6064
f"{BASE}/{VERSION}/conflated-parquet/",
@@ -71,7 +75,7 @@ print(f"{pois.count_rows():,} POIs")
7175
import duckdb
7276

7377
BASE = "https://data.source.coop/henryspatialanalysis/openpois"
74-
VERSION = "2026-04-23-v0"
78+
VERSION = "latest" # or pin a specific dated folder, e.g. "2026-04-23-v0"
7579

7680
con = duckdb.connect()
7781
con.execute("INSTALL httpfs; LOAD httpfs;")
@@ -92,7 +96,7 @@ print(df)
9296
import geopandas as gpd
9397

9498
BASE = "https://data.source.coop/henryspatialanalysis/openpois"
95-
VERSION = "2026-04-23-v0"
99+
VERSION = "latest" # or pin a specific dated folder, e.g. "2026-04-23-v0"
96100

97101
# geohash_prefix=9q is roughly the US West coast
98102
gdf = gpd.read_parquet(
@@ -123,7 +127,7 @@ PMTiles are authored at zoom 14, so zoom in past z14 to see points.
123127
<script src="https://unpkg.com/pmtiles@3/dist/pmtiles.js"></script>
124128
<script>
125129
const BASE = "https://data.source.coop/henryspatialanalysis/openpois";
126-
const VERSION = "2026-04-23-v0";
130+
const VERSION = "latest"; // or pin a specific dated folder, e.g. "2026-04-23-v0"
127131
128132
const protocol = new pmtiles.Protocol();
129133
maplibregl.addProtocol("pmtiles", protocol.tile);

0 commit comments

Comments
 (0)