henryspatialanalysis
diff --git a/exploratory/conflation/README.md b/scripts/conflation/README.md
exploratory/conflation/README.md renamed to scripts/conflation/README.md
diff --git a/exploratory/conflation/conflate.py b/scripts/conflation/conflate.py
exploratory/conflation/conflate.py renamed to scripts/conflation/conflate.py
Lines changed: 30 additions & 8 deletions
diff --git a/exploratory/conflation/format_for_upload.py b/scripts/conflation/format_for_upload.py
exploratory/conflation/format_for_upload.py renamed to scripts/conflation/format_for_upload.py
diff --git a/exploratory/conflation/summarize.py b/scripts/conflation/summarize.py
exploratory/conflation/summarize.py renamed to scripts/conflation/summarize.py
Lines changed: 15 additions & 6 deletions
diff --git a/exploratory/conflation/upload_to_s3.py b/scripts/conflation/upload_to_s3.py
exploratory/conflation/upload_to_s3.py renamed to scripts/conflation/upload_to_s3.py
diff --git a/exploratory/foursquare/download.py b/scripts/foursquare/download.py
exploratory/foursquare/download.py renamed to scripts/foursquare/download.py
Lines changed: 22 additions & 10 deletions
diff --git a/exploratory/models/osm_turnover.py b/scripts/models/osm_turnover.py
exploratory/models/osm_turnover.py renamed to scripts/models/osm_turnover.py
Lines changed: 26 additions & 4 deletions
diff --git a/exploratory/osm_data/data_viz.py b/scripts/osm_data/data_viz.py
exploratory/osm_data/data_viz.py renamed to scripts/osm_data/data_viz.py
Lines changed: 23 additions & 5 deletions
diff --git a/exploratory/osm_data/download.py b/scripts/osm_data/download.py
exploratory/osm_data/download.py renamed to scripts/osm_data/download.py
Lines changed: 19 additions & 5 deletions
diff --git a/exploratory/osm_data/format_tabular.py b/scripts/osm_data/format_tabular.py
exploratory/osm_data/format_tabular.py renamed to scripts/osm_data/format_tabular.py
Lines changed: 20 additions & 7 deletions
@@ -1,15 +1,37 @@
 #!/home/nathenry/miniforge3/envs/openpois/bin/python
 """
-Conflate rated OSM POIs with Overture Maps POIs.
-
-Reads both snapshots, builds a taxonomy crosswalk, finds spatial
-candidates via BallTree, scores candidates on distance + name +
-type + identifiers, performs greedy one-to-one matching, and merges
-into a unified superset saved as GeoParquet.
+Conflate rated OSM POIs with Overture Maps POIs into a unified dataset.
+
+Reads both snapshots, assigns each POI a shared taxonomy label via CSV
+crosswalk files, finds spatial candidates within per-category radii using a
+BallTree, scores candidate pairs on distance, name similarity, type agreement,
+and shared identifiers, performs greedy one-to-one matching, and merges all
+POIs (matched and unmatched) into a single GeoParquet output.
+
+Config keys used (config.yaml):
+    snapshot_osm.rated_snapshot            — rated OSM GeoParquet input path
+    snapshot_overture.snapshot             — Overture GeoParquet input path
+    conflation.conflated                   — output GeoParquet path
+    download.osm.filter_keys               — tag keys used for taxonomy assignment
+    conflation.overture_confidence_weight  — weight on Overture confidence in scoring
+    conflation.min_match_score             — minimum composite score to accept a match
+    conflation.max_radius_m                — maximum candidate search radius in meters
+    conflation.default_radius_m            — fallback radius for unclassified POIs
+    conflation.distance_weight             — scoring weight for spatial distance
+    conflation.name_weight                 — scoring weight for name similarity
+    conflation.type_weight                 — scoring weight for taxonomy agreement
+    conflation.identifier_weight           — scoring weight for shared identifiers
+    conflation.chunk_size                  — BallTree chunk size for memory management
+    conflation.test_bbox                   — small bbox used with --test flag
 
 Usage:
-    python exploratory/conflation/conflate.py          # full run
-    python exploratory/conflation/conflate.py --test   # small bbox
+    python scripts/conflation/conflate.py           # full CONUS run
+    python scripts/conflation/conflate.py --test    # Seattle test bbox
+
+Output file:
+    conflated.parquet — GeoParquet with all OSM + Overture POIs, columns:
+        shared_label, source (matched/osm/overture), match_score,
+        osm_id, overture_id, name, conf_mean/lower/upper, geometry, ...
 """
 from __future__ import annotations
 
 
@@ -2,11 +2,20 @@
 """
 Summarize the conflated dataset by shared_label and source.
 
-Produces a CSV with one row per shared_label showing counts by
-source (matched, osm, overture) and average match score.
+Reads conflated.parquet and produces a CSV with one row per shared_label
+showing POI counts broken down by source (matched, osm, overture) and the
+average composite match score for matched pairs.
 
-Usage:
-    python exploratory/conflation/summarize.py
+Config keys used (config.yaml):
+    conflation.conflated        — input GeoParquet path (conflated.parquet)
+    conflation.summary_by_label — output CSV path
+
+Prerequisites:
+    Run scripts/conflation/conflate.py first.
+
+Output file:
+    summary_by_label.csv — columns: shared_label, matched, osm, overture,
+        total, avg_match_score; sorted by total descending
 """
 from __future__ import annotations
 
@@ -53,6 +62,6 @@
     )
     summary.index.name = "shared_label"
 
-    summary.to_csv(OUTPUT_PATH)
-    print(f"\nSaved to {OUTPUT_PATH}")
+    summary.to_csv(output_path)
+    print(f"\nSaved to {output_path}")
     print(f"\n{summary.to_string()}")
@@ -1,18 +1,30 @@
 """
-Exploratory script for downloading a current Foursquare OS Places snapshot.
+Download the current US Foursquare OS Places snapshot as a GeoParquet file.
 
-This script uses openpois.foursquare.download to:
-1. Authenticate to the Foursquare Places Portal Iceberg catalog.
-2. Auto-detect or use a pinned release date.
-3. Load US places filtered by L1 category and save as GeoParquet.
+Authenticates to the Foursquare Places Portal Apache Iceberg REST catalog
+using a portal token, loads the unpartitioned places_os table filtered to US
+records with no closed date, joins against categories_os to resolve L1
+category names, and saves the result as a GeoParquet file.
 
 Authentication:
-    Set the FSQ_PORTAL_TOKEN environment variable to your portal token before
-    running. Register at https://places.foursquare.com to obtain a token.
-
-    Example (bash):
+    Set the FSQ_PORTAL_TOKEN environment variable before running:
         export FSQ_PORTAL_TOKEN="<your_token>"
-        python exploratory/foursquare/download.py
+    Register at https://places.foursquare.com to obtain a token.
+
+Config keys used (config.yaml):
+    download.foursquare.release_date        — pinned release (null = auto-detect)
+    download.foursquare.catalog_uri         — REST catalog endpoint URL
+    download.foursquare.catalog_warehouse   — warehouse name ("places")
+    download.foursquare.catalog_namespace   — namespace ("datasets")
+    download.foursquare.places_table        — places table name ("places_os")
+    download.foursquare.categories_table    — categories table name ("categories_os")
+    download.foursquare.token_env_var       — env var name for the portal token
+    download.foursquare.l1_category_names   — L1 category filter list
+    directories.snapshot_foursquare         — output directory
+
+Output file:
+    foursquare_snapshot.parquet — GeoParquet with ~8.3M US POIs
+        Columns: fsq_place_id, name, fsq_category_ids, geometry, source
 """
 from config_versioned import Config
 from openpois.io.foursquare import download_foursquare_snapshot
 
@@ -1,10 +1,32 @@
 """
-PyTorch model testing
+Fit an empirical Bayes PyTorch model for OSM POI tag change rates.
 
-Created February 12, 2026
-Purpose: Explore a simple empirical Bayes PyTorch model framework for change data
+Reads osm_observations_{tag_key}.csv and fits a Poisson change-rate model
+using L-BFGS optimization via PyTorch. The model estimates a per-group change
+rate λ (events per year). Predictions give the probability that a tag remains
+unchanged after t years for t = 0.0, 0.1, ..., 10.0. Supports constant and
+random-effects (by type) model specifications.
 
-Reads data prepared in `osm/format_tabular.py`
+Config keys used (config.yaml):
+    directories.osm_data                    — input data directory
+    directories.model_output                — output directory for results
+    osm_turnover_model.tag_key              — tag key to model (e.g. "amenity")
+    osm_turnover_model.group_key            — column to group by (null = constant)
+    osm_turnover_model.group_values         — subset of group values (null = all)
+    osm_turnover_model.min_value_count      — minimum observations to include a group
+    osm_turnover_model.model_type           — "constant" or "random_by_type"
+    osm_turnover_model.var_prior            — prior variance on log(λ)
+    osm_turnover_model.n_draws              — number of posterior parameter draws
+    osm_turnover_model.save_full_model      — save param_draws and serialized model
+
+Prerequisites:
+    Run scripts/osm_data/format_tabular.py first.
+
+Output files (in model_output directory):
+    fitted_params.csv   — estimated λ with uncertainty per group
+    predictions.csv     — p(unchanged) at t = 0.0..10.0 years per group
+    param_draws.csv     — posterior draws (if save_full_model = true)
+    fitted_model.pt     — serialized ModelFitter (if save_full_model = true)
 """
 
 import numpy as np
 
@@ -1,9 +1,27 @@
 """
-Exploratory data viz script for OSM observations.
-
-This script:
-1. Reads in the OSM observations from a CSV file.
-2. Creates time series plots of the observations, showing how many remain open over time.
+Plot OSM tag stability curves from observation data.
+
+Reads osm_observations_{tag_key}.csv and computes Kaplan-Meier-style survival
+estimates showing what fraction of tag assignments remain unchanged over time.
+Saves two types of PNG figures:
+    1. Overall stability curve — all tags pooled into a single panel.
+    2. Per-subtype multi-panel curves — top-N values for each key in
+       download_keys, shown as separate facets on one figure per key.
+
+Config keys used (config.yaml):
+    directories.osm_data           — directory containing input CSV and viz/ output
+    download.download_keys         — tag keys used as grouping variables for subplots
+    osm_data.tag_key               — the tag being analysed (e.g. "amenity")
+    osm_data.timestamp_cols        — columns to parse as timestamps (rows with nulls dropped)
+    osm_data.top_n_types           — number of top subtype values per multi-panel figure
+    download.osm.end_date          — right-censoring date for still-unchanged tags
+
+Prerequisites:
+    Run scripts/osm_data/format_tabular.py first.
+
+Output files (in osm_data/viz/):
+    osm_changes_{tag_key}_all.png             — overall survival curve
+    osm_changes_{tag_key}_{key}.png           — per-subtype facet grid, one per key
 """
 
 import numpy as np
 
@@ -1,10 +1,24 @@
 """
-Exploratory script for downloading OSM data.
+Download OSM element change histories for a configured bbox and date range.
 
-This script uses the openpois.osm_data module to:
-1. Collect element IDs across a date range using the Overpass API.
-2. Download element histories from the OSM API.
-3. Save the results to CSV files.
+Uses the Overpass API to collect element IDs for each configured tag key across
+a series of snapshot dates, then fetches the full version history of each
+element via the OSM API.
+
+Config keys used (config.yaml):
+    download.general.bbox            — WGS-84 bbox [xmin, ymin, xmax, ymax]
+    download.general.timeout         — request timeout in seconds
+    download.osm.start_date          — earliest snapshot date (min: 2012-09-13)
+    download.osm.end_date            — latest snapshot date
+    download.osm.date_interval_days  — spacing between Overpass queries in days
+    download.download_keys           — OSM tag keys to search for (e.g. amenity, shop)
+    directories.osm_data             — output directory
+
+Output files (in osm_data directory):
+    osm_elements.csv          — element IDs observed at each snapshot date
+    osm_versions.csv          — one row per element version with all tag fields
+    osm_changes.csv           — one row per version pair flagging tag changes
+    osm_failed_elements.csv   — elements whose history could not be retrieved
 """
 import datetime
 from config_versioned import Config
 
@@ -1,11 +1,24 @@
 """
-Exploratory script for reformatting OSM data into a tabular format.
-
-This script:
-1. Reads in the OSM versions and changes data from CSV files.
-2. Reconfigures POI changesets into 'observations', which are either changes to the
-   relevant POI tag or confirmation that the tag is unchanged.
-3. Saves the observations to a new CSV file.
+Reformat raw OSM version histories into modelling-ready observations.
+
+Reads osm_versions.csv and osm_changes.csv produced by
+scripts/osm_data/download.py, then converts them into an
+observation-per-version format suitable for the change-rate model. Each
+observation records the tag value, the timestamps of the previous tag
+assignment and the current observation, and a flag for whether the tag changed.
+
+Config keys used (config.yaml):
+    directories.osm_data     — directory containing input and output CSVs
+    download.download_keys   — all tag keys collected (passed as keep_keys)
+    osm_data.tag_key         — single tag key to model (e.g. "amenity")
+
+Prerequisites:
+    Run scripts/osm_data/download.py first to produce osm_versions.csv and osm_changes.csv.
+
+Output file (in osm_data directory):
+    osm_observations_{tag_key}.csv — one row per version observation with columns:
+        id, version, tag_key, last_tag_timestamp, obs_timestamp, changed,
+        plus all keep_keys columns for grouping
 """
 
 import pandas as pd