Skip to content

Commit 1bed18a

Browse files
baogorek and claude committed
Remove geography.npz artifact and stacked_dataset_builder.py
Geography is fully deterministic from (n_records, n_clones, seed) via assign_random_geography, so the .npz file was redundant. publish_local_area already regenerates from seed.

Removing the artifact and its only consumer (stacked_dataset_builder.py) eliminates a divergent code path that had to stay in sync. The modal_app/worker_script.py still uses load_geography, so the functions remain in clone_and_assign.py for now.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 59b985b commit 1bed18a

2 files changed

Lines changed: 1 addition & 208 deletions

File tree

policyengine_us_data/calibration/stacked_dataset_builder.py

Lines changed: 0 additions & 184 deletions
This file was deleted.

policyengine_us_data/calibration/unified_calibration.py

Lines changed: 1 addition & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1295,29 +1295,7 @@ def main(argv=None):
12951295
logger.info("Weights saved to %s", output_path)
12961296
print(f"OUTPUT_PATH:{output_path}")
12971297

1298-
# Save full geography for local-area pipeline
1299-
from policyengine_us_data.calibration.clone_and_assign import (
1300-
GeographyAssignment,
1301-
save_geography,
1302-
)
1303-
1304-
geography = GeographyAssignment(
1305-
block_geoid=geography_info["block_geoid"],
1306-
cd_geoid=geography_info["cd_geoid"],
1307-
county_fips=np.array([b[:5] for b in geography_info["block_geoid"]]),
1308-
state_fips=np.array(
1309-
[int(b[:2]) for b in geography_info["block_geoid"]],
1310-
dtype=np.int32,
1311-
),
1312-
n_records=geography_info["base_n_records"],
1313-
n_clones=args.n_clones,
1314-
)
1315-
geo_path = output_dir / "geography.npz"
1316-
save_geography(geography, geo_path)
1317-
logger.info("Geography saved to %s", geo_path)
1318-
print(f"GEOGRAPHY_PATH:{geo_path}")
1319-
1320-
# Also save legacy artifacts for backward compatibility
1298+
# Save legacy block artifact for backward compatibility
13211299
blocks_path = output_dir / "stacked_blocks.npy"
13221300
np.save(str(blocks_path), geography_info["block_geoid"])
13231301
logger.info("Blocks saved to %s", blocks_path)
@@ -1369,7 +1347,6 @@ def _sha256(filepath):
13691347
"elapsed_seconds": round(t_end - t_start, 1),
13701348
"artifacts": {
13711349
"calibration_weights.npy": _sha256(output_path),
1372-
"geography.npz": _sha256(geo_path),
13731350
},
13741351
}
13751352
run_config.update(get_git_provenance())

0 commit comments

Comments
 (0)