|
28 | 28 | if _p not in sys.path: |
29 | 29 | sys.path.insert(0, _p) |
30 | 30 |
|
31 | | -from modal_app.images import cpu_image as image |
32 | | -from modal_app.resilience import reconcile_run_dir_fingerprint |
| 31 | +from modal_app.images import cpu_image as image # noqa: E402 |
| 32 | +from modal_app.resilience import reconcile_run_dir_fingerprint # noqa: E402 |
33 | 33 |
|
34 | 34 | app = modal.App("policyengine-us-data-local-area") |
35 | 35 |
|
@@ -70,6 +70,170 @@ def setup_repo(branch: str): |
70 | 70 | os.chdir("/root/policyengine-us-data") |
71 | 71 |
|
72 | 72 |
|
def _build_promote_national_publish_script(
    *,
    version: str,
    run_id: str,
    rel_paths: list[str],
) -> str:
    """Build the Python source that promotes the national H5 to production.

    The returned text is meant to be handed to a fresh interpreter (the
    callers in this module pass it to ``uv run python -c``).  When executed,
    the script:

    1. promotes ``rel_paths`` from staging to production on HuggingFace,
    2. uploads ``<VOLUME_MOUNT>/<run_id>/national/US.h5`` with
       ``skip_hf=True`` and raises ``RuntimeError`` if that file is missing,
    3. publishes the release manifest, creating a tag and uploading the
       version manifest only when ``should_finalize_local_area_release``
       reports the release as complete,
    4. cleans up the staged copies of ``rel_paths``.

    Args:
        version: Release version embedded in the script.
        run_id: Identifier of the run directory under ``VOLUME_MOUNT``.
        rel_paths: Repo-relative paths of the staged files to promote.

    Returns:
        The generated script source as a string.
    """
    rel_paths_json = json.dumps(rel_paths)
    volume_mount = str(VOLUME_MOUNT)
    # Every interpolated value is embedded with ``!r`` so it arrives in the
    # generated script as a correctly escaped Python literal.  Splicing raw
    # text between quotes would let a quote, backslash or newline in the
    # value corrupt the script (or inject statements), and would let Python
    # consume the JSON escapes before ``json.loads`` ever sees them.
    return f"""
import json
from pathlib import Path
from policyengine_us_data.utils.data_upload import (
    promote_staging_to_production_hf,
    cleanup_staging_hf,
    upload_local_area_file,
    publish_release_manifest_to_hf,
    should_finalize_local_area_release,
)
from policyengine_us_data.utils.version_manifest import (
    HFVersionInfo,
    build_manifest,
    upload_manifest,
)

version = {version!r}
run_id = {run_id!r}
rel_paths = json.loads({rel_paths_json!r})
run_dir = Path({volume_mount!r}) / run_id

print(f"Promoting national H5 from staging to production (run_id={{run_id!r}})...")
promoted = promote_staging_to_production_hf(rel_paths, version, run_id=run_id)
print(f"Promoted {{promoted}} files to HuggingFace production")

national_h5 = run_dir / "national" / "US.h5"
if national_h5.exists():
    print("Uploading national H5 to GCS...")
    upload_local_area_file(
        str(national_h5), "national", version=version, skip_hf=True
    )
    print("Uploaded national H5 to GCS")
else:
    raise RuntimeError(f"Expected national H5 at {{national_h5}}")

print("Updating release manifest...")
should_finalize, missing_prefixes = should_finalize_local_area_release(
    version=version,
    new_repo_paths=["national/US.h5"],
)
manifest = publish_release_manifest_to_hf(
    [(national_h5, "national/US.h5")],
    version=version,
    create_tag=should_finalize,
)
if should_finalize:
    upload_manifest(
        build_manifest(
            version=version,
            blob_names=sorted(
                artifact["path"] for artifact in manifest["artifacts"].values()
            ),
            hf_info=HFVersionInfo(
                repo="policyengine/policyengine-us-data",
                commit=version,
            ),
        )
    )
    print("Updated release manifest and created tag")
else:
    print(
        "Updated release manifest without creating a tag; "
        f"missing prefixes: {{', '.join(missing_prefixes)}}"
    )

print("Cleaning up staging...")
cleaned = cleanup_staging_hf(rel_paths, version, run_id=run_id)
print(f"Cleaned up {{cleaned}} files from staging")
print(f"Successfully promoted national H5 for version {{version}}")
"""
| 150 | + |
| 151 | + |
def _build_promote_publish_script(
    *,
    version: str,
    run_id: str,
    rel_paths: list[str],
) -> str:
    """Build the Python source that promotes staged local-area files.

    The returned text is meant to be handed to a fresh interpreter (the
    callers in this module pass it to ``uv run python -c``).  When executed,
    the script:

    1. promotes ``rel_paths`` from staging to production on HuggingFace,
    2. uploads each ``<VOLUME_MOUNT>/<run_id>/<rel_path>`` with
       ``skip_hf=True``, using the path's parent directory as the
       destination subdirectory,
    3. publishes the release manifest, creating a tag and uploading the
       version manifest only when ``should_finalize_local_area_release``
       reports the release as complete,
    4. cleans up the staged copies of ``rel_paths``.

    Args:
        version: Release version embedded in the script.
        run_id: Identifier of the run directory under ``VOLUME_MOUNT``.
        rel_paths: Repo-relative paths of the staged files to promote.

    Returns:
        The generated script source as a string.
    """
    rel_paths_json = json.dumps(rel_paths)
    volume_mount = str(VOLUME_MOUNT)
    # Every interpolated value is embedded with ``!r`` so it arrives in the
    # generated script as a correctly escaped Python literal.  Splicing raw
    # text between quotes would let a quote, backslash or newline in the
    # value corrupt the script (or inject statements), and would let Python
    # consume the JSON escapes before ``json.loads`` ever sees them.
    return f"""
import json
from pathlib import Path
from policyengine_us_data.utils.data_upload import (
    promote_staging_to_production_hf,
    cleanup_staging_hf,
    upload_local_area_file,
    publish_release_manifest_to_hf,
    should_finalize_local_area_release,
)
from policyengine_us_data.utils.version_manifest import (
    HFVersionInfo,
    build_manifest,
    upload_manifest,
)

rel_paths = json.loads({rel_paths_json!r})
version = {version!r}
run_id = {run_id!r}
run_dir = Path({volume_mount!r}) / run_id

print(f"Promoting {{len(rel_paths)}} files from staging/ to production (run_id={{run_id!r}})...")
promoted = promote_staging_to_production_hf(rel_paths, version, run_id=run_id)
print(f"Promoted {{promoted}} files to HuggingFace production")

print(f"Uploading {{len(rel_paths)}} files to GCS...")
gcs_count = 0
for rel_path in rel_paths:
    local_path = run_dir / rel_path
    subdirectory = str(Path(rel_path).parent)
    upload_local_area_file(
        str(local_path),
        subdirectory,
        version=version,
        skip_hf=True,
    )
    gcs_count += 1
print(f"Uploaded {{gcs_count}} files to GCS")

print("Updating release manifest...")
should_finalize, missing_prefixes = should_finalize_local_area_release(
    version=version,
    new_repo_paths=rel_paths,
)
manifest = publish_release_manifest_to_hf(
    [(run_dir / rel_path, rel_path) for rel_path in rel_paths],
    version=version,
    create_tag=should_finalize,
)
if should_finalize:
    upload_manifest(
        build_manifest(
            version=version,
            blob_names=sorted(
                artifact["path"] for artifact in manifest["artifacts"].values()
            ),
            hf_info=HFVersionInfo(
                repo="policyengine/policyengine-us-data",
                commit=version,
            ),
        )
    )
    print("Updated release manifest and created tag")
else:
    print(
        "Updated release manifest without final tag; missing local-area prefixes: "
        + ", ".join(missing_prefixes)
    )
    print("Deferring version_manifest.json update until release finalization")

print("Cleaning up staging/...")
cleaned = cleanup_staging_hf(rel_paths, version, run_id=run_id)
print(f"Cleaned up {{cleaned}} files from staging/")

print(f"Successfully published version {{version}}")
"""
| 235 | + |
| 236 | + |
73 | 237 | def validate_artifacts( |
74 | 238 | config_path: Path, |
75 | 239 | artifact_dir: Path, |
@@ -556,52 +720,17 @@ def promote_publish(branch: str = "main", version: str = "", run_id: str = "") - |
556 | 720 | with open(manifest_path) as f: |
557 | 721 | manifest = json.load(f) |
558 | 722 |
|
559 | | - rel_paths_json = json.dumps(list(manifest["files"].keys())) |
560 | | - |
561 | 723 | result = subprocess.run( |
562 | 724 | [ |
563 | 725 | "uv", |
564 | 726 | "run", |
565 | 727 | "python", |
566 | 728 | "-c", |
567 | | - f""" |
568 | | -import json |
569 | | -from pathlib import Path |
570 | | -from policyengine_us_data.utils.data_upload import ( |
571 | | - promote_staging_to_production_hf, |
572 | | - cleanup_staging_hf, |
573 | | - upload_local_area_file, |
574 | | -) |
575 | | -
|
576 | | -rel_paths = json.loads('''{rel_paths_json}''') |
577 | | -version = "{version}" |
578 | | -run_id = "{run_id}" |
579 | | -run_dir = Path("{VOLUME_MOUNT}") / run_id |
580 | | -
|
581 | | -print(f"Promoting {{len(rel_paths)}} files from staging/ to production (run_id={{run_id!r}})...") |
582 | | -promoted = promote_staging_to_production_hf(rel_paths, version, run_id=run_id) |
583 | | -print(f"Promoted {{promoted}} files to HuggingFace production") |
584 | | -
|
585 | | -print(f"Uploading {{len(rel_paths)}} files to GCS...") |
586 | | -gcs_count = 0 |
587 | | -for rel_path in rel_paths: |
588 | | - local_path = run_dir / rel_path |
589 | | - subdirectory = str(Path(rel_path).parent) |
590 | | - upload_local_area_file( |
591 | | - str(local_path), |
592 | | - subdirectory, |
593 | | - version=version, |
594 | | - skip_hf=True, |
595 | | - ) |
596 | | - gcs_count += 1 |
597 | | -print(f"Uploaded {{gcs_count}} files to GCS") |
598 | | -
|
599 | | -print("Cleaning up staging/...") |
600 | | -cleaned = cleanup_staging_hf(rel_paths, version, run_id=run_id) |
601 | | -print(f"Cleaned up {{cleaned}} files from staging/") |
602 | | -
|
603 | | -print(f"Successfully published version {{version}}") |
604 | | -""", |
| 729 | + _build_promote_publish_script( |
| 730 | + version=version, |
| 731 | + run_id=run_id, |
| 732 | + rel_paths=list(manifest["files"].keys()), |
| 733 | + ), |
605 | 734 | ], |
606 | 735 | text=True, |
607 | 736 | env=os.environ.copy(), |
@@ -1133,39 +1262,11 @@ def promote_national_publish( |
1133 | 1262 | "run", |
1134 | 1263 | "python", |
1135 | 1264 | "-c", |
1136 | | - f""" |
1137 | | -import json |
1138 | | -from pathlib import Path |
1139 | | -from policyengine_us_data.utils.data_upload import ( |
1140 | | - promote_staging_to_production_hf, |
1141 | | - cleanup_staging_hf, |
1142 | | - upload_local_area_file, |
1143 | | -) |
1144 | | -
|
1145 | | -version = "{version}" |
1146 | | -run_id = "{run_id}" |
1147 | | -rel_paths = {json.dumps(rel_paths)} |
1148 | | -run_dir = Path("{VOLUME_MOUNT}") / run_id |
1149 | | -
|
1150 | | -print(f"Promoting national H5 from staging to production (run_id={{run_id!r}})...") |
1151 | | -promoted = promote_staging_to_production_hf(rel_paths, version, run_id=run_id) |
1152 | | -print(f"Promoted {{promoted}} files to HuggingFace production") |
1153 | | -
|
1154 | | -national_h5 = run_dir / "national" / "US.h5" |
1155 | | -if national_h5.exists(): |
1156 | | - print("Uploading national H5 to GCS...") |
1157 | | - upload_local_area_file( |
1158 | | - str(national_h5), "national", version=version, skip_hf=True |
1159 | | - ) |
1160 | | - print("Uploaded national H5 to GCS") |
1161 | | -else: |
1162 | | - print(f"WARNING: {{national_h5}} not on volume, skipping GCS") |
1163 | | -
|
1164 | | -print("Cleaning up staging...") |
1165 | | -cleaned = cleanup_staging_hf(rel_paths, version, run_id=run_id) |
1166 | | -print(f"Cleaned up {{cleaned}} files from staging") |
1167 | | -print(f"Successfully promoted national H5 for version {{version}}") |
1168 | | -""", |
| 1265 | + _build_promote_national_publish_script( |
| 1266 | + version=version, |
| 1267 | + run_id=run_id, |
| 1268 | + rel_paths=rel_paths, |
| 1269 | + ), |
1169 | 1270 | ], |
1170 | 1271 | text=True, |
1171 | 1272 | env=os.environ.copy(), |
|
0 commit comments