Skip to content

Commit eb81739

Browse files
authored
Update US release bundle (#356)
* Update US release bundle
* Fix data release manifest TRO hashing
1 parent 5946686 commit eb81739

14 files changed

Lines changed: 197 additions & 79 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Update the bundled US release to policyengine-us 1.691.3 and policyengine-us-data 1.113.1.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ uk = [
4646
]
4747
us = [
4848
"policyengine_core>=3.26.1",
49-
"policyengine-us==1.690.7",
49+
"policyengine-us==1.691.3",
5050
]
5151
dev = [
5252
"pytest",
@@ -61,7 +61,7 @@ dev = [
6161
"ruff>=0.9.0",
6262
"policyengine_core>=3.26.1",
6363
"policyengine-uk==2.88.14",
64-
"policyengine-us==1.690.7",
64+
"policyengine-us==1.691.3",
6565
"towncrier>=24.8.0",
6666
"mypy>=1.11.0",
6767
"pytest-cov>=5.0.0",

src/policyengine/data/release_manifests/us.json

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -5,34 +5,34 @@
55
"policyengine_version": "4.4.3",
66
"model_package": {
77
"name": "policyengine-us",
8-
"version": "1.690.7",
9-
"sha256": "5a7a541efabac98fa069d6845902cf5924c81db67383234b55dcd2b8bfcfc3ca",
10-
"wheel_url": "https://files.pythonhosted.org/packages/2a/02/52109bae5f4767237b43bd72ce0bc4edf7925650a788053b2bc168caa5ae/policyengine_us-1.690.7-py3-none-any.whl"
8+
"version": "1.691.3",
9+
"sha256": "c5d37aa4442f23d48bd5d587a02876c89d83c6135809f12988cc39bd3a47e8b2",
10+
"wheel_url": "https://files.pythonhosted.org/packages/2a/03/e21c872664f90dcc99f1fcf29d1da71409c50cf8a7798ff0596ad10d9400/policyengine_us-1.691.3-py3-none-any.whl"
1111
},
1212
"data_package": {
1313
"name": "policyengine-us-data",
14-
"version": "1.110.12",
14+
"version": "1.113.1",
1515
"repo_id": "policyengine/policyengine-us-data",
16-
"release_manifest_path": "releases/1.110.12/release_manifest.json",
17-
"release_manifest_revision": "3aac4505ec10d31efc1b3799a1e6458a15853ecc"
16+
"release_manifest_path": "releases/1.113.1/release_manifest.json",
17+
"release_manifest_revision": "99e0ec7e784cdba43dd21ff1d80a081599a7a537"
1818
},
1919
"certified_data_artifact": {
2020
"data_package": {
2121
"name": "policyengine-us-data",
22-
"version": "1.110.12"
22+
"version": "1.113.1"
2323
},
24-
"build_id": "policyengine-us-data-1.110.12",
24+
"build_id": "policyengine-us-data-1.113.1",
2525
"dataset": "enhanced_cps_2024",
26-
"uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12",
27-
"sha256": "58a6639f7511b8d804701417e2647f0c3a77f51a3d90441037eaf004b1f00761"
26+
"uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@99e0ec7e784cdba43dd21ff1d80a081599a7a537",
27+
"sha256": "0ce549af18753287c097718362b8cd5cdccfc47953acf7f282709d604cf314d2"
2828
},
2929
"certification": {
3030
"compatibility_basis": "exact_build_model_version",
31-
"data_build_id": "policyengine-us-data-1.110.12",
32-
"built_with_model_version": "1.690.7",
33-
"certified_for_model_version": "1.690.7",
31+
"data_build_id": "policyengine-us-data-1.113.1",
32+
"built_with_model_version": "1.691.3",
33+
"certified_for_model_version": "1.691.3",
3434
"certified_by": "policyengine.py bundled manifest",
35-
"data_build_fingerprint": "sha256:9961ed1c5d00943a360724da560eee425eb9f99f91896f053dca74724c46e96e"
35+
"data_build_fingerprint": "sha256:d891044ece8ec3338904771c879b98ec11a12f1090c074e5b8cee846825d8056"
3636
},
3737
"default_dataset": "enhanced_cps_2024",
3838
"datasets": {

src/policyengine/data/release_manifests/us.trace.tro.jsonld

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
"schema:name": "PolicyEngine",
1818
"schema:url": "https://policyengine.org"
1919
},
20-
"schema:dateCreated": "2026-05-11T18:53:05.508006Z",
20+
"schema:dateCreated": "2026-05-13T03:56:37.150215Z",
2121
"schema:description": "TRACE TRO for certified runtime bundle us-4.4.3 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.",
2222
"schema:name": "policyengine us certified bundle TRO",
2323
"trov:createdWith": {
@@ -45,23 +45,23 @@
4545
"trov:hasArtifact": {
4646
"@id": "composition/1/artifact/data_release_manifest"
4747
},
48-
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/3aac4505ec10d31efc1b3799a1e6458a15853ecc/releases/1.110.12/release_manifest.json"
48+
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/99e0ec7e784cdba43dd21ff1d80a081599a7a537/releases/1.113.1/release_manifest.json"
4949
},
5050
{
5151
"@id": "arrangement/1/location/dataset",
5252
"@type": "trov:ArtifactLocation",
5353
"trov:hasArtifact": {
5454
"@id": "composition/1/artifact/dataset"
5555
},
56-
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/1.110.12/enhanced_cps_2024.h5"
56+
"trov:hasLocation": "https://huggingface.co/policyengine/policyengine-us-data/resolve/99e0ec7e784cdba43dd21ff1d80a081599a7a537/enhanced_cps_2024.h5"
5757
},
5858
{
5959
"@id": "arrangement/1/location/model_wheel",
6060
"@type": "trov:ArtifactLocation",
6161
"trov:hasArtifact": {
6262
"@id": "composition/1/artifact/model_wheel"
6363
},
64-
"trov:hasLocation": "https://files.pythonhosted.org/packages/2a/02/52109bae5f4767237b43bd72ce0bc4edf7925650a788053b2bc168caa5ae/policyengine_us-1.690.7-py3-none-any.whl"
64+
"trov:hasLocation": "https://files.pythonhosted.org/packages/2a/03/e21c872664f90dcc99f1fcf29d1da71409c50cf8a7798ff0596ad10d9400/policyengine_us-1.691.3-py3-none-any.whl"
6565
}
6666
]
6767
}
@@ -75,54 +75,51 @@
7575
"@type": "trov:ResearchArtifact",
7676
"schema:name": "policyengine.py bundle manifest for us",
7777
"trov:mimeType": "application/json",
78-
"trov:sha256": "41e196a6263b8168d403058029c52ebab795e17024ac9ebef11ff876e36959e2"
78+
"trov:sha256": "67d4d6505bed4af9bf2ec575d8b037e36be71b2f9a5afa9bb8cc695ec7a1e913"
7979
},
8080
{
8181
"@id": "composition/1/artifact/data_release_manifest",
8282
"@type": "trov:ResearchArtifact",
83-
"schema:name": "policyengine-us-data release manifest 1.110.12",
83+
"schema:name": "policyengine-us-data release manifest 1.113.1",
8484
"trov:mimeType": "application/json",
85-
"trov:sha256": "17cfd2fbb31064834ed82c0fd7d8ae5c272fe7f24b1e48b226a4acf97ff4c5dd"
85+
"trov:sha256": "d6b29ceff0cbf6a5cff4de94362ebc533dc5044c6a4155a46da7143140a8cb5f"
8686
},
8787
{
8888
"@id": "composition/1/artifact/dataset",
8989
"@type": "trov:ResearchArtifact",
9090
"schema:name": "enhanced_cps_2024",
9191
"trov:mimeType": "application/x-hdf5",
92-
"trov:sha256": "58a6639f7511b8d804701417e2647f0c3a77f51a3d90441037eaf004b1f00761"
92+
"trov:sha256": "0ce549af18753287c097718362b8cd5cdccfc47953acf7f282709d604cf314d2"
9393
},
9494
{
9595
"@id": "composition/1/artifact/model_wheel",
9696
"@type": "trov:ResearchArtifact",
97-
"schema:name": "policyengine-us==1.690.7 wheel",
97+
"schema:name": "policyengine-us==1.691.3 wheel",
9898
"trov:mimeType": "application/zip",
99-
"trov:sha256": "5a7a541efabac98fa069d6845902cf5924c81db67383234b55dcd2b8bfcfc3ca"
99+
"trov:sha256": "c5d37aa4442f23d48bd5d587a02876c89d83c6135809f12988cc39bd3a47e8b2"
100100
}
101101
],
102102
"trov:hasFingerprint": {
103103
"@id": "composition/1/fingerprint",
104104
"@type": "trov:CompositionFingerprint",
105-
"trov:sha256": "b84e895b3f19ffee5ec299b94ae2155448a12af2b3dc61d00f3d17003ecdf14a"
105+
"trov:sha256": "316e373ed13360efa12037200719c9621ce7bced6d80acc4dfa7bbb72962892f"
106106
}
107107
},
108108
"trov:hasPerformance": {
109109
"@id": "trp/1",
110110
"@type": "trov:TransparentResearchPerformance",
111-
"pe:builtWithModelVersion": "1.690.7",
111+
"pe:builtWithModelVersion": "1.691.3",
112112
"pe:certifiedBy": "policyengine.py bundled manifest",
113-
"pe:certifiedForModelVersion": "1.690.7",
114-
"pe:ciGitRef": "refs/heads/main",
115-
"pe:ciGitSha": "1718b493e4749faf62f0ffdff480205abdd20011",
116-
"pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/25692889721",
113+
"pe:certifiedForModelVersion": "1.691.3",
117114
"pe:compatibilityBasis": "exact_build_model_version",
118-
"pe:dataBuildFingerprint": "sha256:9961ed1c5d00943a360724da560eee425eb9f99f91896f053dca74724c46e96e",
119-
"pe:dataBuildId": "policyengine-us-data-1.110.12",
120-
"pe:emittedIn": "github-actions",
121-
"rdfs:comment": "Certification of build policyengine-us-data-1.110.12 for policyengine-us 1.690.7.",
115+
"pe:dataBuildFingerprint": "sha256:d891044ece8ec3338904771c879b98ec11a12f1090c074e5b8cee846825d8056",
116+
"pe:dataBuildId": "policyengine-us-data-1.113.1",
117+
"pe:emittedIn": "local",
118+
"rdfs:comment": "Certification of build policyengine-us-data-1.113.1 for policyengine-us 1.691.3.",
122119
"trov:accessedArrangement": {
123120
"@id": "arrangement/1"
124121
},
125-
"trov:startedAtTime": "2026-05-11T18:53:05.508006Z",
122+
"trov:startedAtTime": "2026-05-13T03:56:37.150215Z",
126123
"trov:wasConductedBy": {
127124
"@id": "trs"
128125
}

src/policyengine/provenance/bundle.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -395,6 +395,13 @@ def refresh_release_bundle(
395395
dataset_repo_id = data_artifact_json.get("repo_id", repo_id)
396396
dataset_path = data_artifact_json.get("path", dataset_path)
397397
dataset_revision = data_artifact_json.get("revision", new_data)
398+
if (
399+
release_manifest_json is not None
400+
and new_release_manifest_revision is not None
401+
and dataset_repo_id == repo_id
402+
and dataset_revision == new_data
403+
):
404+
dataset_revision = new_release_manifest_revision
398405

399406
# Only hit HF if the data version actually changed.
400407
if new_data != old_data:

src/policyengine/provenance/manifest.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import os
23
from functools import lru_cache
34
from importlib import import_module
@@ -127,6 +128,8 @@ class DataReleaseManifest(BaseModel):
127128
can enclose the full set of artifacts published together). Distinct
128129
from per-artifact DOIs on ``DataReleaseArtifact.preservation_mirrors``.
129130
Populated when the release pipeline mirrors to a DOI-minting host."""
131+
source_sha256: Optional[str] = Field(default=None, exclude=True)
132+
"""Byte sha256 of the fetched manifest before runtime URI rewrites."""
130133

131134

132135
class DataCertification(BaseModel):
@@ -180,9 +183,13 @@ def https_dataset_uri(repo_id: str, path_in_repo: str, revision: str) -> str:
180183
return f"https://huggingface.co/{repo_id}/resolve/{revision}/{path_in_repo}"
181184

182185

186+
def _artifact_revision(data_package: "DataPackageVersion") -> str:
187+
return data_package.release_manifest_revision or data_package.version
188+
189+
183190
def https_release_manifest_uri(data_package: "DataPackageVersion") -> str:
184191
"""Return a dereferenceable HTTPS URI for a data release manifest."""
185-
revision = data_package.release_manifest_revision or data_package.version
192+
revision = _artifact_revision(data_package)
186193
return (
187194
f"https://huggingface.co/{data_package.repo_id}/resolve/"
188195
f"{revision}/{data_package.release_manifest_path}"
@@ -267,7 +274,20 @@ def get_data_release_manifest(country_id: str) -> DataReleaseManifest:
267274
raise DataReleaseManifestUnavailableError(
268275
"Could not fetch the data release manifest from Hugging Face."
269276
) from exc
270-
return DataReleaseManifest.model_validate_json(response.text)
277+
data_release_manifest = DataReleaseManifest.model_validate_json(response.text)
278+
source_bytes = response.content
279+
if not isinstance(source_bytes, bytes):
280+
source_bytes = response.text.encode("utf-8")
281+
data_release_manifest.source_sha256 = hashlib.sha256(source_bytes).hexdigest()
282+
release_revision = country_manifest.data_package.release_manifest_revision
283+
if release_revision is not None:
284+
for artifact in data_release_manifest.artifacts.values():
285+
if (
286+
artifact.repo_id == country_manifest.data_package.repo_id
287+
and artifact.revision == country_manifest.data_package.version
288+
):
289+
artifact.revision = release_revision
290+
return data_release_manifest
271291

272292

273293
def _specifier_matches(version: str, specifier: str) -> bool:
@@ -404,7 +424,7 @@ def resolve_dataset_reference(country_id: str, dataset: str) -> str:
404424
return build_hf_uri(
405425
repo_id=manifest.data_package.repo_id,
406426
path_in_repo=path_reference.path,
407-
revision=manifest.data_package.version,
427+
revision=_artifact_revision(manifest.data_package),
408428
)
409429

410430
data_release_manifest = get_data_release_manifest(country_id)
@@ -525,5 +545,5 @@ def resolve_region_dataset_path(
525545
return build_hf_uri(
526546
repo_id=manifest.data_package.repo_id,
527547
path_in_repo=resolved_path,
528-
revision=manifest.data_package.version,
548+
revision=_artifact_revision(manifest.data_package),
529549
)

src/policyengine/provenance/trace.py

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -353,21 +353,14 @@ def build_trace_tro_from_release_bundle(
353353
if data_release_manifest is not None
354354
else None
355355
)
356-
dataset_location = (
357-
https_dataset_uri(
358-
repo_id=dataset_artifact.repo_id,
359-
path_in_repo=dataset_artifact.path,
360-
revision=dataset_artifact.revision,
361-
)
362-
if dataset_artifact is not None
363-
else _dataset_location_from_uri(certified_artifact.uri)
364-
)
356+
dataset_location = _dataset_location_from_uri(certified_artifact.uri)
365357

366358
bundle_manifest_hash = hashlib.sha256(
367359
canonical_json_bytes(country_manifest.model_dump(mode="json"))
368360
).hexdigest()
369361
data_release_manifest_hash = (
370-
hashlib.sha256(
362+
data_release_manifest.source_sha256
363+
or hashlib.sha256(
371364
canonical_json_bytes(data_release_manifest.model_dump(mode="json"))
372365
).hexdigest()
373366
if data_release_manifest is not None

tests/fixtures/household_calculator_snapshots/us_model_surface.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
"has_income_tax": true,
66
"has_region_registry": true,
77
"model_package_name": "policyengine-us",
8-
"num_parameters_bucketed_100s": 851,
8+
"num_parameters_bucketed_100s": 852,
99
"num_variables_bucketed_100s": 48,
1010
"region_registry_country": "us"
1111
}

tests/test_bundle_refresh.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,10 @@ def fake_urlopen(request, *args, **kwargs):
306306
written["data_package"]["release_manifest_revision"]
307307
== "release-manifest-commit-sha"
308308
)
309+
assert (
310+
written["certified_data_artifact"]["uri"]
311+
== "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@release-manifest-commit-sha"
312+
)
309313

310314

311315
def test__bump_data_only_falls_back_to_main_for_release_manifest(
@@ -337,6 +341,10 @@ def fake_urlopen(request, *args, **kwargs):
337341
written["data_package"]["release_manifest_revision"]
338342
== "release-manifest-commit-sha"
339343
)
344+
assert (
345+
written["certified_data_artifact"]["uri"]
346+
== "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@release-manifest-commit-sha"
347+
)
340348

341349

342350
def test__release_manifest_version_mismatch_raises(sandbox) -> None:

tests/test_models.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,12 +113,12 @@ def test_has_release_manifest_metadata(self):
113113
assert us_latest.release_manifest is not None
114114
assert us_latest.release_manifest.country_id == "us"
115115
assert us_latest.model_package.name == "policyengine-us"
116-
assert us_latest.model_package.version == "1.690.7"
116+
assert us_latest.model_package.version == "1.691.3"
117117
assert us_latest.data_package.name == "policyengine-us-data"
118-
assert us_latest.data_package.version == "1.110.12"
118+
assert us_latest.data_package.version == "1.113.1"
119119
assert (
120120
us_latest.default_dataset_uri
121-
== "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.110.12"
121+
== "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@99e0ec7e784cdba43dd21ff1d80a081599a7a537"
122122
)
123123

124124
def test_has_hundreds_of_parameters(self):

0 commit comments

Comments
 (0)