3030import logging
3131import os
3232import sys
33+ from datetime import UTC , datetime
3334from pathlib import Path
3435from typing import Optional
3536
4445 create_target_groups ,
4546)
4647from policyengine_us_data .pipeline_metadata import pipeline_node
48+ from policyengine_us_data .stage_contracts .calibration_package import (
49+ CalibrationPackageParameters ,
50+ )
4751from policyengine_us_data .pipeline_schema import PipelineNode
4852
4953logging .basicConfig (
7175DEFAULT_TARGET_CONFIG_PATH = Path (__file__ ).resolve ().parent / "target_config.yaml"
7276
7377
78+ def _utc_now_isoformat () -> str :
79+ """Return a compact UTC timestamp for contract metadata."""
80+
81+ return datetime .now (UTC ).isoformat ().replace ("+00:00" , "Z" )
82+
83+
def _calibration_package_contract_parameters(
    *,
    workers: int,
    n_clones: int,
    target_config_path: str | None,
    skip_county: bool,
    skip_source_impute: bool,
    skip_takeup_rerandomize: bool,
    chunked_matrix: bool,
    chunk_size: int,
    parallel: bool,
    num_matrix_workers: int,
) -> CalibrationPackageParameters:
    """Return Stage 2 parameters that affect package construction.

    All arguments are keyword-only and are forwarded, unchanged and under
    the same names, to ``CalibrationPackageParameters.from_runtime_args``.

    Returns:
        Whatever ``CalibrationPackageParameters.from_runtime_args`` builds
        from the supplied runtime arguments.
    """
    # Gather the runtime arguments once so the forwarding call stays short.
    runtime_args = {
        "workers": workers,
        "n_clones": n_clones,
        "target_config_path": target_config_path,
        "skip_county": skip_county,
        "skip_source_impute": skip_source_impute,
        "skip_takeup_rerandomize": skip_takeup_rerandomize,
        "chunked_matrix": chunked_matrix,
        "chunk_size": chunk_size,
        "parallel": parallel,
        "num_matrix_workers": num_matrix_workers,
    }
    return CalibrationPackageParameters.from_runtime_args(**runtime_args)
111+
112+
74113def get_git_provenance () -> dict :
75114 """Capture git state and package version for provenance tracking."""
76115 import subprocess as _sp
@@ -152,7 +191,11 @@ def check_package_staleness(metadata: dict) -> None:
152191 if created :
153192 try :
154193 built_dt = datetime .datetime .fromisoformat (created )
155- age = datetime .datetime .now () - built_dt
194+ if built_dt .tzinfo is None :
195+ built_dt = built_dt .replace (tzinfo = datetime .UTC )
196+ age = datetime .datetime .now (datetime .UTC ) - built_dt .astimezone (
197+ datetime .UTC
198+ )
156199 if age .days > 7 :
157200 print (f"WARNING: Package is { age .days } days old (built { created } )" )
158201 except Exception :
@@ -1303,6 +1346,7 @@ def run_calibration(
13031346 """
13041347 import time
13051348
1349+ started_at = _utc_now_isoformat ()
13061350 t0 = time .time ()
13071351
13081352 # Early exit: load pre-built package
@@ -1547,16 +1591,14 @@ def run_calibration(
15471591 # Step 6b: Save the calibration package. By default this is the
15481592 # minimal package selected by target_config.yaml; use
15491593 # --all-active-targets to build a broad diagnostic package.
1550- import datetime
1551-
15521594 metadata = {
15531595 "dataset_path" : dataset_path ,
15541596 "db_path" : db_path ,
15551597 "n_clones" : n_clones ,
15561598 "n_records" : X_sparse .shape [1 ],
15571599 "base_n_records" : n_records ,
15581600 "seed" : seed ,
1559- "created_at" : datetime . datetime . now (). isoformat (),
1601+ "created_at" : _utc_now_isoformat (),
15601602 "target_config_path" : target_config_path ,
15611603 "package_scope" : "minimal" if target_config else "all_active_targets" ,
15621604 "matrix_builder" : "chunked" if chunked_matrix else "precompute" ,
@@ -1573,20 +1615,63 @@ def run_calibration(
15731615 Path (target_config_path )
15741616 )
15751617
1618+ initial_weights = compute_initial_weights (X_sparse , targets_df )
15761619 if package_output_path :
1577- full_initial_weights = compute_initial_weights (X_sparse , targets_df )
1620+ package_payload = {
1621+ "X_sparse" : X_sparse ,
1622+ "targets_df" : targets_df ,
1623+ "target_names" : target_names ,
1624+ "metadata" : metadata ,
1625+ "initial_weights" : initial_weights ,
1626+ "cd_geoid" : geography .cd_geoid ,
1627+ "block_geoid" : geography .block_geoid ,
1628+ }
15781629 save_calibration_package (
15791630 package_output_path ,
15801631 X_sparse ,
15811632 targets_df ,
15821633 target_names ,
15831634 metadata ,
1584- initial_weights = full_initial_weights ,
1635+ initial_weights = initial_weights ,
15851636 cd_geoid = geography .cd_geoid ,
15861637 block_geoid = geography .block_geoid ,
15871638 )
1639+ from policyengine_us_data .stage_contracts .calibration_package import (
1640+ validate_calibration_package_contract ,
1641+ write_calibration_package_contract ,
1642+ )
15881643
1589- initial_weights = compute_initial_weights (X_sparse , targets_df )
1644+ completed_at = _utc_now_isoformat ()
1645+ write_calibration_package_contract (
1646+ package_path = Path (package_output_path ),
1647+ dataset_path = Path (dataset_path ),
1648+ db_path = Path (db_path ),
1649+ package = package_payload ,
1650+ parameters = _calibration_package_contract_parameters (
1651+ workers = workers ,
1652+ n_clones = n_clones ,
1653+ target_config_path = target_config_path ,
1654+ skip_county = skip_county ,
1655+ skip_source_impute = skip_source_impute ,
1656+ skip_takeup_rerandomize = skip_takeup_rerandomize ,
1657+ chunked_matrix = chunked_matrix ,
1658+ chunk_size = chunk_size ,
1659+ parallel = parallel ,
1660+ num_matrix_workers = num_matrix_workers ,
1661+ ),
1662+ run_id = run_id ,
1663+ started_at = started_at ,
1664+ completed_at = completed_at ,
1665+ duration_s = round (time .time () - t0 , 1 ),
1666+ code_sha = metadata .get ("git_commit" ),
1667+ package_version = metadata .get ("package_version" ),
1668+ )
1669+ validate_calibration_package_contract (
1670+ package_path = Path (package_output_path ),
1671+ package = package_payload ,
1672+ dataset_path = Path (dataset_path ),
1673+ db_path = Path (db_path ),
1674+ )
15901675
15911676 if build_only :
15921677 from policyengine_us_data .calibration .validate_package import (
0 commit comments