44
55import argparse
66import json
7+ import logging
8+ import sys
79from dataclasses import dataclass , replace
810from datetime import UTC , datetime
911from pathlib import Path
6769
# Presumably the default share of data held out for evaluation in the
# checkpoint imputation ablation -- TODO confirm against the code that reads it.
DEFAULT_CHECKPOINT_IMPUTATION_ABLATION_EVAL_FRACTION = 0.25
# Presumably the smallest household count for which the imputation ablation is
# attempted -- TODO confirm against the code that reads it.
MIN_CHECKPOINT_IMPUTATION_ABLATION_HOUSEHOLDS = 8
# Module-level logger named after this module; checkpoint progress lines are
# sent through it by _emit_checkpoint_progress.
LOGGER = logging.getLogger(__name__)
73+
74+
75+ def _root_logger_has_handlers () -> bool :
76+ return bool (logging .getLogger ().handlers )
77+
78+
def _emit_checkpoint_progress(message: str, /, **context: object) -> None:
    """Emit one checkpoint progress line via logging, with a stderr fallback.

    The emitted line is ``message`` followed by ``[key=value, ...]`` built
    from ``context`` in keyword order. Entries whose value is ``None`` or an
    empty string are omitted; if nothing remains, the brackets are omitted
    as well and the bare message is emitted.

    Args:
        message: Progress message. It is positional-only, so ``message`` can
            still be passed as a context key.
        **context: Key/value details appended to the line. Values are
            rendered with their default string formatting.
    """
    details = ", ".join(
        f"{key}={value}"
        for key, value in context.items()
        # Check the type before testing for emptiness: evaluating ``!= ""``
        # on an array-like value yields an element-wise result whose truth
        # value cannot be taken.
        if value is not None and not (isinstance(value, str) and not value)
    )
    line = f"{message} [{details}]" if details else message
    LOGGER.info(line)
    # If neither this module's logger nor the root logger has a handler
    # attached, also write the line to stderr (flushed immediately) so that
    # progress is still visible when logging has not been configured.
    if not LOGGER.handlers and not _root_logger_has_handlers():
        print(line, file=sys.stderr, flush=True)
7089
7190
7291def _resolve_checkpoint_calibration_target_variables (
@@ -1865,6 +1884,14 @@ def run_policyengine_us_data_rebuild_checkpoint(
18651884 "rebuild_profile_expected" : True ,
18661885 ** dict (run_registry_metadata or {}),
18671886 }
1887+ _emit_checkpoint_progress (
1888+ "PE-US-data rebuild checkpoint: starting build" ,
1889+ output_root = Path (output_root ).expanduser (),
1890+ version_id = version_id or "auto" ,
1891+ target_profile = resolved_config .policyengine_target_profile ,
1892+ donor_condition_selection = resolved_config .donor_imputer_condition_selection ,
1893+ providers = "," .join (provider_names ),
1894+ )
18681895
18691896 artifacts = build_and_save_versioned_us_microplex_from_source_providers (
18701897 providers = list (resolved_providers ),
@@ -1889,6 +1916,19 @@ def run_policyengine_us_data_rebuild_checkpoint(
18891916 run_registry_metadata = resolved_registry_metadata ,
18901917 enable_child_tax_unit_agi_drift = True ,
18911918 )
1919+ _emit_checkpoint_progress (
1920+ "PE-US-data rebuild checkpoint: build complete" ,
1921+ artifact_dir = artifacts .artifact_paths .output_dir ,
1922+ frontier_metric = frontier_metric ,
1923+ )
1924+ _emit_checkpoint_progress (
1925+ "PE-US-data rebuild checkpoint: attaching PE evidence" ,
1926+ artifact_dir = artifacts .artifact_paths .output_dir ,
1927+ compute_harness = not defer_policyengine_harness ,
1928+ compute_native_scores = not defer_policyengine_native_score ,
1929+ compute_native_audit = not defer_native_audit ,
1930+ compute_imputation_ablation = not defer_imputation_ablation ,
1931+ )
18921932 evidence = attach_policyengine_us_data_rebuild_checkpoint_evidence (
18931933 artifacts .artifact_paths .output_dir ,
18941934 build_result = artifacts .build_result ,
@@ -1912,11 +1952,21 @@ def run_policyengine_us_data_rebuild_checkpoint(
19121952 run_index_path = run_index_path ,
19131953 run_registry_metadata = resolved_registry_metadata ,
19141954 )
1955+ _emit_checkpoint_progress (
1956+ "PE-US-data rebuild checkpoint: evidence complete" ,
1957+ parity_path = evidence .parity_path ,
1958+ native_audit_path = evidence .native_audit_path ,
1959+ imputation_ablation_path = evidence .imputation_ablation_path ,
1960+ )
19151961 refreshed_artifacts = _load_checkpoint_versioned_artifacts (
19161962 build_result = artifacts .build_result ,
19171963 artifact_root = artifacts .artifact_paths .output_dir ,
19181964 frontier_metric = frontier_metric ,
19191965 )
1966+ _emit_checkpoint_progress (
1967+ "PE-US-data rebuild checkpoint: checkpoint ready" ,
1968+ artifact_dir = refreshed_artifacts .artifact_paths .output_dir ,
1969+ )
19201970 return PEUSDataRebuildCheckpointResult (
19211971 build_config = resolved_config ,
19221972 provider_names = provider_names ,
@@ -1948,6 +1998,7 @@ def main(argv: list[str] | None = None) -> None:
19481998 parser .add_argument ("--calibration-target-profile" )
19491999 parser .add_argument ("--n-synthetic" , type = int , default = 100_000 )
19502000 parser .add_argument ("--random-seed" , type = int , default = 42 )
2001+ parser .add_argument ("--donor-imputer-condition-selection" )
19512002 parser .add_argument ("--cps-source-year" , type = int , default = 2023 )
19522003 parser .add_argument ("--puf-target-year" , type = int )
19532004 parser .add_argument ("--puf-cps-reference-year" , type = int )
@@ -1983,6 +2034,15 @@ def main(argv: list[str] | None = None) -> None:
19832034 parser .add_argument ("--require-policyengine-native-score" , action = "store_true" )
19842035 args = parser .parse_args (argv )
19852036
2037+ config_overrides = {
2038+ "n_synthetic" : int (args .n_synthetic ),
2039+ "random_seed" : int (args .random_seed ),
2040+ }
2041+ if args .donor_imputer_condition_selection is not None :
2042+ config_overrides ["donor_imputer_condition_selection" ] = (
2043+ args .donor_imputer_condition_selection
2044+ )
2045+
19862046 result = run_policyengine_us_data_rebuild_checkpoint (
19872047 output_root = args .output_root ,
19882048 policyengine_baseline_dataset = args .baseline_dataset ,
@@ -1996,10 +2056,7 @@ def main(argv: list[str] | None = None) -> None:
19962056 calibration_target_variables = tuple (args .calibration_target_variable ),
19972057 calibration_target_domains = tuple (args .calibration_target_domain ),
19982058 calibration_target_geo_levels = tuple (args .calibration_target_geo_level ),
1999- config_overrides = {
2000- "n_synthetic" : int (args .n_synthetic ),
2001- "random_seed" : int (args .random_seed ),
2002- },
2059+ config_overrides = config_overrides ,
20032060 cps_source_year = args .cps_source_year ,
20042061 cps_cache_dir = args .cps_cache_dir ,
20052062 cps_download = not args .no_cps_download ,
0 commit comments