Update publication candidate

MaxGhenis · MaxGhenis · commit 8967ce71520c · 2026-05-24T11:32:11.000Z
diff --git a/.github/publication_candidates/usdata-gha26360054055-a1/changelog.d/1123.changed.md b/.github/publication_candidates/usdata-gha26360054055-a1/changelog.d/1123.changed.md
diff --git a/.github/publication_candidates/usdata-gha26360054055-a1/publication_scope.json b/.github/publication_candidates/usdata-gha26360054055-a1/publication_scope.json
@@ -0,0 +1,7 @@
+{
+  "base_release_version": "1.115.5",
+  "candidate_scope": "1.115.5-patch",
+  "release_bump": "patch",
+  "run_id": "usdata-gha26360054055-a1",
+  "would_release_as_at_build_time": "1.115.6"
+}
diff --git a/.github/publication_scope.json b/.github/publication_scope.json
@@ -2,6 +2,6 @@
   "base_release_version": "1.115.5",
   "candidate_scope": "1.115.5-patch",
   "release_bump": "patch",
-  "run_id": "usdata-gha26359982995-a1",
+  "run_id": "usdata-gha26360054055-a1",
   "would_release_as_at_build_time": "1.115.6"
 }
diff --git a/docs/generated/pipeline_api.json b/docs/generated/pipeline_api.json
@@ -34,7 +34,7 @@
     "docstring": "Impute rent and real_estate_taxes from ACS with state.\n\nArgs:\n    data: CPS data dict.\n    state_fips: State FIPS per household.\n    time_period: Tax year.\n    dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n    Updated data dict.",
     "id": "acs_qrf",
     "kind": "function",
-    "line": 508,
+    "line": 524,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.source_impute._impute_acs"
@@ -61,7 +61,7 @@
     "docstring": "\"Add auto loan balance, interest and net_worth variable.",
     "id": "add_auto_loan",
     "kind": "function",
-    "line": 2938,
+    "line": 2951,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth"
@@ -88,7 +88,7 @@
     "docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n    cps: Output CPS H5 group receiving derived household variables.\n    household: Raw CPS household table.",
     "id": "add_household_variables",
     "kind": "function",
-    "line": 1519,
+    "line": 1531,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_household_variables"
@@ -115,7 +115,7 @@
     "docstring": "Add basic ID and weight variables.\n\nArgs:\n    cps (h5py.File): The CPS dataset file.\n    person (DataFrame): The person table of the ASEC.\n    tax_unit (DataFrame): The tax unit table created from the person table\n        of the ASEC.\n    family (DataFrame): The family table of the ASEC.\n    spm_unit (DataFrame): The SPM unit table created from the person table\n        of the ASEC.\n    household (DataFrame): The household table of the ASEC.",
     "id": "add_id_variables",
     "kind": "function",
-    "line": 988,
+    "line": 997,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_id_variables"
@@ -142,7 +142,7 @@
     "docstring": "Impute ORG-derived wage and union inputs onto CPS persons.",
     "id": "add_org_inputs",
     "kind": "function",
-    "line": 2822,
+    "line": 2835,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs"
@@ -169,7 +169,7 @@
     "docstring": "Add income variables.\n\nArgs:\n    cps (h5py.File): The CPS dataset file.\n    person (DataFrame): The CPS person table.\n    year (int): The CPS year",
     "id": "add_personal_income_variables",
     "kind": "function",
-    "line": 1194,
+    "line": 1206,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_personal_income_variables"
@@ -196,7 +196,7 @@
     "docstring": "Add personal demographic variables.\n\nArgs:\n    cps (h5py.File): The CPS dataset file.\n    person (DataFrame): The CPS person table.",
     "id": "add_personal_variables",
     "kind": "function",
-    "line": 1050,
+    "line": 1059,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_personal_variables"
@@ -223,7 +223,7 @@
     "docstring": "",
     "id": "add_previous_year_income",
     "kind": "function",
-    "line": 1561,
+    "line": 1573,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_previous_year_income"
@@ -250,7 +250,7 @@
     "docstring": "",
     "id": "add_rent",
     "kind": "function",
-    "line": 362,
+    "line": 371,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_rent"
@@ -277,7 +277,7 @@
     "docstring": "",
     "id": "add_spm_variables",
     "kind": "function",
-    "line": 1480,
+    "line": 1492,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_spm_variables"
@@ -304,7 +304,7 @@
     "docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status",
     "id": "add_ssn_card_type",
     "kind": "function",
-    "line": 1667,
+    "line": 1679,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_ssn_card_type"
@@ -331,7 +331,7 @@
     "docstring": "",
     "id": "add_takeup",
     "kind": "function",
-    "line": 510,
+    "line": 519,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_takeup"
@@ -358,7 +358,7 @@
     "docstring": "",
     "id": "add_tips",
     "kind": "function",
-    "line": 2566,
+    "line": 2578,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_tips"
@@ -727,7 +727,7 @@
     "docstring": "",
     "id": "calibration_diagnostics",
     "kind": "function",
-    "line": 1249,
+    "line": 1246,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.compute_diagnostics"
@@ -815,7 +815,7 @@
     "docstring": "Replace clone-half person-level feature variables with donor matches.",
     "id": "clone_features",
     "kind": "function",
-    "line": 412,
+    "line": 585,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions"
@@ -878,7 +878,7 @@
     "docstring": "Assert that final exported variables are leaf inputs.",
     "id": "computed_export_contract",
     "kind": "function",
-    "line": 1589,
+    "line": 1775,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported"
@@ -972,7 +972,7 @@
     "docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n  * predictors = demographics + key income variables\n  * outputs    = CPS-only variables listed in\n                 ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n    data: Extended dataset dict after ``puf_clone_dataset()`` --\n        already doubled, with PUF-imputed income in the second half.\n    time_period: Tax year.\n    dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n    DataFrame with one column per CPS-only variable, containing\n    predicted values for the PUF clone half (person-level).",
     "id": "cps_only",
     "kind": "function",
-    "line": 451,
+    "line": 624,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables"
@@ -1031,7 +1031,7 @@
     "docstring": "Create a stratified sample of CPS data preserving high-income households\nwhile maintaining low-income diversity for poverty analysis.\n\nArgs:\n    target_households: Target number of households in output (approximate)\n    oversample_poor: If True, boost sampling rate for bottom 25% by 1.5x\n    seed: Random seed for reproducibility (default: None for random)\n    base_dataset: Path to source h5 file (default: extended_cps_2024.h5)\n    output_path: Where to save the stratified h5 file\n    high_agi_brackets: List of (lo, hi, cap) tuples defining per-bracket\n        caps for the high-AGI tail. Defaults to HIGH_AGI_BRACKETS.",
     "id": "create_stratified",
     "kind": "function",
-    "line": 85,
+    "line": 145,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.create_stratified_cps.create_stratified_cps_dataset"
@@ -1064,7 +1064,7 @@
     "docstring": "Subsample the loaded CPS dataset and preserve downsampled arrays.\n\nArgs:\n    frac: Fraction of records to retain.",
     "id": "downsample",
     "kind": "function",
-    "line": 329,
+    "line": 338,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.CPS.downsample"
@@ -1091,7 +1091,7 @@
     "docstring": "Fit L0-regularized calibration weights.\n\nArgs:\n    X_sparse: Sparse matrix (targets x records).\n    targets: Target values array.\n    lambda_l0: L0 regularization strength.\n    epochs: Training epochs.\n    device: Torch device.\n    verbose_freq: Print frequency. Defaults to 10%.\n    beta: L0 gate temperature.\n    lambda_l2: L2 regularization strength.\n    learning_rate: Optimizer learning rate.\n    log_freq: Epochs between per-target CSV logs.\n        None disables logging.\n    log_path: Path for the per-target calibration log CSV.\n    target_names: Human-readable target names for the log.\n    initial_weights: Pre-computed initial weights. If None,\n        computed from targets_df age targets.\n    targets_df: Targets DataFrame, used to compute\n        initial_weights when not provided.\n    target_groups: Optional group ID per target row for balanced loss.\n    resume_from: Path to a `.checkpoint.pt` file or `.npy`\n        weights file to continue fitting from.\n    checkpoint_path: Where to save resumable fit checkpoints.\n\nReturns:\n    Weight array of shape (n_records,).",
     "id": "fit_model",
     "kind": "function",
-    "line": 893,
+    "line": 890,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.fit_l0_weights"
@@ -1325,7 +1325,7 @@
     "docstring": "Check formula-reconstructed housing assistance before export.\n\nThe final H5 must not export formula outputs such as ``housing_assistance``.\nThis guard verifies that the remaining leaf inputs still make those\nformulas produce nonzero values before the export contract strips or\nrejects computed variables.",
     "id": "housing_assistance_microsim_validation",
     "kind": "function",
-    "line": 1359,
+    "line": 1545,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation"
@@ -1410,7 +1410,7 @@
     "docstring": "Compute population-based initial weights from age targets.\n\nFor each congressional district, sums person_count targets where\ndomain_variable == \"age\" to get district population, then divides\nby the number of columns (households) active in that district.\n\nArgs:\n    X_sparse: Sparse matrix (targets x records).\n    targets_df: Targets DataFrame with columns: variable,\n        domain_variable, geo_level, geographic_id, value.\n\nReturns:\n    Weight array of shape (n_records,).",
     "id": "init_weights",
     "kind": "function",
-    "line": 814,
+    "line": 811,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.compute_initial_weights"
@@ -3216,7 +3216,7 @@
     "docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n    data: Extended dataset dict (already doubled).\n    predictions: DataFrame from ``_impute_cps_only_variables()``.\n    time_period: Tax year.\n    dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n    Modified data dict with CPS-only variables spliced in.",
     "id": "qrf_pass2",
     "kind": "function",
-    "line": 829,
+    "line": 1015,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions"
@@ -3540,7 +3540,7 @@
     "docstring": "Run unified calibration pipeline.\n\nArgs:\n    dataset_path: Path to CPS h5 file.\n    db_path: Path to policy_data.db.\n    n_clones: Number of dataset clones.\n    lambda_l0: L0 regularization strength.\n    epochs: Training epochs.\n    device: Torch device.\n    seed: Random seed.\n    domain_variables: Filter targets by domain variable.\n    hierarchical_domains: Domains for hierarchical\n        uprating + CD reconciliation.\n    skip_takeup_rerandomize: Skip takeup step.\n    skip_source_impute: Skip ACS/SIPP/SCF imputations.\n    target_config: Parsed target config dict.\n    target_config_path: Path to target config, for provenance.\n    target_config_identity: Resolved target config path/checksum identity.\n    build_only: If True, save package and skip fitting.\n    package_path: Load pre-built package (skip build).\n    package_output_path: Where to save calibration package.\n    beta: L0 gate temperature.\n    lambda_l2: L2 regularization strength.\n    learning_rate: Optimizer learning rate.\n    log_freq: Epochs between per-target CSV logs.\n    log_path: Path for per-target calibration log CSV.\n    resume_from: Path to a checkpoint or weights file to\n        continue fitting from.\n    checkpoint_path: Where to save resumable fit checkpoints.\n    chunked_matrix: Build matrix in clone-household chunks.\n    chunk_size: Clone-household columns per chunk.\n    chunk_dir: Directory for chunked COO/H5 artifacts.\n    keep_chunks: Keep temporary chunk H5 files.\n    resume_chunks: Reuse existing chunk COO files.\n\nReturns:\n    (weights, targets_df, X_sparse, target_names, geography_info)\n    weights is None when build_only=True.\n    geography_info is a dict with cd_geoid and base_n_records.",
     "id": "run_calibration",
     "kind": "function",
-    "line": 1375,
+    "line": 1372,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.run_calibration"
@@ -3670,7 +3670,7 @@
     "docstring": "Impute net_worth and auto_loan from SCF.\n\nArgs:\n    data: CPS data dict.\n    state_fips: State FIPS per household.\n    time_period: Tax year.\n    dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n    Updated data dict.",
     "id": "scf_qrf",
     "kind": "function",
-    "line": 1092,
+    "line": 1108,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.source_impute._impute_scf"
@@ -3724,7 +3724,7 @@
     "docstring": "Impute tip_income, liquid assets, and vehicle signals from SIPP.\n\nArgs:\n    data: CPS data dict.\n    state_fips: State FIPS per household.\n    time_period: Tax year.\n    dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n    Updated data dict.",
     "id": "sipp_qrf",
     "kind": "function",
-    "line": 633,
+    "line": 649,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.source_impute._impute_sipp"
@@ -3751,7 +3751,7 @@
     "docstring": "Re-impute ACS/SIPP/ORG/SCF variables from donor surveys.\n\nOverwrites existing imputed values in data. ACS uses\nstate_fips as a QRF predictor; ORG uses state plus labor-market\npredictors; SIPP and SCF use only demographic and financial\npredictors (no state data).\n\nArgs:\n    data: CPS dataset dict {variable: {time_period: array}}.\n    state_fips: State FIPS per household.\n    time_period: Tax year.\n    dataset_path: Path to CPS h5 for Microsimulation.\n    skip_acs: Skip ACS imputation.\n    skip_sipp: Skip SIPP imputation.\n    skip_org: Skip ORG imputation.\n    skip_scf: Skip SCF imputation.\n\nReturns:\n    Updated data dict with re-imputed variables.",
     "id": "source_impute",
     "kind": "function",
-    "line": 203,
+    "line": 219,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.source_impute.impute_source_variables"
@@ -3928,7 +3928,7 @@
     "docstring": "Save calibration package to pickle.\n\nArgs:\n    path: Output file path.\n    X_sparse: Sparse matrix.\n    targets_df: Targets DataFrame.\n    target_names: Target name list.\n    metadata: Run metadata dict.\n    initial_weights: Pre-computed initial weight array.\n    cd_geoid: CD GEOID array from geography assignment.\n    block_geoid: Block GEOID array from geography assignment.",
     "id": "stage2_calibration_package_writer",
     "kind": "function",
-    "line": 661,
+    "line": 658,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.save_calibration_package"
@@ -3986,7 +3986,7 @@
     "docstring": "Filter target rows before matrix construction.",
     "id": "stage2_target_config_apply",
     "kind": "function",
-    "line": 631,
+    "line": 628,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.apply_target_config_to_targets"
@@ -4041,7 +4041,7 @@
     "docstring": "Load target include/exclude config from YAML.\n\nArgs:\n    path: Path to YAML config file.\n\nReturns:\n    Parsed config dict with include and exclude lists.",
     "id": "stage2_target_config_load",
     "kind": "function",
-    "line": 525,
+    "line": 522,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.calibration.unified_calibration.load_target_config"

Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,6 @@`
`2`	`2`	`"base_release_version": "1.115.5",`
`3`	`3`	`"candidate_scope": "1.115.5-patch",`
`4`	`4`	`"release_bump": "patch",`
`5`		`- "run_id": "usdata-gha26359982995-a1",`
	`5`	`+ "run_id": "usdata-gha26360054055-a1",`
`6`	`6`	`"would_release_as_at_build_time": "1.115.6"`
`7`	`7`	`}`