Skip to content

Commit 8967ce7

Browse files
committed
Update publication candidate
1 parent 9b37aba commit 8967ce7

4 files changed

Lines changed: 38 additions & 31 deletions

File tree

changelog.d/1123.changed.md renamed to .github/publication_candidates/usdata-gha26360054055-a1/changelog.d/1123.changed.md

File renamed without changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"base_release_version": "1.115.5",
3+
"candidate_scope": "1.115.5-patch",
4+
"release_bump": "patch",
5+
"run_id": "usdata-gha26360054055-a1",
6+
"would_release_as_at_build_time": "1.115.6"
7+
}

.github/publication_scope.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
"base_release_version": "1.115.5",
33
"candidate_scope": "1.115.5-patch",
44
"release_bump": "patch",
5-
"run_id": "usdata-gha26359982995-a1",
5+
"run_id": "usdata-gha26360054055-a1",
66
"would_release_as_at_build_time": "1.115.6"
77
}

docs/generated/pipeline_api.json

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
"docstring": "Impute rent and real_estate_taxes from ACS with state.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.",
3535
"id": "acs_qrf",
3636
"kind": "function",
37-
"line": 508,
37+
"line": 524,
3838
"metadata": {
3939
"api_refs": [
4040
"policyengine_us_data.calibration.source_impute._impute_acs"
@@ -61,7 +61,7 @@
6161
"docstring": "\"Add auto loan balance, interest and net_worth variable.",
6262
"id": "add_auto_loan",
6363
"kind": "function",
64-
"line": 2938,
64+
"line": 2951,
6565
"metadata": {
6666
"api_refs": [
6767
"policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth"
@@ -88,7 +88,7 @@
8888
"docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n cps: Output CPS H5 group receiving derived household variables.\n household: Raw CPS household table.",
8989
"id": "add_household_variables",
9090
"kind": "function",
91-
"line": 1519,
91+
"line": 1531,
9292
"metadata": {
9393
"api_refs": [
9494
"policyengine_us_data.datasets.cps.cps.add_household_variables"
@@ -115,7 +115,7 @@
115115
"docstring": "Add basic ID and weight variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The person table of the ASEC.\n tax_unit (DataFrame): The tax unit table created from the person table\n of the ASEC.\n family (DataFrame): The family table of the ASEC.\n spm_unit (DataFrame): The SPM unit table created from the person table\n of the ASEC.\n household (DataFrame): The household table of the ASEC.",
116116
"id": "add_id_variables",
117117
"kind": "function",
118-
"line": 988,
118+
"line": 997,
119119
"metadata": {
120120
"api_refs": [
121121
"policyengine_us_data.datasets.cps.cps.add_id_variables"
@@ -142,7 +142,7 @@
142142
"docstring": "Impute ORG-derived wage and union inputs onto CPS persons.",
143143
"id": "add_org_inputs",
144144
"kind": "function",
145-
"line": 2822,
145+
"line": 2835,
146146
"metadata": {
147147
"api_refs": [
148148
"policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs"
@@ -169,7 +169,7 @@
169169
"docstring": "Add income variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.\n year (int): The CPS year",
170170
"id": "add_personal_income_variables",
171171
"kind": "function",
172-
"line": 1194,
172+
"line": 1206,
173173
"metadata": {
174174
"api_refs": [
175175
"policyengine_us_data.datasets.cps.cps.add_personal_income_variables"
@@ -196,7 +196,7 @@
196196
"docstring": "Add personal demographic variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.",
197197
"id": "add_personal_variables",
198198
"kind": "function",
199-
"line": 1050,
199+
"line": 1059,
200200
"metadata": {
201201
"api_refs": [
202202
"policyengine_us_data.datasets.cps.cps.add_personal_variables"
@@ -223,7 +223,7 @@
223223
"docstring": "",
224224
"id": "add_previous_year_income",
225225
"kind": "function",
226-
"line": 1561,
226+
"line": 1573,
227227
"metadata": {
228228
"api_refs": [
229229
"policyengine_us_data.datasets.cps.cps.add_previous_year_income"
@@ -250,7 +250,7 @@
250250
"docstring": "",
251251
"id": "add_rent",
252252
"kind": "function",
253-
"line": 362,
253+
"line": 371,
254254
"metadata": {
255255
"api_refs": [
256256
"policyengine_us_data.datasets.cps.cps.add_rent"
@@ -277,7 +277,7 @@
277277
"docstring": "",
278278
"id": "add_spm_variables",
279279
"kind": "function",
280-
"line": 1480,
280+
"line": 1492,
281281
"metadata": {
282282
"api_refs": [
283283
"policyengine_us_data.datasets.cps.cps.add_spm_variables"
@@ -304,7 +304,7 @@
304304
"docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status",
305305
"id": "add_ssn_card_type",
306306
"kind": "function",
307-
"line": 1667,
307+
"line": 1679,
308308
"metadata": {
309309
"api_refs": [
310310
"policyengine_us_data.datasets.cps.cps.add_ssn_card_type"
@@ -331,7 +331,7 @@
331331
"docstring": "",
332332
"id": "add_takeup",
333333
"kind": "function",
334-
"line": 510,
334+
"line": 519,
335335
"metadata": {
336336
"api_refs": [
337337
"policyengine_us_data.datasets.cps.cps.add_takeup"
@@ -358,7 +358,7 @@
358358
"docstring": "",
359359
"id": "add_tips",
360360
"kind": "function",
361-
"line": 2566,
361+
"line": 2578,
362362
"metadata": {
363363
"api_refs": [
364364
"policyengine_us_data.datasets.cps.cps.add_tips"
@@ -727,7 +727,7 @@
727727
"docstring": "",
728728
"id": "calibration_diagnostics",
729729
"kind": "function",
730-
"line": 1249,
730+
"line": 1246,
731731
"metadata": {
732732
"api_refs": [
733733
"policyengine_us_data.calibration.unified_calibration.compute_diagnostics"
@@ -815,7 +815,7 @@
815815
"docstring": "Replace clone-half person-level feature variables with donor matches.",
816816
"id": "clone_features",
817817
"kind": "function",
818-
"line": 412,
818+
"line": 585,
819819
"metadata": {
820820
"api_refs": [
821821
"policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions"
@@ -878,7 +878,7 @@
878878
"docstring": "Assert that final exported variables are leaf inputs.",
879879
"id": "computed_export_contract",
880880
"kind": "function",
881-
"line": 1589,
881+
"line": 1775,
882882
"metadata": {
883883
"api_refs": [
884884
"policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported"
@@ -972,7 +972,7 @@
972972
"docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n * predictors = demographics + key income variables\n * outputs = CPS-only variables listed in\n ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n data: Extended dataset dict after ``puf_clone_dataset()`` --\n already doubled, with PUF-imputed income in the second half.\n time_period: Tax year.\n dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n DataFrame with one column per CPS-only variable, containing\n predicted values for the PUF clone half (person-level).",
973973
"id": "cps_only",
974974
"kind": "function",
975-
"line": 451,
975+
"line": 624,
976976
"metadata": {
977977
"api_refs": [
978978
"policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables"
@@ -1031,7 +1031,7 @@
10311031
"docstring": "Create a stratified sample of CPS data preserving high-income households\nwhile maintaining low-income diversity for poverty analysis.\n\nArgs:\n target_households: Target number of households in output (approximate)\n oversample_poor: If True, boost sampling rate for bottom 25% by 1.5x\n seed: Random seed for reproducibility (default: None for random)\n base_dataset: Path to source h5 file (default: extended_cps_2024.h5)\n output_path: Where to save the stratified h5 file\n high_agi_brackets: List of (lo, hi, cap) tuples defining per-bracket\n caps for the high-AGI tail. Defaults to HIGH_AGI_BRACKETS.",
10321032
"id": "create_stratified",
10331033
"kind": "function",
1034-
"line": 85,
1034+
"line": 145,
10351035
"metadata": {
10361036
"api_refs": [
10371037
"policyengine_us_data.calibration.create_stratified_cps.create_stratified_cps_dataset"
@@ -1064,7 +1064,7 @@
10641064
"docstring": "Subsample the loaded CPS dataset and preserve downsampled arrays.\n\nArgs:\n frac: Fraction of records to retain.",
10651065
"id": "downsample",
10661066
"kind": "function",
1067-
"line": 329,
1067+
"line": 338,
10681068
"metadata": {
10691069
"api_refs": [
10701070
"policyengine_us_data.datasets.cps.cps.CPS.downsample"
@@ -1091,7 +1091,7 @@
10911091
"docstring": "Fit L0-regularized calibration weights.\n\nArgs:\n X_sparse: Sparse matrix (targets x records).\n targets: Target values array.\n lambda_l0: L0 regularization strength.\n epochs: Training epochs.\n device: Torch device.\n verbose_freq: Print frequency. Defaults to 10%.\n beta: L0 gate temperature.\n lambda_l2: L2 regularization strength.\n learning_rate: Optimizer learning rate.\n log_freq: Epochs between per-target CSV logs.\n None disables logging.\n log_path: Path for the per-target calibration log CSV.\n target_names: Human-readable target names for the log.\n initial_weights: Pre-computed initial weights. If None,\n computed from targets_df age targets.\n targets_df: Targets DataFrame, used to compute\n initial_weights when not provided.\n target_groups: Optional group ID per target row for balanced loss.\n resume_from: Path to a `.checkpoint.pt` file or `.npy`\n weights file to continue fitting from.\n checkpoint_path: Where to save resumable fit checkpoints.\n\nReturns:\n Weight array of shape (n_records,).",
10921092
"id": "fit_model",
10931093
"kind": "function",
1094-
"line": 893,
1094+
"line": 890,
10951095
"metadata": {
10961096
"api_refs": [
10971097
"policyengine_us_data.calibration.unified_calibration.fit_l0_weights"
@@ -1325,7 +1325,7 @@
13251325
"docstring": "Check formula-reconstructed housing assistance before export.\n\nThe final H5 must not export formula outputs such as ``housing_assistance``.\nThis guard verifies that the remaining leaf inputs still make those\nformulas produce nonzero values before the export contract strips or\nrejects computed variables.",
13261326
"id": "housing_assistance_microsim_validation",
13271327
"kind": "function",
1328-
"line": 1359,
1328+
"line": 1545,
13291329
"metadata": {
13301330
"api_refs": [
13311331
"policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation"
@@ -1410,7 +1410,7 @@
14101410
"docstring": "Compute population-based initial weights from age targets.\n\nFor each congressional district, sums person_count targets where\ndomain_variable == \"age\" to get district population, then divides\nby the number of columns (households) active in that district.\n\nArgs:\n X_sparse: Sparse matrix (targets x records).\n targets_df: Targets DataFrame with columns: variable,\n domain_variable, geo_level, geographic_id, value.\n\nReturns:\n Weight array of shape (n_records,).",
14111411
"id": "init_weights",
14121412
"kind": "function",
1413-
"line": 814,
1413+
"line": 811,
14141414
"metadata": {
14151415
"api_refs": [
14161416
"policyengine_us_data.calibration.unified_calibration.compute_initial_weights"
@@ -3216,7 +3216,7 @@
32163216
"docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n data: Extended dataset dict (already doubled).\n predictions: DataFrame from ``_impute_cps_only_variables()``.\n time_period: Tax year.\n dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n Modified data dict with CPS-only variables spliced in.",
32173217
"id": "qrf_pass2",
32183218
"kind": "function",
3219-
"line": 829,
3219+
"line": 1015,
32203220
"metadata": {
32213221
"api_refs": [
32223222
"policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions"
@@ -3540,7 +3540,7 @@
35403540
"docstring": "Run unified calibration pipeline.\n\nArgs:\n dataset_path: Path to CPS h5 file.\n db_path: Path to policy_data.db.\n n_clones: Number of dataset clones.\n lambda_l0: L0 regularization strength.\n epochs: Training epochs.\n device: Torch device.\n seed: Random seed.\n domain_variables: Filter targets by domain variable.\n hierarchical_domains: Domains for hierarchical\n uprating + CD reconciliation.\n skip_takeup_rerandomize: Skip takeup step.\n skip_source_impute: Skip ACS/SIPP/SCF imputations.\n target_config: Parsed target config dict.\n target_config_path: Path to target config, for provenance.\n target_config_identity: Resolved target config path/checksum identity.\n build_only: If True, save package and skip fitting.\n package_path: Load pre-built package (skip build).\n package_output_path: Where to save calibration package.\n beta: L0 gate temperature.\n lambda_l2: L2 regularization strength.\n learning_rate: Optimizer learning rate.\n log_freq: Epochs between per-target CSV logs.\n log_path: Path for per-target calibration log CSV.\n resume_from: Path to a checkpoint or weights file to\n continue fitting from.\n checkpoint_path: Where to save resumable fit checkpoints.\n chunked_matrix: Build matrix in clone-household chunks.\n chunk_size: Clone-household columns per chunk.\n chunk_dir: Directory for chunked COO/H5 artifacts.\n keep_chunks: Keep temporary chunk H5 files.\n resume_chunks: Reuse existing chunk COO files.\n\nReturns:\n (weights, targets_df, X_sparse, target_names, geography_info)\n weights is None when build_only=True.\n geography_info is a dict with cd_geoid and base_n_records.",
35413541
"id": "run_calibration",
35423542
"kind": "function",
3543-
"line": 1375,
3543+
"line": 1372,
35443544
"metadata": {
35453545
"api_refs": [
35463546
"policyengine_us_data.calibration.unified_calibration.run_calibration"
@@ -3670,7 +3670,7 @@
36703670
"docstring": "Impute net_worth and auto_loan from SCF.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.",
36713671
"id": "scf_qrf",
36723672
"kind": "function",
3673-
"line": 1092,
3673+
"line": 1108,
36743674
"metadata": {
36753675
"api_refs": [
36763676
"policyengine_us_data.calibration.source_impute._impute_scf"
@@ -3724,7 +3724,7 @@
37243724
"docstring": "Impute tip_income, liquid assets, and vehicle signals from SIPP.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.",
37253725
"id": "sipp_qrf",
37263726
"kind": "function",
3727-
"line": 633,
3727+
"line": 649,
37283728
"metadata": {
37293729
"api_refs": [
37303730
"policyengine_us_data.calibration.source_impute._impute_sipp"
@@ -3751,7 +3751,7 @@
37513751
"docstring": "Re-impute ACS/SIPP/ORG/SCF variables from donor surveys.\n\nOverwrites existing imputed values in data. ACS uses\nstate_fips as a QRF predictor; ORG uses state plus labor-market\npredictors; SIPP and SCF use only demographic and financial\npredictors (no state data).\n\nArgs:\n data: CPS dataset dict {variable: {time_period: array}}.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n skip_acs: Skip ACS imputation.\n skip_sipp: Skip SIPP imputation.\n skip_org: Skip ORG imputation.\n skip_scf: Skip SCF imputation.\n\nReturns:\n Updated data dict with re-imputed variables.",
37523752
"id": "source_impute",
37533753
"kind": "function",
3754-
"line": 203,
3754+
"line": 219,
37553755
"metadata": {
37563756
"api_refs": [
37573757
"policyengine_us_data.calibration.source_impute.impute_source_variables"
@@ -3928,7 +3928,7 @@
39283928
"docstring": "Save calibration package to pickle.\n\nArgs:\n path: Output file path.\n X_sparse: Sparse matrix.\n targets_df: Targets DataFrame.\n target_names: Target name list.\n metadata: Run metadata dict.\n initial_weights: Pre-computed initial weight array.\n cd_geoid: CD GEOID array from geography assignment.\n block_geoid: Block GEOID array from geography assignment.",
39293929
"id": "stage2_calibration_package_writer",
39303930
"kind": "function",
3931-
"line": 661,
3931+
"line": 658,
39323932
"metadata": {
39333933
"api_refs": [
39343934
"policyengine_us_data.calibration.unified_calibration.save_calibration_package"
@@ -3986,7 +3986,7 @@
39863986
"docstring": "Filter target rows before matrix construction.",
39873987
"id": "stage2_target_config_apply",
39883988
"kind": "function",
3989-
"line": 631,
3989+
"line": 628,
39903990
"metadata": {
39913991
"api_refs": [
39923992
"policyengine_us_data.calibration.unified_calibration.apply_target_config_to_targets"
@@ -4041,7 +4041,7 @@
40414041
"docstring": "Load target include/exclude config from YAML.\n\nArgs:\n path: Path to YAML config file.\n\nReturns:\n Parsed config dict with include and exclude lists.",
40424042
"id": "stage2_target_config_load",
40434043
"kind": "function",
4044-
"line": 525,
4044+
"line": 522,
40454045
"metadata": {
40464046
"api_refs": [
40474047
"policyengine_us_data.calibration.unified_calibration.load_target_config"

0 commit comments

Comments
 (0)