Skip to content

Commit b117a6c

Browse files
committed
Update publication candidate
1 parent 2b348a0 commit b117a6c

6 files changed

Lines changed: 52 additions & 45 deletions

File tree

changelog.d/974.fixed renamed to .github/publication_candidates/usdata-gha25966054079-a1/changelog.d/974.fixed

File renamed without changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"base_release_version": "1.115.2",
3+
"candidate_scope": "1.115.2-patch",
4+
"release_bump": "patch",
5+
"run_id": "usdata-gha25966054079-a1",
6+
"would_release_as_at_build_time": "1.115.3"
7+
}

.github/publication_scope.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
"base_release_version": "1.115.2",
33
"candidate_scope": "1.115.2-patch",
44
"release_bump": "patch",
5-
"run_id": "usdata-gha25953131994-a1",
5+
"run_id": "usdata-gha25966054079-a1",
66
"would_release_as_at_build_time": "1.115.3"
77
}

docs/engineering/pipeline-map.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ Merge CPS + PUF via cloning, rematch clone features, QRF-impute incomes and CPS-
184184
| `qrf_pass2` Splice CPS-Only Predictions | `process` | `transitional` | `moving` | `policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions` |
185185
| `mortgage_hints` Mortgage Balance Hint Imputation | `library` | `current` | `moving` | `policyengine_us_data.utils.mortgage_interest.impute_tax_unit_mortgage_balance_hints` |
186186
| `mortgage_convert` Structural Mortgage Conversion | `library` | `current` | `moving` | `policyengine_us_data.utils.mortgage_interest.convert_mortgage_interest_to_structural_inputs` |
187-
| `formula_drop` Drop Formula Variables | `process` | `transitional` | `moving` | `policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._drop_formula_variables` |
187+
| `computed_export_contract` Validate Leaf-Input Export | `process` | `transitional` | `moving` | `policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported` |
188188

189189
#### Edges
190190

@@ -204,8 +204,8 @@ Merge CPS + PUF via cloning, rematch clone features, QRF-impute incomes and CPS-
204204
- `qrf_pass2` -> `mortgage_hints` `data_flow`
205205
- `in_scf_s2` -> `mortgage_hints` `data_flow` (SCF donor sample)
206206
- `mortgage_hints` -> `mortgage_convert` `data_flow`
207-
- `mortgage_convert` -> `formula_drop` `data_flow`
208-
- `formula_drop` -> `out_ext` `produces_artifact`
207+
- `mortgage_convert` -> `computed_export_contract` `data_flow`
208+
- `computed_export_contract` -> `out_ext` `produces_artifact`
209209
- `util_qrf_s2` -> `puf_qrf_pass` `uses_utility`
210210
- `util_qrf_s2` -> `cps_only` `uses_utility`
211211
- `util_qrf_s2` -> `mortgage_hints` `uses_utility`

docs/generated/pipeline_api.json

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,7 @@
810810
"docstring": "Replace clone-half person-level feature variables with donor matches.",
811811
"id": "clone_features",
812812
"kind": "function",
813-
"line": 400,
813+
"line": 403,
814814
"metadata": {
815815
"api_refs": [
816816
"policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions"
@@ -869,6 +869,37 @@
869869
"signature": "class CloneWeightMatrix",
870870
"source_file": "policyengine_us_data/build_outputs/weights.py"
871871
},
872+
"computed_export_contract": {
873+
"docstring": "Assert that final exported variables are leaf inputs.",
874+
"id": "computed_export_contract",
875+
"kind": "function",
876+
"line": 1266,
877+
"metadata": {
878+
"api_refs": [
879+
"policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported"
880+
],
881+
"artifacts_in": [
882+
"extended_cps_stage2"
883+
],
884+
"artifacts_out": [
885+
"validated_extended_cps"
886+
],
887+
"description": "Fails the build if the final export still contains variables computed by policyengine-us formulas, adds, or subtracts.",
888+
"id": "computed_export_contract",
889+
"label": "Validate Leaf-Input Export",
890+
"node_type": "process",
891+
"pathways": [
892+
"data_build"
893+
],
894+
"pydoc": true,
895+
"source_file": "policyengine_us_data/datasets/cps/extended_cps.py",
896+
"stability": "moving",
897+
"status": "transitional"
898+
},
899+
"object_path": "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported",
900+
"signature": "def _assert_no_computed_variables_exported(cls, data, time_period)",
901+
"source_file": "policyengine_us_data/datasets/cps/extended_cps.py"
902+
},
872903
"coordinate_publish": {
873904
"docstring": "Coordinate the full publishing workflow.",
874905
"id": "coordinate_publish",
@@ -936,7 +967,7 @@
936967
"docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n * predictors = demographics + key income variables\n * outputs = CPS-only variables listed in\n ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n data: Extended dataset dict after ``puf_clone_dataset()`` --\n already doubled, with PUF-imputed income in the second half.\n time_period: Tax year.\n dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n DataFrame with one column per CPS-only variable, containing\n predicted values for the PUF clone half (person-level).",
937968
"id": "cps_only",
938969
"kind": "function",
939-
"line": 439,
970+
"line": 442,
940971
"metadata": {
941972
"api_refs": [
942973
"policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables"
@@ -1085,37 +1116,6 @@
10851116
"signature": "def fit_l0_weights(X_sparse, targets: np.ndarray, lambda_l0: float, epochs: int = DEFAULT_EPOCHS, device: str = 'cpu', verbose_freq: Optional[int] = None, beta: float = BETA, lambda_l2: float = LAMBDA_L2, learning_rate: float = LEARNING_RATE, log_freq: int = None, log_path: str = None, target_names: list = None, initial_weights: np.ndarray = None, targets_df: 'pd.DataFrame' = None, achievable: np.ndarray = None, target_groups: Optional[np.ndarray] = None, resume_from: str = None, checkpoint_path: str = None) -> np.ndarray",
10861117
"source_file": "policyengine_us_data/calibration/unified_calibration.py"
10871118
},
1088-
"formula_drop": {
1089-
"docstring": "Remove variables that are computed by policyengine-us.\n\nVariables with formulas, ``adds``, or ``subtracts`` are\nrecomputed by the simulation engine, so storing them wastes\nspace and can mislead validation.\n\nAggregate variables whose ``adds`` include a behavioral-\nresponse input (e.g. ``employment_income_before_lsr``) are\nrenamed to that input before dropping so the raw data is\npreserved under the correct input-variable name.",
1090-
"id": "formula_drop",
1091-
"kind": "function",
1092-
"line": 1197,
1093-
"metadata": {
1094-
"api_refs": [
1095-
"policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._drop_formula_variables"
1096-
],
1097-
"artifacts_in": [
1098-
"extended_cps_stage2"
1099-
],
1100-
"artifacts_out": [
1101-
"formula_pruned_extended_cps"
1102-
],
1103-
"description": "Removes variables computed by policyengine-us formulas, while preserving selected imputed inputs under canonical leaf variable names.",
1104-
"id": "formula_drop",
1105-
"label": "Drop Formula Variables",
1106-
"node_type": "process",
1107-
"pathways": [
1108-
"data_build"
1109-
],
1110-
"pydoc": true,
1111-
"source_file": "policyengine_us_data/datasets/cps/extended_cps.py",
1112-
"stability": "moving",
1113-
"status": "transitional"
1114-
},
1115-
"object_path": "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._drop_formula_variables",
1116-
"signature": "def _drop_formula_variables(cls, data)",
1117-
"source_file": "policyengine_us_data/datasets/cps/extended_cps.py"
1118-
},
11191119
"geo_assign": {
11201120
"docstring": "Assign random census block geography to cloned\nCPS records.\n\nEach of n_records * n_clones total records gets a\nrandom census block sampled from the global\npopulation-weighted distribution. State and CD are\nderived from the block GEOID.\n\nArgs:\n n_records: Number of households in the base CPS\n dataset.\n n_clones: Number of clones (default 10).\n seed: Random seed for reproducibility.\n fixed_state_fips: Optional state FIPS per base record. Positive\n values constrain every clone of that record to blocks in the\n requested state; zero or missing values remain unrestricted.\n\nReturns:\n GeographyAssignment with arrays of length\n n_records * n_clones.",
11211121
"id": "geo_assign",
@@ -2619,7 +2619,7 @@
26192619
"docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n data: Extended dataset dict (already doubled).\n predictions: DataFrame from ``_impute_cps_only_variables()``.\n time_period: Tax year.\n dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n Modified data dict with CPS-only variables spliced in.",
26202620
"id": "qrf_pass2",
26212621
"kind": "function",
2622-
"line": 717,
2622+
"line": 748,
26232623
"metadata": {
26242624
"api_refs": [
26252625
"policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions"

docs/generated/pipeline_map.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2260,11 +2260,11 @@
22602260
{
22612261
"edge_type": "data_flow",
22622262
"source": "mortgage_convert",
2263-
"target": "formula_drop"
2263+
"target": "computed_export_contract"
22642264
},
22652265
{
22662266
"edge_type": "produces_artifact",
2267-
"source": "formula_drop",
2267+
"source": "computed_export_contract",
22682268
"target": "out_ext"
22692269
},
22702270
{
@@ -2543,17 +2543,17 @@
25432543
},
25442544
{
25452545
"api_refs": [
2546-
"policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._drop_formula_variables"
2546+
"policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported"
25472547
],
25482548
"artifacts_in": [
25492549
"extended_cps_stage2"
25502550
],
25512551
"artifacts_out": [
2552-
"formula_pruned_extended_cps"
2552+
"validated_extended_cps"
25532553
],
2554-
"description": "Removes variables computed by policyengine-us formulas, while preserving selected imputed inputs under canonical leaf variable names.",
2555-
"id": "formula_drop",
2556-
"label": "Drop Formula Variables",
2554+
"description": "Fails the build if the final export still contains variables computed by policyengine-us formulas, adds, or subtracts.",
2555+
"id": "computed_export_contract",
2556+
"label": "Validate Leaf-Input Export",
25572557
"node_type": "process",
25582558
"pathways": [
25592559
"data_build"

0 commit comments

Comments
 (0)