Update publication candidate

MaxGhenis · MaxGhenis · commit 7e5a3f7460c8 · 2026-05-25T02:08:24.000Z
diff --git a/.github/publication_candidates/usdata-gha26379540291-a1/changelog.d/fsla-overtime-premium.added b/.github/publication_candidates/usdata-gha26379540291-a1/changelog.d/fsla-overtime-premium.added
diff --git a/.github/publication_candidates/usdata-gha26379540291-a1/publication_scope.json b/.github/publication_candidates/usdata-gha26379540291-a1/publication_scope.json
@@ -0,0 +1,7 @@
+{
+  "base_release_version": "1.115.5",
+  "candidate_scope": "1.115.5-minor",
+  "release_bump": "minor",
+  "run_id": "usdata-gha26379540291-a1",
+  "would_release_as_at_build_time": "1.116.0"
+}
diff --git a/.github/publication_scope.json b/.github/publication_scope.json
@@ -1,7 +1,7 @@
 {
   "base_release_version": "1.115.5",
-  "candidate_scope": "1.115.5-patch",
-  "release_bump": "patch",
-  "run_id": "usdata-gha26360054055-a1",
-  "would_release_as_at_build_time": "1.115.6"
+  "candidate_scope": "1.115.5-minor",
+  "release_bump": "minor",
+  "run_id": "usdata-gha26379540291-a1",
+  "would_release_as_at_build_time": "1.116.0"
 }
diff --git a/docs/generated/pipeline_api.json b/docs/generated/pipeline_api.json
@@ -61,7 +61,7 @@
     "docstring": "\"Add auto loan balance, interest and net_worth variable.",
     "id": "add_auto_loan",
     "kind": "function",
-    "line": 2951,
+    "line": 3074,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth"
@@ -88,7 +88,7 @@
     "docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n    cps: Output CPS H5 group receiving derived household variables.\n    household: Raw CPS household table.",
     "id": "add_household_variables",
     "kind": "function",
-    "line": 1531,
+    "line": 1667,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_household_variables"
@@ -115,7 +115,7 @@
     "docstring": "Add basic ID and weight variables.\n\nArgs:\n    cps (h5py.File): The CPS dataset file.\n    person (DataFrame): The person table of the ASEC.\n    tax_unit (DataFrame): The tax unit table created from the person table\n        of the ASEC.\n    family (DataFrame): The family table of the ASEC.\n    spm_unit (DataFrame): The SPM unit table created from the person table\n        of the ASEC.\n    household (DataFrame): The household table of the ASEC.",
     "id": "add_id_variables",
     "kind": "function",
-    "line": 997,
+    "line": 1043,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_id_variables"
@@ -139,15 +139,15 @@
     "source_file": "policyengine_us_data/datasets/cps/cps.py"
   },
   "add_org_inputs": {
-    "docstring": "Impute ORG-derived wage and union inputs onto CPS persons.",
+    "docstring": "Impute ORG-derived labor-market inputs and derive overtime premium.",
     "id": "add_org_inputs",
     "kind": "function",
-    "line": 2835,
+    "line": 2974,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs"
       ],
-      "description": "Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors.",
+      "description": "Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors, then derive FLSA overtime premium.",
       "id": "add_org_inputs",
       "label": "ORG Labor-Market Inputs",
       "node_type": "library",
@@ -162,14 +162,14 @@
       ]
     },
     "object_path": "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs",
-    "signature": "def add_org_labor_market_inputs(cps: h5py.File) -> None",
+    "signature": "def add_org_labor_market_inputs(cps: h5py.File, time_period: int) -> None",
     "source_file": "policyengine_us_data/datasets/cps/cps.py"
   },
   "add_personal_income_variables": {
     "docstring": "Add income variables.\n\nArgs:\n    cps (h5py.File): The CPS dataset file.\n    person (DataFrame): The CPS person table.\n    year (int): The CPS year",
     "id": "add_personal_income_variables",
     "kind": "function",
-    "line": 1206,
+    "line": 1342,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_personal_income_variables"
@@ -196,7 +196,7 @@
     "docstring": "Add personal demographic variables.\n\nArgs:\n    cps (h5py.File): The CPS dataset file.\n    person (DataFrame): The CPS person table.",
     "id": "add_personal_variables",
     "kind": "function",
-    "line": 1059,
+    "line": 1105,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_personal_variables"
@@ -223,7 +223,7 @@
     "docstring": "",
     "id": "add_previous_year_income",
     "kind": "function",
-    "line": 1573,
+    "line": 1709,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_previous_year_income"
@@ -250,7 +250,7 @@
     "docstring": "",
     "id": "add_rent",
     "kind": "function",
-    "line": 371,
+    "line": 417,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_rent"
@@ -277,7 +277,7 @@
     "docstring": "",
     "id": "add_spm_variables",
     "kind": "function",
-    "line": 1492,
+    "line": 1628,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_spm_variables"
@@ -304,7 +304,7 @@
     "docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status",
     "id": "add_ssn_card_type",
     "kind": "function",
-    "line": 1679,
+    "line": 1815,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_ssn_card_type"
@@ -331,7 +331,7 @@
     "docstring": "",
     "id": "add_takeup",
     "kind": "function",
-    "line": 519,
+    "line": 565,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_takeup"
@@ -358,7 +358,7 @@
     "docstring": "",
     "id": "add_tips",
     "kind": "function",
-    "line": 2578,
+    "line": 2714,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.add_tips"
@@ -815,7 +815,7 @@
     "docstring": "Replace clone-half person-level feature variables with donor matches.",
     "id": "clone_features",
     "kind": "function",
-    "line": 585,
+    "line": 604,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions"
@@ -878,7 +878,7 @@
     "docstring": "Assert that final exported variables are leaf inputs.",
     "id": "computed_export_contract",
     "kind": "function",
-    "line": 1775,
+    "line": 1795,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported"
@@ -972,7 +972,7 @@
     "docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n  * predictors = demographics + key income variables\n  * outputs    = CPS-only variables listed in\n                 ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n    data: Extended dataset dict after ``puf_clone_dataset()`` --\n        already doubled, with PUF-imputed income in the second half.\n    time_period: Tax year.\n    dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n    DataFrame with one column per CPS-only variable, containing\n    predicted values for the PUF clone half (person-level).",
     "id": "cps_only",
     "kind": "function",
-    "line": 624,
+    "line": 643,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables"
@@ -1064,7 +1064,7 @@
     "docstring": "Subsample the loaded CPS dataset and preserve downsampled arrays.\n\nArgs:\n    frac: Fraction of records to retain.",
     "id": "downsample",
     "kind": "function",
-    "line": 338,
+    "line": 384,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.cps.CPS.downsample"
@@ -1325,7 +1325,7 @@
     "docstring": "Check formula-reconstructed housing assistance before export.\n\nThe final H5 must not export formula outputs such as ``housing_assistance``.\nThis guard verifies that the remaining leaf inputs still make those\nformulas produce nonzero values before the export contract strips or\nrejects computed variables.",
     "id": "housing_assistance_microsim_validation",
     "kind": "function",
-    "line": 1545,
+    "line": 1565,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation"
@@ -3216,7 +3216,7 @@
     "docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n    data: Extended dataset dict (already doubled).\n    predictions: DataFrame from ``_impute_cps_only_variables()``.\n    time_period: Tax year.\n    dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n    Modified data dict with CPS-only variables spliced in.",
     "id": "qrf_pass2",
     "kind": "function",
-    "line": 1015,
+    "line": 1034,
     "metadata": {
       "api_refs": [
         "policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions"
diff --git a/docs/generated/pipeline_map.json b/docs/generated/pipeline_map.json
@@ -2739,7 +2739,7 @@
           "api_refs": [
             "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs"
           ],
-          "description": "Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors.",
+          "description": "Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors, then derive FLSA overtime premium.",
           "id": "add_org_inputs",
           "label": "ORG Labor-Market Inputs",
           "node_type": "library",

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"base_release_version": "1.115.5",`
`3`		`- "candidate_scope": "1.115.5-patch",`
`4`		`- "release_bump": "patch",`
`5`		`- "run_id": "usdata-gha26360054055-a1",`
`6`		`- "would_release_as_at_build_time": "1.115.6"`
	`3`	`+ "candidate_scope": "1.115.5-minor",`
	`4`	`+ "release_bump": "minor",`
	`5`	`+ "run_id": "usdata-gha26379540291-a1",`
	`6`	`+ "would_release_as_at_build_time": "1.116.0"`
`7`	`7`	`}`