Skip to content

Commit 0e3d9a4

Browse files
authored
Merge pull request #721 from PolicyEngine/codex/ctc-calibration-followups
Add CTC calibration follow-up targets and validation
2 parents c9a7787 + 4a24b48 commit 0e3d9a4

9 files changed

Lines changed: 875 additions & 30 deletions

File tree

changelog.d/719.added.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added richer national CTC calibration and validation coverage by loading AGI-split refundable and nonrefundable CTC targets from IRS geography data, expanding CTC diagnostics to AGI-by-filing-status and child-composition tables, and reporting a canonical ARPA-style CTC reform in national H5 validation.

policyengine_us_data/calibration/ctc_diagnostics.py

Lines changed: 156 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,28 @@
4040
"non_refundable_ctc",
4141
]
4242

43+
# Value columns of the child-age diagnostic table, in output order; the
# table builder prepends a "group" label column to this list.
CHILD_AGE_GROUP_COLUMNS = [
    "tax_unit_count",
    "ctc_qualifying_children",
    "ctc_recipient_count",
    "refundable_ctc_recipient_count",
    "non_refundable_ctc_recipient_count",
]

# Columns rendered with the count formatter. These are exactly the
# child-age value columns, so the set is derived from that list instead of
# repeating the names and risking the two drifting apart.
COUNT_FORMAT_COLUMNS = set(CHILD_AGE_GROUP_COLUMNS)

# Columns rendered with the amount formatter.
AMOUNT_FORMAT_COLUMNS = {
    "ctc",
    "refundable_ctc",
    "non_refundable_ctc",
}
64+
4365

4466
def _assign_agi_bands(adjusted_gross_income: np.ndarray) -> pd.Categorical:
4567
labels = [label for _, _, label in IRS_AGI_BANDS]
@@ -58,15 +80,19 @@ def _normalize_filing_status(filing_status: pd.Series) -> pd.Categorical:
5880
return pd.Categorical(labels, categories=FILING_STATUS_ORDER, ordered=True)
5981

6082

61-
def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
62-
"""Aggregate weighted CTC diagnostics by AGI band and filing status."""
63-
work = frame.copy()
64-
weights = work["tax_unit_weight"].astype(float).to_numpy()
83+
def _assign_ctc_child_count_buckets(
    ctc_qualifying_children: np.ndarray,
) -> pd.Categorical:
    """Bucket CTC qualifying-child counts into "0", "1", "2" and "3+".

    Args:
        ctc_qualifying_children: Per-tax-unit child counts. Any array-like
            (ndarray, list, Series) is accepted; it is coerced to a float
            array before comparison.

    Returns:
        An ordered categorical with categories ``["0", "1", "2", "3+"]`` and
        one entry per input element.
    """
    labels = ["0", "1", "2", "3+"]
    # Coerce up front so list input compares element-wise instead of raising.
    counts = np.asarray(ctc_qualifying_children, dtype=float)
    # "3+" is the fallback: every value not matched by an explicit condition
    # below (values other than 1 or 2 that are greater than 0, and NaN)
    # lands there.
    bucket = np.select(
        [counts <= 0, counts == 1, counts == 2],
        labels[:-1],
        default=labels[-1],
    )
    return pd.Categorical(bucket, categories=labels, ordered=True)
6592

66-
work["agi_band"] = _assign_agi_bands(
67-
work["adjusted_gross_income"].astype(float).to_numpy()
68-
)
69-
work["filing_status_group"] = _normalize_filing_status(work["filing_status"])
93+
94+
def _add_weighted_ctc_columns(work: pd.DataFrame) -> pd.DataFrame:
95+
weights = work["tax_unit_weight"].astype(float).to_numpy()
7096

7197
work["tax_unit_count"] = weights
7298
work["ctc_qualifying_children"] = (
@@ -87,6 +113,67 @@ def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
87113
work["non_refundable_ctc"].astype(float).to_numpy() * weights
88114
)
89115

116+
return work
117+
118+
119+
def _build_child_age_table(work: pd.DataFrame) -> pd.DataFrame | None:
    """Summarise weighted CTC counts for tax units with children in each age group.

    Args:
        work: Tax-unit frame holding ``tax_unit_weight``, ``ctc``,
            ``refundable_ctc`` and ``non_refundable_ctc``, plus the optional
            per-age child-count columns.

    Returns:
        A two-row frame ("Under 6", "Age 6-17") with a ``group`` column
        followed by ``CHILD_AGE_GROUP_COLUMNS``, or ``None`` when either
        per-age child-count column is absent from ``work``.
    """
    age_group_sources = (
        ("Under 6", "ctc_qualifying_children_under_6"),
        ("Age 6-17", "ctc_qualifying_children_6_to_17"),
    )
    if any(column not in work for _, column in age_group_sources):
        return None

    def _as_floats(column: str) -> np.ndarray:
        return work[column].astype(float).to_numpy()

    unit_weights = _as_floats("tax_unit_weight")

    def _weighted_units(mask: np.ndarray) -> float:
        # Weighted number of tax units selected by a boolean mask.
        return float((mask.astype(float) * unit_weights).sum())

    # Output column -> mask of tax units receiving a positive credit amount.
    recipient_masks = {
        "ctc_recipient_count": _as_floats("ctc") > 0,
        "refundable_ctc_recipient_count": _as_floats("refundable_ctc") > 0,
        "non_refundable_ctc_recipient_count": _as_floats("non_refundable_ctc") > 0,
    }

    records = []
    for label, column in age_group_sources:
        children = _as_floats(column)
        any_children = children > 0
        record = {
            "group": label,
            "tax_unit_count": _weighted_units(any_children),
            "ctc_qualifying_children": float((children * unit_weights).sum()),
        }
        # Recipients are restricted to units with a child in this age group.
        for output_column, receives_credit in recipient_masks.items():
            record[output_column] = _weighted_units(receives_credit & any_children)
        records.append(record)

    return pd.DataFrame(records, columns=["group"] + CHILD_AGE_GROUP_COLUMNS)
163+
164+
165+
def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
166+
"""Aggregate weighted CTC diagnostics by AGI band and filing status."""
167+
work = frame.copy()
168+
child_counts = work["ctc_qualifying_children"].astype(float).to_numpy()
169+
170+
work["agi_band"] = _assign_agi_bands(
171+
work["adjusted_gross_income"].astype(float).to_numpy()
172+
)
173+
work["filing_status_group"] = _normalize_filing_status(work["filing_status"])
174+
work["child_count_group"] = _assign_ctc_child_count_buckets(child_counts)
175+
work = _add_weighted_ctc_columns(work)
176+
90177
by_agi = (
91178
work.groupby("agi_band", observed=False)[CTC_GROUP_COLUMNS]
92179
.sum()
@@ -99,26 +186,73 @@ def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
99186
.reset_index()
100187
.rename(columns={"filing_status_group": "group"})
101188
)
189+
by_agi_band_and_filing_status = (
190+
work.groupby(["agi_band", "filing_status_group"], observed=False)[
191+
CTC_GROUP_COLUMNS
192+
]
193+
.sum()
194+
.reset_index()
195+
.rename(columns={"filing_status_group": "filing_status"})
196+
)
197+
by_child_count = (
198+
work.groupby("child_count_group", observed=False)[CTC_GROUP_COLUMNS]
199+
.sum()
200+
.reset_index()
201+
.rename(columns={"child_count_group": "group"})
202+
)
203+
by_child_age = _build_child_age_table(frame)
102204

103-
return {
205+
tables = {
104206
"by_agi_band": by_agi,
105207
"by_filing_status": by_filing_status,
208+
"by_agi_band_and_filing_status": by_agi_band_and_filing_status,
209+
"by_child_count": by_child_count,
106210
}
211+
if by_child_age is not None:
212+
tables["by_child_age"] = by_child_age
213+
return tables
107214

108215

109-
def create_ctc_diagnostic_tables(sim, period=None) -> dict[str, pd.DataFrame]:
    """Calculate weighted CTC diagnostic tables from a microsimulation.

    Args:
        sim: Simulation object providing ``calculate(variable, period=...)``
            (whose result exposes ``.values``) and ``map_result``.
        period: Period forwarded unchanged to every ``sim.calculate`` call.

    Returns:
        The tables produced by ``build_ctc_diagnostic_tables`` for the
        calculated frame. The per-age child-count columns are added on a
        best-effort basis: if they cannot be computed, a warning is logged
        and the frame is passed on without them.
    """
    import logging

    def _values(variable: str, **kwargs):
        return sim.calculate(variable, period=period, **kwargs).values

    tax_unit_variables = [
        "adjusted_gross_income",
        "filing_status",
        "tax_unit_weight",
        "ctc_qualifying_children",
        "ctc",
        "refundable_ctc",
        "non_refundable_ctc",
    ]
    frame = pd.DataFrame({name: _values(name) for name in tax_unit_variables})

    try:
        is_qualifying_child = _values("ctc_qualifying_child", map_to="person").astype(
            bool
        )
        age = _values("age", map_to="person").astype(float)
        children_under_6 = sim.map_result(
            (is_qualifying_child & (age < 6)).astype(float),
            "person",
            "tax_unit",
        )
        children_6_to_17 = sim.map_result(
            (is_qualifying_child & (age >= 6) & (age < 18)).astype(float),
            "person",
            "tax_unit",
        )
        # Attach both columns in one step so a failure cannot leave the
        # frame with only one of the two age columns.
        frame = frame.assign(
            ctc_qualifying_children_under_6=children_under_6,
            ctc_qualifying_children_6_to_17=children_6_to_17,
        )
    except Exception:
        # Still best-effort, but record why the age columns are missing
        # instead of discarding the error silently.
        logging.getLogger(__name__).warning(
            "Could not compute CTC qualifying children by age; "
            "continuing without the child-age columns.",
            exc_info=True,
        )

    return build_ctc_diagnostic_tables(frame)
123257

124258

@@ -132,14 +266,9 @@ def _format_amount(value: float) -> str:
132266

133267
def format_ctc_diagnostic_table(table: pd.DataFrame) -> str:
    """Render a diagnostic table as text with counts and amounts formatted.

    Columns named in ``COUNT_FORMAT_COLUMNS`` go through ``_format_count``
    and columns named in ``AMOUNT_FORMAT_COLUMNS`` through
    ``_format_amount``; all other columns are left as they are. The input
    frame is not modified.
    """
    # Count entries are merged last so they take precedence, matching a
    # count-first check followed by an amount check.
    formatters = {
        **dict.fromkeys(AMOUNT_FORMAT_COLUMNS, _format_amount),
        **dict.fromkeys(COUNT_FORMAT_COLUMNS, _format_count),
    }
    rendered = table.copy()
    for name in rendered.columns:
        formatter = formatters.get(name)
        if formatter is not None:
            rendered[name] = rendered[name].map(formatter)
    return rendered.to_string(index=False)

policyengine_us_data/calibration/target_config.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,15 @@ include:
154154
- variable: refundable_ctc
155155
geo_level: national
156156
domain_variable: refundable_ctc
157+
- variable: refundable_ctc
158+
geo_level: national
159+
domain_variable: adjusted_gross_income,refundable_ctc
157160
- variable: non_refundable_ctc
158161
geo_level: national
159162
domain_variable: non_refundable_ctc
163+
- variable: non_refundable_ctc
164+
geo_level: national
165+
domain_variable: adjusted_gross_income,non_refundable_ctc
160166
- variable: self_employment_income
161167
geo_level: national
162168
domain_variable: self_employment_income
@@ -181,9 +187,15 @@ include:
181187
- variable: tax_unit_count
182188
geo_level: national
183189
domain_variable: refundable_ctc
190+
- variable: tax_unit_count
191+
geo_level: national
192+
domain_variable: adjusted_gross_income,refundable_ctc
184193
- variable: tax_unit_count
185194
geo_level: national
186195
domain_variable: non_refundable_ctc
196+
- variable: tax_unit_count
197+
geo_level: national
198+
domain_variable: adjusted_gross_income,non_refundable_ctc
187199
# Restore old loss.py's self-employment return-count target.
188200
- variable: tax_unit_count
189201
geo_level: national

0 commit comments

Comments
 (0)