Skip to content

Commit 01bce1c

Browse files
committed
Add CTC calibration follow-up targets and diagnostics
1 parent c9a7787 commit 01bce1c

9 files changed

Lines changed: 939 additions & 30 deletions

File tree

changelog.d/719.added.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added richer national CTC calibration and validation coverage by loading AGI-split refundable and nonrefundable CTC targets from IRS geography data, expanding CTC diagnostics to AGI-by-filing-status and child-composition tables, and reporting a canonical ARPA-style CTC reform in national H5 validation.

policyengine_us_data/calibration/ctc_diagnostics.py

Lines changed: 160 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,28 @@
4040
"non_refundable_ctc",
4141
]
4242

43+
# Value columns of the child-age table, in output order; _build_child_age_table
# emits them after its leading "group" column.
CHILD_AGE_GROUP_COLUMNS = [
44+
"tax_unit_count",
45+
"ctc_qualifying_children",
46+
"ctc_recipient_count",
47+
"refundable_ctc_recipient_count",
48+
"non_refundable_ctc_recipient_count",
49+
]
50+
51+
# Columns that format_ctc_diagnostic_table renders with _format_count.
COUNT_FORMAT_COLUMNS = {
52+
"tax_unit_count",
53+
"ctc_qualifying_children",
54+
"ctc_recipient_count",
55+
"refundable_ctc_recipient_count",
56+
"non_refundable_ctc_recipient_count",
57+
}
58+
59+
# Columns that format_ctc_diagnostic_table renders with _format_amount.
AMOUNT_FORMAT_COLUMNS = {
60+
"ctc",
61+
"refundable_ctc",
62+
"non_refundable_ctc",
63+
}
64+
4365

4466
def _assign_agi_bands(adjusted_gross_income: np.ndarray) -> pd.Categorical:
4567
labels = [label for _, _, label in IRS_AGI_BANDS]
@@ -58,15 +80,19 @@ def _normalize_filing_status(filing_status: pd.Series) -> pd.Categorical:
5880
return pd.Categorical(labels, categories=FILING_STATUS_ORDER, ordered=True)
5981

6082

61-
def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
62-
"""Aggregate weighted CTC diagnostics by AGI band and filing status."""
63-
work = frame.copy()
64-
weights = work["tax_unit_weight"].astype(float).to_numpy()
83+
def _assign_ctc_child_count_buckets(
    ctc_qualifying_children: np.ndarray,
) -> pd.Categorical:
    """Bucket CTC qualifying-child counts into "0", "1", "2" and "3+".

    Args:
        ctc_qualifying_children: One qualifying-child count per tax unit.
            Any one-dimensional numeric array-like is accepted.

    Returns:
        An ordered categorical with categories ``["0", "1", "2", "3+"]`` and
        one entry per input element. Counts at or below zero map to "0" and
        counts of three or more map to "3+". Values that match no bucket
        (NaN, or a non-integer count below three) are left missing rather
        than being reported as "3+".
    """
    labels = ["0", "1", "2", "3+"]
    counts = np.asarray(ctc_qualifying_children, dtype=float)

    # Start from "missing" so that only explicitly matched counts get a label;
    # comparisons against NaN are all False, so NaN stays missing.
    bucket = np.full(len(counts), np.nan, dtype=object)
    bucket[counts <= 0] = "0"
    bucket[counts == 1] = "1"
    bucket[counts == 2] = "2"
    bucket[counts >= 3] = "3+"
    return pd.Categorical(bucket, categories=labels, ordered=True)
6592

66-
work["agi_band"] = _assign_agi_bands(
67-
work["adjusted_gross_income"].astype(float).to_numpy()
68-
)
69-
work["filing_status_group"] = _normalize_filing_status(work["filing_status"])
93+
94+
def _add_weighted_ctc_columns(work: pd.DataFrame) -> pd.DataFrame:
95+
weights = work["tax_unit_weight"].astype(float).to_numpy()
7096

7197
work["tax_unit_count"] = weights
7298
work["ctc_qualifying_children"] = (
@@ -87,6 +113,71 @@ def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
87113
work["non_refundable_ctc"].astype(float).to_numpy() * weights
88114
)
89115

116+
return work
117+
118+
119+
def _build_child_age_table(work: pd.DataFrame) -> pd.DataFrame | None:
120+
if (
121+
"ctc_qualifying_children_under_6" not in work
122+
or "ctc_qualifying_children_6_to_17" not in work
123+
):
124+
return None
125+
126+
weights = work["tax_unit_weight"].astype(float).to_numpy()
127+
ctc_positive = work["ctc"].astype(float).to_numpy() > 0
128+
refundable_positive = work["refundable_ctc"].astype(float).to_numpy() > 0
129+
non_refundable_positive = (
130+
work["non_refundable_ctc"].astype(float).to_numpy() > 0
131+
)
132+
133+
rows = []
134+
for label, child_counts in (
135+
(
136+
"Under 6",
137+
work["ctc_qualifying_children_under_6"].astype(float).to_numpy(),
138+
),
139+
(
140+
"Age 6-17",
141+
work["ctc_qualifying_children_6_to_17"].astype(float).to_numpy(),
142+
),
143+
):
144+
has_children = child_counts > 0
145+
rows.append(
146+
{
147+
"group": label,
148+
"tax_unit_count": float((has_children.astype(float) * weights).sum()),
149+
"ctc_qualifying_children": float((child_counts * weights).sum()),
150+
"ctc_recipient_count": float(
151+
((ctc_positive & has_children).astype(float) * weights).sum()
152+
),
153+
"refundable_ctc_recipient_count": float(
154+
(
155+
(refundable_positive & has_children).astype(float) * weights
156+
).sum()
157+
),
158+
"non_refundable_ctc_recipient_count": float(
159+
(
160+
(non_refundable_positive & has_children).astype(float) * weights
161+
).sum()
162+
),
163+
}
164+
)
165+
166+
return pd.DataFrame(rows, columns=["group"] + CHILD_AGE_GROUP_COLUMNS)
167+
168+
169+
def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
170+
"""Aggregate weighted CTC diagnostics by AGI band and filing status."""
171+
work = frame.copy()
172+
child_counts = work["ctc_qualifying_children"].astype(float).to_numpy()
173+
174+
work["agi_band"] = _assign_agi_bands(
175+
work["adjusted_gross_income"].astype(float).to_numpy()
176+
)
177+
work["filing_status_group"] = _normalize_filing_status(work["filing_status"])
178+
work["child_count_group"] = _assign_ctc_child_count_buckets(child_counts)
179+
work = _add_weighted_ctc_columns(work)
180+
90181
by_agi = (
91182
work.groupby("agi_band", observed=False)[CTC_GROUP_COLUMNS]
92183
.sum()
@@ -99,26 +190,73 @@ def build_ctc_diagnostic_tables(frame: pd.DataFrame) -> dict[str, pd.DataFrame]:
99190
.reset_index()
100191
.rename(columns={"filing_status_group": "group"})
101192
)
193+
by_agi_band_and_filing_status = (
194+
work.groupby(["agi_band", "filing_status_group"], observed=False)[
195+
CTC_GROUP_COLUMNS
196+
]
197+
.sum()
198+
.reset_index()
199+
.rename(columns={"filing_status_group": "filing_status"})
200+
)
201+
by_child_count = (
202+
work.groupby("child_count_group", observed=False)[CTC_GROUP_COLUMNS]
203+
.sum()
204+
.reset_index()
205+
.rename(columns={"child_count_group": "group"})
206+
)
207+
by_child_age = _build_child_age_table(frame)
102208

103-
return {
209+
tables = {
104210
"by_agi_band": by_agi,
105211
"by_filing_status": by_filing_status,
212+
"by_agi_band_and_filing_status": by_agi_band_and_filing_status,
213+
"by_child_count": by_child_count,
106214
}
215+
if by_child_age is not None:
216+
tables["by_child_age"] = by_child_age
217+
return tables
107218

108219

109-
def create_ctc_diagnostic_tables(sim, period=None) -> dict[str, pd.DataFrame]:
    """Calculate weighted CTC diagnostic tables from a microsimulation.

    Args:
        sim: Simulation object providing ``calculate(name, period=...,
            map_to=...)`` (results expose ``.values``) and
            ``map_result(values, source_entity, target_entity)``.
        period: Period forwarded unchanged to every ``sim.calculate`` call.
            ``None`` is passed through as-is.

    Returns:
        The tables produced by ``build_ctc_diagnostic_tables`` for the
        calculated tax-unit frame. The child-age columns are added on a
        best-effort basis: if they cannot be computed, a warning is logged
        and the frame is passed on without them.
    """
    import logging

    tax_unit_variables = (
        "adjusted_gross_income",
        "filing_status",
        "tax_unit_weight",
        "ctc_qualifying_children",
        "ctc",
        "refundable_ctc",
        "non_refundable_ctc",
    )
    frame = pd.DataFrame(
        {
            variable: sim.calculate(variable, period=period).values
            for variable in tax_unit_variables
        }
    )

    try:
        ctc_qualifying_child = sim.calculate(
            "ctc_qualifying_child",
            map_to="person",
            period=period,
        ).values.astype(bool)
        age = sim.calculate("age", map_to="person", period=period).values.astype(float)
        # Person-level 0/1 flags mapped to the tax unit — presumably summed
        # per unit by map_result; confirm against the simulation API.
        under_6 = sim.map_result(
            (ctc_qualifying_child & (age < 6)).astype(float),
            "person",
            "tax_unit",
        )
        from_6_to_17 = sim.map_result(
            (ctc_qualifying_child & (age >= 6) & (age < 18)).astype(float),
            "person",
            "tax_unit",
        )
        # Assign only after both splits exist so the frame never carries one
        # age column without the other because of a calculation failure.
        frame["ctc_qualifying_children_under_6"] = under_6
        frame["ctc_qualifying_children_6_to_17"] = from_6_to_17
    except Exception:
        # Deliberately best-effort: the age split is optional, but the failure
        # is recorded instead of being silently discarded.
        logging.getLogger(__name__).warning(
            "Skipping CTC child-age diagnostics: could not compute "
            "age-split qualifying-child counts.",
            exc_info=True,
        )

    return build_ctc_diagnostic_tables(frame)
123261

124262

@@ -132,14 +270,9 @@ def _format_amount(value: float) -> str:
132270

133271
def format_ctc_diagnostic_table(table: pd.DataFrame) -> str:
    """Render a diagnostic table as text with count and amount columns formatted.

    Columns named in ``COUNT_FORMAT_COLUMNS`` are passed through
    ``_format_count`` and columns named in ``AMOUNT_FORMAT_COLUMNS`` through
    ``_format_amount``; every other column is left untouched. The input
    table is not modified.
    """
    display = table.copy()
    for column in display.columns:
        # Count membership is checked first, matching the amount fallback order.
        if column in COUNT_FORMAT_COLUMNS:
            formatter = _format_count
        elif column in AMOUNT_FORMAT_COLUMNS:
            formatter = _format_amount
        else:
            continue
        display[column] = display[column].map(formatter)
    return display.to_string(index=False)

policyengine_us_data/calibration/target_config.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,15 @@ include:
154154
- variable: refundable_ctc
155155
geo_level: national
156156
domain_variable: refundable_ctc
157+
- variable: refundable_ctc
158+
geo_level: national
159+
domain_variable: adjusted_gross_income,refundable_ctc
157160
- variable: non_refundable_ctc
158161
geo_level: national
159162
domain_variable: non_refundable_ctc
163+
- variable: non_refundable_ctc
164+
geo_level: national
165+
domain_variable: adjusted_gross_income,non_refundable_ctc
160166
- variable: self_employment_income
161167
geo_level: national
162168
domain_variable: self_employment_income
@@ -181,9 +187,15 @@ include:
181187
- variable: tax_unit_count
182188
geo_level: national
183189
domain_variable: refundable_ctc
190+
- variable: tax_unit_count
191+
geo_level: national
192+
domain_variable: adjusted_gross_income,refundable_ctc
184193
- variable: tax_unit_count
185194
geo_level: national
186195
domain_variable: non_refundable_ctc
196+
- variable: tax_unit_count
197+
geo_level: national
198+
domain_variable: adjusted_gross_income,non_refundable_ctc
187199
# Restore old loss.py's self-employment return-count target.
188200
- variable: tax_unit_count
189201
geo_level: national

0 commit comments

Comments
 (0)