|
31 | 31 | save_bytes, |
32 | 32 | ) |
33 | 33 | from policyengine_us_data.utils.soi import get_tracked_soi_row |
| 34 | +from policyengine_us_data.storage.calibration_targets.pull_soi_targets import ( |
| 35 | + STATE_ABBR_TO_FIPS, |
| 36 | +) |
| 37 | +from policyengine_us_data.storage.calibration_targets.refresh_soi_table_targets import ( |
| 38 | + _load_workbook, |
| 39 | + _scaled_cell, |
| 40 | +) |
34 | 41 |
|
35 | 42 | logger = logging.getLogger(__name__) |
36 | 43 |
|
|
57 | 64 | 9: (500_000, np.inf), # $500,000 or more |
58 | 65 | } |
59 | 66 |
|
# AGI_STUB codes kept from the state-level SOI file, each mapped to its
# (lower, upper) AGI bounds in dollars. Consumers in this module apply the
# lower bound with ">=" and the upper bound with "<".
STATE_FINE_AGI_STUBS = {
    # $500,000 under $1,000,000
    9: (500_000, 1_000_000),
    # $1,000,000 or more
    10: (1_000_000, np.inf),
}
| 71 | + |
# Table 1.4 worksheet row numbers for the national AGI brackets at and above
# $500,000, each mapped to its (lower, upper) AGI bounds in dollars. The
# brackets are contiguous: every upper bound is the next row's lower bound.
NATIONAL_FINE_AGI_BRACKETS = {
    23: (500_000, 1_000_000),
    24: (1_000_000, 1_500_000),
    25: (1_500_000, 2_000_000),
    26: (2_000_000, 5_000_000),
    27: (5_000_000, 10_000_000),
    28: (10_000_000, np.inf),
}
| 80 | + |
60 | 81 | # These variables map cleanly from Publication 1304 aggregate tables to the |
61 | 82 | # existing national IRS-SOI domain strata. We intentionally leave `aca_ptc` |
62 | 83 | # and `refundable_ctc` on the geography-file path for now because the |
@@ -396,6 +417,179 @@ def load_national_workbook_soi_targets( |
396 | 417 | ) |
397 | 418 |
|
398 | 419 |
|
def extract_state_fine_agi_data(year: int) -> pd.DataFrame:
    """Return state-level SOI rows (in55cmcsv) for the fine AGI stubs.

    The CSV is read from the local cache when present; otherwise it is
    downloaded from irs.gov, stored in the cache, and then read from there.

    Args:
        year: Tax year; converted to the file-name prefix by ``_year_prefix``.

    Returns:
        The rows of the CSV whose ``AGI_STUB`` is a key of
        ``STATE_FINE_AGI_STUBS`` and whose ``STATE`` is a key of
        ``STATE_ABBR_TO_FIPS``. All columns are kept.

    Raises:
        requests.exceptions.RequestException: If the download fails, times
            out, or returns an HTTP error status.
        KeyError: If the CSV lacks an ``AGI_STUB`` or ``STATE`` column.
    """
    year_prefix = _year_prefix(year)
    cache_file = f"irs_soi_{year_prefix}in55cmcsv.csv"

    if is_cached(cache_file):
        logger.info("Using cached %s", cache_file)
    else:
        import requests

        url = f"https://www.irs.gov/pub/irs-soi/{year_prefix}in55cmcsv.csv"
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server, which would hang the whole ETL run.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        save_bytes(cache_file, response.content)

    # Both paths read from the cache so the parsing options live in one place.
    df = pd.read_csv(cache_path(cache_file), thousands=",")

    wanted_stub = df["AGI_STUB"].isin(list(STATE_FINE_AGI_STUBS))
    known_state = df["STATE"].isin(list(STATE_ABBR_TO_FIPS))
    return df[wanted_stub & known_state]
| 439 | + |
| 440 | + |
def load_state_fine_agi_targets(
    session: Session, filer_strata: dict, year: int
) -> None:
    """Load state-level targets for the fine AGI stubs (9 and 10).

    For every row returned by ``extract_state_fine_agi_data(year)`` this
    finds, or creates and flushes, a child stratum of the state's filer
    stratum and then upserts two targets on it: ``person_count`` and
    ``adjusted_gross_income``.

    Args:
        session: Database session used for the stratum lookup, insert and
            flush. This function does not commit.
        filer_strata: Mapping whose ``"state"`` entry maps integer state
            FIPS codes to parent stratum ids.
        year: Tax year of the SOI file; also stored as the target period.
    """
    for _, record in extract_state_fine_agi_data(year).iterrows():
        stub = int(record["AGI_STUB"])
        fips_int = int(STATE_ABBR_TO_FIPS[record["STATE"]])
        lower, upper = STATE_FINE_AGI_STUBS[stub]

        parent_stratum_id = filer_strata["state"][fips_int]
        # The note doubles as the lookup key for an already-loaded stratum,
        # so its text must stay stable across runs.
        note = f"State FIPS {fips_int} filers, AGI >= {lower}, AGI < {upper}"

        lookup = session.query(Stratum).filter(
            Stratum.parent_stratum_id == parent_stratum_id,
            Stratum.notes == note,
        )
        stratum = lookup.first()

        if not stratum:
            constraint_specs = (
                ("tax_unit_is_filer", "==", "1"),
                ("state_fips", "==", str(fips_int)),
                ("adjusted_gross_income", ">=", str(lower)),
                ("adjusted_gross_income", "<", str(upper)),
            )
            stratum = Stratum(
                parent_stratum_id=parent_stratum_id,
                notes=note,
            )
            stratum.constraints_rel.extend(
                [
                    StratumConstraint(
                        constraint_variable=variable,
                        operation=operation,
                        value=value,
                    )
                    for variable, operation, value in constraint_specs
                ]
            )
            session.add(stratum)
            # Flush before stratum.stratum_id is read below.
            session.flush()

        # NOTE(review): N2 is loaded as person_count; presumably it is the
        # SOI "number of individuals" column -- confirm against the IRS
        # documentation guide.
        person_count = float(record["N2"])
        # NOTE(review): the x1000 presumably converts A00100 from thousands
        # of dollars to dollars -- confirm.
        agi_amount = float(record["A00100"]) * 1000

        target_values = (
            ("person_count", person_count),
            ("adjusted_gross_income", agi_amount),
        )
        for variable, value in target_values:
            _upsert_target(
                session,
                stratum_id=stratum.stratum_id,
                variable=variable,
                period=year,
                value=value,
                source="IRS SOI",
                notes=f"State fine AGI stub {stub} from in55cmcsv",
            )
| 522 | + |
def load_national_fine_agi_targets(
    session: Session, national_filer_stratum_id: int, target_year: int
) -> None:
    """Load national targets for the fine AGI brackets of Table 1.4.

    For every entry of ``NATIONAL_FINE_AGI_BRACKETS`` this finds, or creates
    and flushes, a child stratum of the national filer stratum and then
    upserts two targets on it: ``tax_unit_count`` and
    ``adjusted_gross_income``.

    Args:
        session: Database session used for the stratum lookup, insert and
            flush. This function does not commit.
        national_filer_stratum_id: Id of the parent (national filer) stratum.
        target_year: Year passed to ``_load_workbook``; also stored as the
            target period.
    """
    workbook = _load_workbook("Table 1.4", target_year)

    for excel_row, bounds in NATIONAL_FINE_AGI_BRACKETS.items():
        lower, upper = bounds
        # The note doubles as the lookup key for an already-loaded stratum,
        # so its text must stay stable across runs.
        note = f"National filers, AGI >= {lower}, AGI < {upper}"

        lookup = session.query(Stratum).filter(
            Stratum.parent_stratum_id == national_filer_stratum_id,
            Stratum.notes == note,
        )
        stratum = lookup.first()

        if not stratum:
            constraint_specs = (
                ("tax_unit_is_filer", "==", "1"),
                ("adjusted_gross_income", ">=", str(lower)),
                ("adjusted_gross_income", "<", str(upper)),
            )
            stratum = Stratum(
                parent_stratum_id=national_filer_stratum_id,
                notes=note,
            )
            stratum.constraints_rel.extend(
                [
                    StratumConstraint(
                        constraint_variable=variable,
                        operation=operation,
                        value=value,
                    )
                    for variable, operation, value in constraint_specs
                ]
            )
            session.add(stratum)
            # Flush before stratum.stratum_id is read below.
            session.flush()

        # Column B is read with is_count=True and column C with
        # is_count=False. NOTE(review): presumably B holds the number of
        # returns and C the AGI amount -- confirm against the workbook.
        count_value = _scaled_cell(workbook, excel_row, "B", is_count=True)
        agi_value = _scaled_cell(workbook, excel_row, "C", is_count=False)

        target_values = (
            ("tax_unit_count", count_value),
            ("adjusted_gross_income", agi_value),
        )
        for variable, value in target_values:
            _upsert_target(
                session,
                stratum_id=stratum.stratum_id,
                variable=variable,
                period=target_year,
                value=value,
                source="IRS SOI",
                notes=f"Table 1.4 row {excel_row} fine AGI bracket",
            )
| 591 | + |
| 592 | + |
399 | 593 | def transform_soi_data(raw_df): |
400 | 594 |
|
401 | 595 | TARGETS = [ |
@@ -645,7 +839,9 @@ def load_soi_data(long_dfs, year, national_year: Optional[int] = None): |
645 | 839 | filer_strata["national"], |
646 | 840 | national_year, |
647 | 841 | ) |
| 842 | + load_national_fine_agi_targets(session, filer_strata["national"], national_year) |
648 | 843 |
|
| 844 | + load_state_fine_agi_targets(session, filer_strata, year) |
649 | 845 | session.commit() |
650 | 846 |
|
651 | 847 | # Load EITC data -------------------------------------------------------- |
|
0 commit comments