Skip to content

Commit 21e8e9a

Browse files
MaxGhenis and claude authored
Switch from black to ruff format (#577)
Replace black with ruff as the code formatter across pyproject.toml, Makefile, and CI workflows. Reformat all files with ruff defaults. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d6ebf70 commit 21e8e9a

113 files changed

Lines changed: 1024 additions & 2134 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/bump_version.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ def get_current_version(pyproject_path: Path) -> str:
1919

2020
def infer_bump(changelog_dir: Path) -> str:
2121
fragments = [
22-
f
23-
for f in changelog_dir.iterdir()
24-
if f.is_file() and f.name != ".gitkeep"
22+
f for f in changelog_dir.iterdir() if f.is_file() and f.name != ".gitkeep"
2523
]
2624
if not fragments:
2725
print("No changelog fragments found", file=sys.stderr)

.github/workflows/reusable_lint.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: ubuntu-latest
99
steps:
1010
- uses: actions/checkout@v4
11+
- name: Install ruff
12+
run: pip install "ruff>=0.9.0"  # quoted: an unquoted >= is parsed by the shell as a redirect to a file named "=0.9.0", dropping the version floor
1113
- name: Check formatting
12-
uses: "lgeiger/black-action@master"
13-
with:
14-
args: ". -l 79 --check"
14+
run: ruff format --check .

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ HF_CLONE_DIR ?= $(HOME)/huggingface/policyengine-us-data
55
all: data test
66

77
format:
8-
black . -l 79
8+
ruff format .
99

1010
test:
1111
pytest
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Switch from black to ruff format.

docs/calibration_matrix.ipynb

Lines changed: 81 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,28 @@
2727
"execution_count": null,
2828
"metadata": {},
2929
"outputs": [],
30-
"source": "import numpy as np\nimport pandas as pd\nfrom policyengine_us import Microsimulation\nfrom policyengine_us_data.storage import STORAGE_FOLDER\nfrom policyengine_us_data.calibration.unified_matrix_builder import (\n UnifiedMatrixBuilder,\n)\nfrom policyengine_us_data.calibration.clone_and_assign import (\n assign_random_geography,\n)\nfrom policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n create_target_groups,\n drop_target_groups,\n get_geo_level,\n STATE_CODES,\n)\n\ndb_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\ndb_uri = f\"sqlite:///{db_path}\"\ndataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\""
30+
"source": [
31+
"import numpy as np\n",
32+
"import pandas as pd\n",
33+
"from policyengine_us import Microsimulation\n",
34+
"from policyengine_us_data.storage import STORAGE_FOLDER\n",
35+
"from policyengine_us_data.calibration.unified_matrix_builder import (\n",
36+
" UnifiedMatrixBuilder,\n",
37+
")\n",
38+
"from policyengine_us_data.calibration.clone_and_assign import (\n",
39+
" assign_random_geography,\n",
40+
")\n",
41+
"from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n",
42+
" create_target_groups,\n",
43+
" drop_target_groups,\n",
44+
" get_geo_level,\n",
45+
" STATE_CODES,\n",
46+
")\n",
47+
"\n",
48+
"db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n",
49+
"db_uri = f\"sqlite:///{db_path}\"\n",
50+
"dataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\""
51+
]
3152
},
3253
{
3354
"cell_type": "code",
@@ -82,7 +103,19 @@
82103
"execution_count": null,
83104
"metadata": {},
84105
"outputs": [],
85-
"source": "print(f\"Targets: {X_sparse.shape[0]}\")\nprint(f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\")\nprint(f\"Non-zeros: {X_sparse.nnz:,}\")\nprint(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nfor level in [0, 1, 2]:\n n = (geo_levels == level).sum()\n if n > 0:\n print(f\" {level_names[level]}: {n} targets\")"
106+
"source": [
107+
"print(f\"Targets: {X_sparse.shape[0]}\")\n",
108+
"print(f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\")\n",
109+
"print(f\"Non-zeros: {X_sparse.nnz:,}\")\n",
110+
"print(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n",
111+
"\n",
112+
"geo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\n",
113+
"level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n",
114+
"for level in [0, 1, 2]:\n",
115+
" n = (geo_levels == level).sum()\n",
116+
" if n > 0:\n",
117+
" print(f\" {level_names[level]}: {n} targets\")"
118+
]
86119
},
87120
{
88121
"cell_type": "markdown",
@@ -294,14 +327,16 @@
294327
"for gid, info in enumerate(group_info):\n",
295328
" mask = target_groups == gid\n",
296329
" vals = targets_df.loc[mask, \"value\"]\n",
297-
" records.append({\n",
298-
" \"group_id\": gid,\n",
299-
" \"description\": info,\n",
300-
" \"n_targets\": mask.sum(),\n",
301-
" \"min_value\": vals.min(),\n",
302-
" \"median_value\": vals.median(),\n",
303-
" \"max_value\": vals.max(),\n",
304-
" })\n",
330+
" records.append(\n",
331+
" {\n",
332+
" \"group_id\": gid,\n",
333+
" \"description\": info,\n",
334+
" \"n_targets\": mask.sum(),\n",
335+
" \"min_value\": vals.min(),\n",
336+
" \"median_value\": vals.median(),\n",
337+
" \"max_value\": vals.max(),\n",
338+
" }\n",
339+
" )\n",
305340
"\n",
306341
"group_df = pd.DataFrame(records)\n",
307342
"print(group_df.to_string(index=False))"
@@ -431,8 +466,7 @@
431466
" for r in nz_rows[:5]:\n",
432467
" row = targets_df.iloc[r]\n",
433468
" print(\n",
434-
" f\" {row['variable']} (geo={row['geographic_id']}): \"\n",
435-
" f\"{X_sparse[r, col]:.2f}\"\n",
469+
" f\" {row['variable']} (geo={row['geographic_id']}): {X_sparse[r, col]:.2f}\"\n",
436470
" )\n",
437471
" if len(nz_rows) > 5:\n",
438472
" print(f\" ... and {len(nz_rows) - 5} more\")"
@@ -475,7 +509,28 @@
475509
"execution_count": null,
476510
"metadata": {},
477511
"outputs": [],
478-
"source": "nnz_per_row = np.diff(X_sparse.indptr)\nprint(f\"Non-zeros per row:\")\nprint(f\" min: {nnz_per_row.min():,}\")\nprint(f\" median: {int(np.median(nnz_per_row)):,}\")\nprint(f\" mean: {nnz_per_row.mean():,.0f}\")\nprint(f\" max: {nnz_per_row.max():,}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nprint(\"\\nBy geographic level:\")\nfor level in [0, 1, 2]:\n mask = (geo_levels == level).values\n if mask.any():\n vals = nnz_per_row[mask]\n print(\n f\" {level_names[level]:10s}: \"\n f\"n={mask.sum():>4d}, \"\n f\"median nnz={int(np.median(vals)):>7,}, \"\n f\"range=[{vals.min():,}, {vals.max():,}]\"\n )"
512+
"source": [
513+
"nnz_per_row = np.diff(X_sparse.indptr)\n",
514+
"print(f\"Non-zeros per row:\")\n",
515+
"print(f\" min: {nnz_per_row.min():,}\")\n",
516+
"print(f\" median: {int(np.median(nnz_per_row)):,}\")\n",
517+
"print(f\" mean: {nnz_per_row.mean():,.0f}\")\n",
518+
"print(f\" max: {nnz_per_row.max():,}\")\n",
519+
"\n",
520+
"geo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\n",
521+
"level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n",
522+
"print(\"\\nBy geographic level:\")\n",
523+
"for level in [0, 1, 2]:\n",
524+
" mask = (geo_levels == level).values\n",
525+
" if mask.any():\n",
526+
" vals = nnz_per_row[mask]\n",
527+
" print(\n",
528+
" f\" {level_names[level]:10s}: \"\n",
529+
" f\"n={mask.sum():>4d}, \"\n",
530+
" f\"median nnz={int(np.median(vals)):>7,}, \"\n",
531+
" f\"range=[{vals.min():,}, {vals.max():,}]\"\n",
532+
" )"
533+
]
479534
},
480535
{
481536
"cell_type": "code",
@@ -498,12 +553,16 @@
498553
"clone_nnz = []\n",
499554
"for ci in range(N_CLONES):\n",
500555
" block = X_sparse[:, ci * n_records : (ci + 1) * n_records]\n",
501-
" n_states = len(np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records]))\n",
502-
" clone_nnz.append({\n",
503-
" \"clone\": ci,\n",
504-
" \"nnz\": block.nnz,\n",
505-
" \"unique_states\": n_states,\n",
506-
" })\n",
556+
" n_states = len(\n",
557+
" np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records])\n",
558+
" )\n",
559+
" clone_nnz.append(\n",
560+
" {\n",
561+
" \"clone\": ci,\n",
562+
" \"nnz\": block.nnz,\n",
563+
" \"unique_states\": n_states,\n",
564+
" }\n",
565+
" )\n",
507566
"\n",
508567
"clone_df = pd.DataFrame(clone_nnz)\n",
509568
"print(\"Non-zeros per clone block:\")\n",
@@ -666,7 +725,9 @@
666725
}
667726
],
668727
"source": [
669-
"ratios = row_sums[achievable_mask] / targets_filtered.loc[achievable_mask, \"value\"].values\n",
728+
"ratios = (\n",
729+
" row_sums[achievable_mask] / targets_filtered.loc[achievable_mask, \"value\"].values\n",
730+
")\n",
670731
"ratio_df = targets_filtered[achievable_mask].copy()\n",
671732
"ratio_df[\"row_sum\"] = row_sums[achievable_mask]\n",
672733
"ratio_df[\"ratio\"] = ratios\n",

docs/hierarchical_uprating.ipynb

Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,7 @@
264264
],
265265
"source": [
266266
"snap_hh = raw[\n",
267-
" (raw[\"domain_variable\"] == \"snap\")\n",
268-
" & (raw[\"variable\"] == \"household_count\")\n",
267+
" (raw[\"domain_variable\"] == \"snap\") & (raw[\"variable\"] == \"household_count\")\n",
269268
"]\n",
270269
"for level in [\"state\", \"district\"]:\n",
271270
" total = snap_hh[snap_hh[\"geo_level\"] == level][\"value\"].sum()\n",
@@ -333,9 +332,9 @@
333332
"source": [
334333
"raw[\"original_value\"] = raw[\"value\"].copy()\n",
335334
"raw[\"uprating_factor\"] = raw.apply(\n",
336-
" lambda r: builder._get_uprating_info(\n",
337-
" r[\"variable\"], r[\"period\"], uprating_factors\n",
338-
" )[0],\n",
335+
" lambda r: builder._get_uprating_info(r[\"variable\"], r[\"period\"], uprating_factors)[\n",
336+
" 0\n",
337+
" ],\n",
339338
" axis=1,\n",
340339
")\n",
341340
"raw[\"value\"] = raw[\"original_value\"] * raw[\"uprating_factor\"]"
@@ -376,10 +375,7 @@
376375
"sample_states = {6: \"CA\", 48: \"TX\", 36: \"NY\"}\n",
377376
"\n",
378377
"for fips, abbr in sample_states.items():\n",
379-
" rows = raw[\n",
380-
" (raw[\"geo_level\"] == \"state\")\n",
381-
" & (raw[\"geographic_id\"] == str(fips))\n",
382-
" ]\n",
378+
" rows = raw[(raw[\"geo_level\"] == \"state\") & (raw[\"geographic_id\"] == str(fips))]\n",
383379
" for _, r in rows.iterrows():\n",
384380
" print(\n",
385381
" f\" {abbr} [{r['domain_variable']:8s}] \"\n",
@@ -412,9 +408,7 @@
412408
"metadata": {},
413409
"outputs": [],
414410
"source": [
415-
"result = builder._apply_hierarchical_uprating(\n",
416-
" raw, DOMAINS, uprating_factors\n",
417-
")"
411+
"result = builder._apply_hierarchical_uprating(raw, DOMAINS, uprating_factors)"
418412
]
419413
},
420414
{
@@ -454,11 +448,7 @@
454448
" for fips, abbr in sample_states.items():\n",
455449
" cd_state = cd_domain[\n",
456450
" cd_domain[\"geographic_id\"].apply(\n",
457-
" lambda g, s=fips: (\n",
458-
" int(g) // 100 == s\n",
459-
" if g not in (\"US\",)\n",
460-
" else False\n",
461-
" )\n",
451+
" lambda g, s=fips: int(g) // 100 == s if g not in (\"US\",) else False\n",
462452
" )\n",
463453
" ]\n",
464454
" if cd_state.empty:\n",
@@ -474,11 +464,7 @@
474464
" & (raw[\"variable\"] == var)\n",
475465
" & (raw[\"domain_variable\"] == domain)\n",
476466
" ]\n",
477-
" uprated_state = (\n",
478-
" st_row[\"value\"].iloc[0]\n",
479-
" if len(st_row)\n",
480-
" else np.nan\n",
481-
" )\n",
467+
" uprated_state = st_row[\"value\"].iloc[0] if len(st_row) else np.nan\n",
482468
" print(\n",
483469
" f\" {abbr} {var:20s} \"\n",
484470
" f\"hif={hif:.6f} \"\n",
@@ -487,6 +473,7 @@
487473
" f\"uprated_state={uprated_state:>14,.0f}\"\n",
488474
" )\n",
489475
"\n",
476+
"\n",
490477
"show_reconciliation(result, raw, \"aca_ptc\", sample_states)"
491478
]
492479
},
@@ -527,17 +514,15 @@
527514
"]\n",
528515
"\n",
529516
"state_ufs = (\n",
530-
" aca_cds.assign(state_fips=aca_cds[\"geographic_id\"].apply(\n",
531-
" lambda g: int(g) // 100\n",
532-
" ))\n",
517+
" aca_cds.assign(state_fips=aca_cds[\"geographic_id\"].apply(lambda g: int(g) // 100))\n",
533518
" .groupby(\"state_fips\")[\"state_uprating_factor\"]\n",
534519
" .first()\n",
535520
" .sort_values()\n",
536521
")\n",
537522
"\n",
538523
"print(\"ACA PTC uprating factors (aca_ptc = vol_mult * val_mult):\")\n",
539524
"print(f\" {'State FIPS':>12s} {'Factor':>8s}\")\n",
540-
"print(f\" {'─'*12} {'─'*8}\")\n",
525+
"print(f\" {'─' * 12} {'─' * 8}\")\n",
541526
"for fips in list(state_ufs.index[:5]) + [\"...\"] + list(state_ufs.index[-5:]):\n",
542527
" if fips == \"...\":\n",
543528
" print(f\" {'...':>12s}\")\n",
@@ -676,9 +661,7 @@
676661
],
677662
"source": [
678663
"level_counts = (\n",
679-
" result.groupby([\"domain_variable\", \"geo_level\"])\n",
680-
" .size()\n",
681-
" .reset_index(name=\"count\")\n",
664+
" result.groupby([\"domain_variable\", \"geo_level\"]).size().reset_index(name=\"count\")\n",
682665
")\n",
683666
"level_counts"
684667
]
@@ -749,20 +732,14 @@
749732
"checks = 0\n",
750733
"for domain in DOMAINS:\n",
751734
" domain_result = result[result[\"domain_variable\"] == domain]\n",
752-
" cd_result = domain_result[\n",
753-
" domain_result[\"geo_level\"] == \"district\"\n",
754-
" ]\n",
735+
" cd_result = domain_result[domain_result[\"geo_level\"] == \"district\"]\n",
755736
" if cd_result.empty:\n",
756737
" continue\n",
757738
"\n",
758739
" for fips, abbr in sorted(STATE_CODES.items()):\n",
759740
" cd_rows = cd_result[\n",
760741
" cd_result[\"geographic_id\"].apply(\n",
761-
" lambda g, s=fips: (\n",
762-
" int(g) // 100 == s\n",
763-
" if g not in (\"US\",)\n",
764-
" else False\n",
765-
" )\n",
742+
" lambda g, s=fips: int(g) // 100 == s if g not in (\"US\",) else False\n",
766743
" )\n",
767744
" ]\n",
768745
" if cd_rows.empty:\n",

0 commit comments

Comments
 (0)