Skip to content

Commit f618250

Browse files
committed
Format long-run calibration files
1 parent d85bc0f commit f618250

18 files changed

Lines changed: 382 additions & 341 deletions

policyengine_us_data/datasets/cps/long_term/assess_calibration_frontier.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,17 @@ def reorder_to_households(values, order, n_households: int) -> np.ndarray:
8686
return ordered
8787

8888

89-
def build_constraint_inputs(year: int, hh_id_to_idx: dict, n_households: int, profile) -> dict:
89+
def build_constraint_inputs(
90+
year: int, hh_id_to_idx: dict, n_households: int, profile
91+
) -> dict:
9092
sim = Microsimulation(dataset=BASE_DATASET_PATH)
9193
if profile.use_h6_reform:
9294
raise NotImplementedError(
9395
"Frontier assessment for H6-enabled profiles is not yet implemented."
9496
)
95-
household_ids = sim.calculate("household_id", period=year, map_to="household").values
97+
household_ids = sim.calculate(
98+
"household_id", period=year, map_to="household"
99+
).values
96100
if len(household_ids) != n_households:
97101
raise ValueError(
98102
f"Household count mismatch for {year}: {len(household_ids)} vs {n_households}"
@@ -226,10 +230,7 @@ def main() -> int:
226230
}
227231
rows.append(row)
228232
best_case_display = "n/a" if best_case is None else f"{best_case:.3f}%"
229-
print(
230-
f"{year}: best-case max error {best_case_display} -> "
231-
f"{row['quality']}"
232-
)
233+
print(f"{year}: best-case max error {best_case_display} -> {row['quality']}")
233234

234235
if args.output:
235236
output_path = Path(args.output)

policyengine_us_data/datasets/cps/long_term/assess_publishable_horizon.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,7 @@ def benchmark_tob_values(
119119
else (oasdi_achieved - oasdi_target) / oasdi_target * 100
120120
),
121121
"hi_tob_benchmark_pct_error": (
122-
0.0
123-
if hi_target == 0
124-
else (hi_achieved - hi_target) / hi_target * 100
122+
0.0 if hi_target == 0 else (hi_achieved - hi_target) / hi_target * 100
125123
),
126124
}
127125

@@ -287,9 +285,7 @@ def assess_years(
287285
"validation_issues": str(error),
288286
"runtime_error": str(error),
289287
}
290-
best_case_match = re.search(
291-
r"([0-9.]+)%\s*>\s*([0-9.]+)%", str(error)
292-
)
288+
best_case_match = re.search(r"([0-9.]+)%\s*>\s*([0-9.]+)%", str(error))
293289
if best_case_match:
294290
row["reported_best_case_constraint_error_pct"] = float(
295291
best_case_match.group(1)
@@ -315,7 +311,9 @@ def assess_years(
315311
h6_income_values=None,
316312
h6_revenue_target=None,
317313
oasdi_tob_values=oasdi_tob_values if profile.use_tob else None,
318-
oasdi_tob_target=load_oasdi_tob_projections(year) if profile.use_tob else None,
314+
oasdi_tob_target=load_oasdi_tob_projections(year)
315+
if profile.use_tob
316+
else None,
319317
hi_tob_values=hi_tob_values if profile.use_tob else None,
320318
hi_tob_target=load_hi_tob_projections(year) if profile.use_tob else None,
321319
)

policyengine_us_data/datasets/cps/long_term/benchmark_trustees_bracket_indexing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ def modify_parameters(parameters):
4545
bracket_node = thresholds.get_child(bracket)
4646
for filing_status in FILING_STATUSES:
4747
parameter = bracket_node.get_child(filing_status)
48-
interval = float(
49-
parameter.metadata["uprating"]["rounding"]["interval"]
50-
)
48+
interval = float(parameter.metadata["uprating"]["rounding"]["interval"])
5149

5250
for year in range(start_year, end_year + 1):
5351
previous_value = float(parameter(f"{year - 1}-01-01"))
@@ -66,6 +64,8 @@ def apply(self):
6664
self.modify_parameters(modify_parameters)
6765

6866
return reform
67+
68+
6969
def _coerce_h5_path(raw: str) -> Path:
7070
path = Path(raw).expanduser()
7171
if path.is_dir():

policyengine_us_data/datasets/cps/long_term/build_long_term_target_sources.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ def build_oact_source(trustees: pd.DataFrame) -> pd.DataFrame:
4545
missing_years = merged.loc[
4646
merged["oasdi_nominal_delta_billions"].isna(), "year"
4747
].tolist()
48-
raise ValueError(
49-
f"Missing OACT OASDI deltas for years: {missing_years}"
50-
)
48+
raise ValueError(f"Missing OACT OASDI deltas for years: {missing_years}")
5149

5250
merged["oasdi_tob_billions_nominal_usd"] = (
5351
merged["oasdi_tob_billions_nominal_usd"]

policyengine_us_data/datasets/cps/long_term/calibration.py

Lines changed: 18 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -279,9 +279,7 @@ def calibrate_entropy(
279279
baseline_weights = np.asarray(baseline_weights, dtype=float)
280280
gram = A_scaled.T @ (baseline_weights[:, None] * A_scaled)
281281
gram += np.eye(gram.shape[0]) * 1e-12
282-
beta0 = np.linalg.solve(
283-
gram, targets_scaled - (A_scaled.T @ baseline_weights)
284-
)
282+
beta0 = np.linalg.solve(gram, targets_scaled - (A_scaled.T @ baseline_weights))
285283

286284
def objective_gradient_hessian(beta):
287285
eta = np.clip(A_scaled @ beta, -700, 700)
@@ -313,11 +311,7 @@ def _cached_ogh(z):
313311
return None
314312
_, gradient, _ = objective_gradient_hessian(result.x)
315313
max_error = float(
316-
np.max(
317-
100
318-
* np.abs(gradient)
319-
/ np.maximum(np.abs(targets_scaled), 1e-12)
320-
)
314+
np.max(100 * np.abs(gradient) / np.maximum(np.abs(targets_scaled), 1e-12))
321315
)
322316
if max_error > tol * 100:
323317
return None
@@ -343,11 +337,7 @@ def infeasibility_error(prefix):
343337
for iterations in range(1, max_iters + 1):
344338
objective, gradient, hessian = objective_gradient_hessian(beta)
345339
final_max_error = float(
346-
np.max(
347-
100
348-
* np.abs(gradient)
349-
/ np.maximum(np.abs(targets_scaled), 1e-12)
350-
)
340+
np.max(100 * np.abs(gradient) / np.maximum(np.abs(targets_scaled), 1e-12))
351341
)
352342
if final_max_error <= tol * 100:
353343
break
@@ -361,8 +351,8 @@ def infeasibility_error(prefix):
361351
step = 1.0
362352
while step >= 1e-8:
363353
candidate = beta - step * delta
364-
candidate_objective, candidate_gradient, _ = (
365-
objective_gradient_hessian(candidate)
354+
candidate_objective, candidate_gradient, _ = objective_gradient_hessian(
355+
candidate
366356
)
367357
candidate_max_error = float(
368358
np.max(
@@ -477,9 +467,7 @@ def objective_and_gradient(z):
477467
exp_eta = np.exp(eta)
478468
weights = baseline_weights * exp_eta
479469
achieved = A_scaled.T @ weights
480-
objective = float(
481-
np.sum(weights) + upper_bounds @ alpha - lower_bounds @ gamma
482-
)
470+
objective = float(np.sum(weights) + upper_bounds @ alpha - lower_bounds @ gamma)
483471
gradient = np.concatenate(
484472
[
485473
upper_bounds - achieved,
@@ -561,9 +549,7 @@ def objective_with_gradient(z):
561549
)
562550

563551
if best_result is None or best_weights is None:
564-
raise RuntimeError(
565-
"Approximate bounded entropy calibration did not run."
566-
)
552+
raise RuntimeError("Approximate bounded entropy calibration did not run.")
567553

568554
raise RuntimeError(
569555
"Approximate bounded entropy calibration failed: "
@@ -610,8 +596,7 @@ def densify_lp_solution(
610596
lam = (lo + hi) / 2.0
611597
candidate_weights = (1.0 - lam) * lp_weights + lam * baseline_weights
612598
candidate_error_pct = float(
613-
np.max(np.abs(A_scaled.T @ candidate_weights - targets_scaled))
614-
* 100
599+
np.max(np.abs(A_scaled.T @ candidate_weights - targets_scaled)) * 100
615600
)
616601
if candidate_error_pct <= max_constraint_error_pct + 1e-6:
617602
best_lambda = lam
@@ -670,14 +655,11 @@ def calibrate_lp_minimax(
670655

671656
A = aux_df.to_numpy(dtype=float)
672657
targets = np.array(list(controls.values()), dtype=float)
673-
feasibility = assess_nonnegative_feasibility(
674-
A, targets, return_weights=True
675-
)
658+
feasibility = assess_nonnegative_feasibility(A, targets, return_weights=True)
676659
weights = feasibility.get("weights")
677660
if not feasibility["success"] or weights is None:
678661
raise RuntimeError(
679-
"Approximate nonnegative calibration failed: "
680-
f"{feasibility['message']}"
662+
f"Approximate nonnegative calibration failed: {feasibility['message']}"
681663
)
682664

683665
return np.asarray(weights, dtype=float), 1, feasibility
@@ -706,9 +688,7 @@ def assess_nonnegative_feasibility(A, targets, *, return_weights=False):
706688
b_rel = targets / scales
707689

708690
constraint_matrix = sparse.csr_matrix(A_rel)
709-
epsilon_column = sparse.csc_matrix(
710-
np.ones((constraint_matrix.shape[0], 1))
711-
)
691+
epsilon_column = sparse.csc_matrix(np.ones((constraint_matrix.shape[0], 1)))
712692
A_ub = sparse.vstack(
713693
[
714694
sparse.hstack([constraint_matrix, -epsilon_column]),
@@ -886,9 +866,7 @@ def calibrate_weights(
886866
hi_tob_target=hi_tob_target,
887867
n_ages=n_ages,
888868
)
889-
approximate_error_pct = float(
890-
feasibility["best_case_max_pct_error"]
891-
)
869+
approximate_error_pct = float(feasibility["best_case_max_pct_error"])
892870
if approximate_error_pct <= max(tol * 100, 1e-6):
893871
audit["lp_fallback_used"] = True
894872
audit["approximation_method"] = "lp_minimax_exact"
@@ -1019,9 +997,7 @@ def build_calibration_audit(
1019997
):
1020998
achieved_ages = X.T @ weights
1021999
age_errors = (
1022-
np.abs(achieved_ages - y_target)
1023-
/ np.maximum(np.abs(y_target), 1e-10)
1024-
* 100
1000+
np.abs(achieved_ages - y_target) / np.maximum(np.abs(y_target), 1e-10) * 100
10251001
)
10261002

10271003
neg_mask = weights < 0
@@ -1031,12 +1007,8 @@ def build_calibration_audit(
10311007
abs_weight_sum = float(np.sum(np.abs(weights)))
10321008
if weight_sum > 0:
10331009
sorted_weights = np.sort(weights)
1034-
top_10_weight_share_pct = float(
1035-
sorted_weights[-10:].sum() / weight_sum * 100
1036-
)
1037-
top_100_weight_share_pct = float(
1038-
sorted_weights[-100:].sum() / weight_sum * 100
1039-
)
1010+
top_10_weight_share_pct = float(sorted_weights[-10:].sum() / weight_sum * 100)
1011+
top_100_weight_share_pct = float(sorted_weights[-100:].sum() / weight_sum * 100)
10401012
else:
10411013
top_10_weight_share_pct = 0.0
10421014
top_100_weight_share_pct = 0.0
@@ -1051,9 +1023,7 @@ def build_calibration_audit(
10511023
{
10521024
"age_max_pct_error": float(age_errors.max()),
10531025
"negative_weight_count": int(neg_mask.sum()),
1054-
"negative_weight_household_pct": float(
1055-
100 * neg_mask.sum() / len(weights)
1056-
),
1026+
"negative_weight_household_pct": float(100 * neg_mask.sum() / len(weights)),
10571027
"negative_weight_pct": (
10581028
float(100 * negative_values.sum() / abs_weight_sum)
10591029
if abs_weight_sum > 0
@@ -1063,9 +1033,7 @@ def build_calibration_audit(
10631033
float(negative_values.max()) if negative_values.size else 0.0
10641034
),
10651035
"positive_weight_count": int(positive_mask.sum()),
1066-
"positive_weight_pct": float(
1067-
100 * positive_mask.sum() / len(weights)
1068-
),
1036+
"positive_weight_pct": float(100 * positive_mask.sum() / len(weights)),
10691037
"effective_sample_size": effective_sample_size,
10701038
"top_10_weight_share_pct": top_10_weight_share_pct,
10711039
"top_100_weight_share_pct": top_100_weight_share_pct,
@@ -1097,10 +1065,7 @@ def build_calibration_audit(
10971065

10981066
if audit["constraints"]:
10991067
audit["max_constraint_pct_error"] = float(
1100-
max(
1101-
abs(stats["pct_error"])
1102-
for stats in audit["constraints"].values()
1103-
)
1068+
max(abs(stats["pct_error"]) for stats in audit["constraints"].values())
11041069
)
11051070

11061071
return audit

policyengine_us_data/datasets/cps/long_term/calibration_artifacts.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,7 @@ def normalize_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
3939
if "max_constraint_pct_error" not in audit:
4040
audit["max_constraint_pct_error"] = float(
4141
max(
42-
(
43-
abs(stats.get("pct_error", 0.0))
44-
for stats in constraints.values()
45-
),
42+
(abs(stats.get("pct_error", 0.0)) for stats in constraints.values()),
4643
default=0.0,
4744
)
4845
)
@@ -186,11 +183,9 @@ def update_dataset_manifest(
186183
)
187184
manifest_profile = json.loads(json.dumps(manifest["profile"]))
188185
if manifest_profile != profile:
189-
if manifest_profile.get("name") == profile.get(
190-
"name"
191-
) and manifest_profile.get("calibration_method") == profile.get(
186+
if manifest_profile.get("name") == profile.get("name") and manifest_profile.get(
192187
"calibration_method"
193-
):
188+
) == profile.get("calibration_method"):
194189
manifest["profile"] = profile
195190
else:
196191
raise ValueError(
@@ -230,17 +225,13 @@ def update_dataset_manifest(
230225
"method_used": calibration_audit.get("method_used"),
231226
"fell_back_to_ipf": calibration_audit.get("fell_back_to_ipf"),
232227
"age_max_pct_error": calibration_audit.get("age_max_pct_error"),
233-
"max_constraint_pct_error": calibration_audit.get(
234-
"max_constraint_pct_error"
235-
),
228+
"max_constraint_pct_error": calibration_audit.get("max_constraint_pct_error"),
236229
"negative_weight_pct": calibration_audit.get("negative_weight_pct"),
237230
"negative_weight_household_pct": calibration_audit.get(
238231
"negative_weight_household_pct"
239232
),
240233
"validation_passed": calibration_audit.get("validation_passed"),
241-
"validation_issue_count": len(
242-
calibration_audit.get("validation_issues", [])
243-
),
234+
"validation_issue_count": len(calibration_audit.get("validation_issues", [])),
244235
}
245236

246237
year_set = {int(value) for value in manifest.get("years", [])}

policyengine_us_data/datasets/cps/long_term/calibration_profiles.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,9 @@ def get_profile(name: str) -> CalibrationProfile:
203203
return NAMED_PROFILES[name]
204204
except KeyError as error:
205205
valid = ", ".join(sorted(NAMED_PROFILES))
206-
raise ValueError(f"Unknown calibration profile '{name}'. Valid profiles: {valid}") from error
206+
raise ValueError(
207+
f"Unknown calibration profile '{name}'. Valid profiles: {valid}"
208+
) from error
207209

208210

209211
def approximate_window_for_year(
@@ -459,8 +461,7 @@ def _collect_threshold_issues(
459461
and ess < min_effective_sample_size
460462
):
461463
issues.append(
462-
f"Effective sample size {ess:.3f} is below "
463-
f"{min_effective_sample_size:.3f}"
464+
f"Effective sample size {ess:.3f} is below {min_effective_sample_size:.3f}"
464465
)
465466

466467
top_10_share = audit.get("top_10_weight_share_pct")

policyengine_us_data/datasets/cps/long_term/compare_tob_shares.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,7 @@ def parse_args() -> argparse.Namespace:
8383
parser.add_argument(
8484
"paths",
8585
nargs="+",
86-
help=(
87-
"Metadata files or directories containing *.metadata.json sidecars."
88-
),
86+
help=("Metadata files or directories containing *.metadata.json sidecars."),
8987
)
9088
parser.add_argument(
9189
"--format",

policyengine_us_data/datasets/cps/long_term/diagnose_support_augmentation_translation.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def main() -> int:
3636
"report_file named in metadata, or <output_dir>/support_augmentation_report.json."
3737
),
3838
)
39-
parser.add_argument("--year", type=int, required=True, help="Output year to inspect.")
39+
parser.add_argument(
40+
"--year", type=int, required=True, help="Output year to inspect."
41+
)
4042
parser.add_argument(
4143
"--age-bucket-size",
4244
type=int,

policyengine_us_data/datasets/cps/long_term/evaluate_support_augmentation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,7 @@ def _evaluate_dataset(
4949

5050
approximate_window = approximate_window_for_year(profile, year)
5151
age_bucket_size = (
52-
approximate_window.age_bucket_size
53-
if approximate_window is not None
54-
else None
52+
approximate_window.age_bucket_size if approximate_window is not None else None
5553
)
5654
if age_bucket_size and age_bucket_size > 1:
5755
age_bins = build_age_bins(n_ages=n_ages, bucket_size=age_bucket_size)
@@ -68,7 +66,9 @@ def _evaluate_dataset(
6866
ss_values = None
6967
ss_target = None
7068
if profile.use_ss:
71-
ss_values = sim.calculate("social_security", period=year, map_to="household").values
69+
ss_values = sim.calculate(
70+
"social_security", period=year, map_to="household"
71+
).values
7272
ss_target = load_ssa_benefit_projections(year)
7373

7474
payroll_values = None

0 commit comments

Comments
 (0)