Skip to content

Commit 8e3ca8e

Browse files
committed
fix explanation of ml adjustment and fix stratum comparison
1 parent 53c6eab commit 8e3ca8e

2 files changed

Lines changed: 49 additions & 96 deletions

File tree

7.21 KB
Loading

docs/source/tutorials/oregon.rst

Lines changed: 49 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,8 @@ The Oregon experiment allows us to examine how treatment effects vary across dif
404404

405405
.. code-block:: python
406406
407+
from sklearn.ensemble import RandomForestRegressor
408+
407409
# Individual Stratum Analysis with Local Estimators
408410
print("\n=== Individual Stratum Analysis (Local Estimators) ===")
409411
@@ -428,15 +430,15 @@ The Oregon experiment allows us to examine how treatment effects vary across dif
428430
# Initialize estimators
429431
simple_estimator = dte_adj.SimpleLocalDistributionEstimator()
430432
ml_estimator = dte_adj.AdjustedLocalDistributionEstimator(
431-
LinearRegression(),
433+
RandomForestRegressor(n_estimators=10, random_state=42),
432434
folds=folds
433435
)
434436
435437
# Fit estimators
436438
simple_estimator.fit(stratum_data['X'], stratum_data['Z'],
437-
stratum_data['D'], stratum_data['Y'], stratum_data['strata'])
439+
stratum_data['D'], stratum_data['Y'], stratum_data['strata'])
438440
ml_estimator.fit(stratum_data['X'], stratum_data['Z'],
439-
stratum_data['D'], stratum_data['Y'], stratum_data['strata'])
441+
stratum_data['D'], stratum_data['Y'], stratum_data['strata'])
440442
441443
# Define evaluation locations based on stratum's data range
442444
locations = np.arange(stratum_data['Y'].min(), stratum_data['Y'].max(), location_step)
@@ -479,7 +481,7 @@ The Oregon experiment allows us to examine how treatment effects vary across dif
479481
480482
# Estimate LDTE for this stratum
481483
individual_results[stratum] = estimate_stratum_ldte(
482-
stratum_data, location_step=3000, folds=3
484+
stratum_data, location_step=2000, folds=3
483485
)
484486
485487
Visualization: Comparing Overall Population vs Stratified Results
@@ -488,101 +490,53 @@ Visualization: Comparing Overall Population vs Stratified Results
488490
.. code-block:: python
489491
490492
# Comparison: Overall vs Individual Strata (Local Estimators)
491-
fig, axes = plt.subplots(2, 2, figsize=(24, 12))
492-
493-
# Calculate global y-axis limits across all plots (to align y-axis)
494-
all_ydatas = []
495-
all_yerr_lowers = []
496-
all_yerr_uppers = []
497-
498-
# Collect all y values (means and error bounds) for ALL subplots
499-
# Overall population: Simple and ML-adjusted
500-
all_ydatas.append(ldte_simple)
501-
all_yerr_lowers.append(lower_simple)
502-
all_yerr_uppers.append(upper_simple)
503-
all_ydatas.append(ldte_ml)
504-
all_yerr_lowers.append(lower_ml)
505-
all_yerr_uppers.append(upper_ml)
506-
507-
# Each stratum: Simple and ML-adjusted
508-
for stratum, results in individual_results.items():
509-
if stratum == 'signed self up + others':
510-
continue
511-
if results is None:
512-
continue
513-
all_ydatas.append(results['simple']['ldte'])
514-
all_yerr_lowers.append(results['simple']['lower'])
515-
all_yerr_uppers.append(results['simple']['upper'])
516-
all_ydatas.append(results['ml']['ldte'])
517-
all_yerr_lowers.append(results['ml']['lower'])
518-
all_yerr_uppers.append(results['ml']['upper'])
519-
520-
# Determine min/max y for unified y-axis
521-
y_min = np.min([np.min(dat) for dat in all_yerr_lowers if dat is not None])
522-
y_max = np.max([np.max(dat) for dat in all_yerr_uppers if dat is not None])
493+
fig, axes = plt.subplots(2, 3, figsize=(24, 12))
523494
524495
# Row 1: Simple local estimators
525496
# Overall (all data)
526-
plot(
527-
outcome_ed_costs_locations, ldte_simple, lower_simple, upper_simple,
528-
title="ED Costs: Overall Population\n(Simple Local Estimator)",
529-
xlabel="Emergency Department Costs",
530-
ylabel="Local Distribution Treatment Effect",
531-
color="black", ax=axes[0, 0]
532-
)
533-
axes[0, 0].set_ylim(y_min, y_max)
497+
plot(outcome_ed_costs_locations, ldte_simple, lower_simple, upper_simple,
498+
title="ED Costs: Overall Population\n(Simple Local Estimator)",
499+
xlabel="Emergency Department Costs",
500+
ylabel="Local Distribution Treatment Effect",
501+
color="black", ax=axes[0, 0])
534502
535503
# Individual strata
536504
col_idx = 1
537505
for stratum, results in individual_results.items():
538-
if stratum == 'signed self up + others':
539-
continue
540506
if results is None or col_idx > 2:
541507
continue
542-
plot(
543-
results['locations'], results['simple']['ldte'],
544-
results['simple']['lower'], results['simple']['upper'],
545-
title=f"ED Costs: {stratum}\n(Simple Local Estimator, n={results['sample_size']:,})",
546-
xlabel="Emergency Department Costs",
547-
ylabel="Local Distribution Treatment Effect",
548-
color="blue" if col_idx == 1 else "green", ax=axes[0, col_idx]
549-
)
550-
axes[0, col_idx].set_ylim(y_min, y_max)
508+
509+
plot(results['locations'], results['simple']['ldte'],
510+
results['simple']['lower'], results['simple']['upper'],
511+
title=f"ED Costs: {stratum}\n(Simple Local Estimator, n={results['sample_size']:,})",
512+
xlabel="Emergency Department Costs",
513+
ylabel="Local Distribution Treatment Effect",
514+
color="blue" if col_idx == 1 else "green", ax=axes[0, col_idx])
551515
col_idx += 1
552516
553517
# Row 2: ML-Adjusted local estimators
554518
# Overall (all data)
555-
plot(
556-
outcome_ed_costs_locations, ldte_ml, lower_ml, upper_ml,
557-
title="ED Costs: Overall Population\n(ML-Adjusted Local Estimator)",
558-
xlabel="Emergency Department Costs",
559-
ylabel="Local Distribution Treatment Effect",
560-
color="black", ax=axes[1, 0]
561-
)
562-
axes[1, 0].set_ylim(y_min, y_max)
519+
plot(outcome_ed_costs_locations, ldte_ml, lower_ml, upper_ml,
520+
title="ED Costs: Overall Population\n(ML-Adjusted Local Estimator)",
521+
xlabel="Emergency Department Costs",
522+
ylabel="Local Distribution Treatment Effect",
523+
color="black", ax=axes[1, 0])
563524
564525
# Individual strata
565526
col_idx = 1
566527
for stratum, results in individual_results.items():
567-
if stratum == 'signed self up + others':
568-
continue
569528
if results is None or col_idx > 2:
570529
continue
571-
plot(
572-
results['locations'], results['ml']['ldte'],
573-
results['ml']['lower'], results['ml']['upper'],
574-
title=f"ED Costs: {stratum}\n(ML-Adjusted Local Estimator, n={results['sample_size']:,})",
575-
xlabel="Emergency Department Costs",
576-
ylabel="Local Distribution Treatment Effect",
577-
color="blue" if col_idx == 1 else "green", ax=axes[1, col_idx]
578-
)
579-
axes[1, col_idx].set_ylim(y_min, y_max)
530+
531+
plot(results['locations'], results['ml']['ldte'],
532+
results['ml']['lower'], results['ml']['upper'],
533+
title=f"ED Costs: {stratum}\n(ML-Adjusted Local Estimator, n={results['sample_size']:,})",
534+
xlabel="Emergency Department Costs",
535+
ylabel="Local Distribution Treatment Effect",
536+
color="red" if col_idx == 1 else "purple", ax=axes[1, col_idx])
580537
col_idx += 1
581538
582-
plt.suptitle(
583-
"Comparison: Overall Population vs Individual Household Registration Strata (Local Estimators)",
584-
fontsize=16
585-
)
539+
plt.suptitle("Comparison: Overall Population vs Individual Household Registration Strata (Local Estimators)", fontsize=16)
586540
plt.tight_layout()
587541
plt.show()
588542
@@ -608,37 +562,36 @@ Visualization: Comparing Overall Population vs Stratified Results
608562
- **Signed Self Up + Others (Right panels, n=4,068)**:
609563

610564
- Simple: LDTE ≈ -0.55 at zero costs, converging to zero around $15,000-$20,000
611-
- ML-Adjusted: Shows extreme values (≈ -0.30 to +20 near zero costs with very wide confidence intervals)
612-
- Much larger magnitude effects, indicating households with multiple members show substantially stronger treatment effects
565+
- ML-Adjusted: LDTE ≈ -0.10 to -0.15 at zero costs, stable pattern with improved confidence intervals
566+
- Much larger magnitude effects in the Simple estimator, indicating households with multiple members show substantially stronger treatment effects
567+
- ML adjustment provides more conservative estimates, potentially controlling for confounding household characteristics
613568

614569
**2. Heterogeneity Across Strata**
615570

616571
The stratified analysis reveals substantial treatment effect heterogeneity:
617572

618573
- **"Signed self up" stratum**: Moderate effects (LDTE ≈ -0.18 to -0.20), suggesting single-person households have more modest increases in ED utilization
619-
- **"Signed self up + others" stratum**: Large effects (LDTE ≈ -0.55 for Simple), suggesting multi-person households experience much greater increases in ED access
620-
- The 3-4x larger effect in the "signed self up + others" group indicates that household composition is a critical moderator of insurance impact
574+
- **"Signed self up + others" stratum**: Large effects in Simple estimator (LDTE ≈ -0.55), suggesting multi-person households experience much greater increases in ED access when not adjusting for covariates
575+
- The roughly 3x larger effect in the "signed self up + others" group (Simple estimator: LDTE ≈ -0.55 versus ≈ -0.18 to -0.20) indicates that household composition is a critical moderator of insurance impact
576+
- However, ML adjustment substantially reduces this estimate, suggesting that some of the observed effect may be attributable to observable household characteristics rather than pure treatment effects
621577

622-
**3. Estimation Challenges and Confidence Intervals**
578+
**3. Comparison of Estimation Methods**
623579

624-
- **Overall population**: Both estimators show reasonable confidence intervals, with ML adjustment providing modest improvements in the mid-range.
625-
- **"Signed self up" stratum**: Confidence intervals remain wide but manageable for both estimators, showing similar patterns to the overall population.
580+
- **Overall population**: Both estimators show reasonable confidence intervals, with ML adjustment providing modest improvements in precision and slightly more conservative estimates.
581+
- **"Signed self up" stratum**: Both estimators yield similar point estimates and manageable confidence intervals, suggesting robustness to model specification in this larger subsample.
626582
- **"Signed self up + others" stratum**:
627583

628-
- Extreme estimation instability, particularly for ML-adjusted estimator
629-
- Very wide confidence intervals and implausible point estimates (values reaching +20) suggest:
630-
631-
- Small sample size (n=4,068) insufficient for stable ML estimation
632-
- Extreme outliers or sparse data in certain cost regions
633-
- Overfitting or poor model specification in the ML adjustment
634-
635-
- The Simple estimator appears more stable for this smaller stratum
584+
- In this stratum, the Simple estimator yields the largest treatment effect of any stratum (LDTE ≈ -0.55)
585+
- ML adjustment substantially reduces the estimated effect and stabilizes confidence intervals
586+
- This divergence suggests that observable covariates (e.g., household size, age composition, baseline health status) explain a significant portion of the treatment effect heterogeneity
587+
- The improved stability of ML-adjusted estimates is consistent with the adjustment accounting for observable factors that may have been correlated with both treatment assignment and outcomes, although improved stability alone does not establish that confounding was removed
636588

637589
**4. Practical Implications**
638590

639-
- **Household structure matters**: Multi-person households show 3-4x larger treatment effects, likely because insurance coverage enables care-seeking for multiple family members
640-
- **Stratification reveals hidden heterogeneity**: The overall population estimate masks substantial variation across household types
641-
- **Sample size considerations**: ML adjustment may be counterproductive in smaller strata where model complexity exceeds data informativeness
591+
- **Household structure matters**: Multi-person households show substantially larger treatment effects in unadjusted analyses, likely because insurance coverage enables care-seeking for multiple family members.
592+
- **The role of covariates**: The difference between Simple and ML-adjusted estimates in the "signed self up + others" stratum highlights the importance of controlling for household characteristics. The unadjusted effect may overstate the pure treatment effect by conflating insurance provision with pre-existing household differences.
593+
- **Stratification reveals hidden heterogeneity**: The overall population estimate masks substantial variation across household types, demonstrating the value of subgroup analysis.
594+
- **Model specification considerations**: ML adjustment improves estimation stability in smaller strata and provides more defensible causal estimates by controlling for observable confounders. The convergence of all estimates to zero at higher cost levels confirms that the treatment primarily affects the lower tail of the cost distribution.
642595

643596
Conclusion
644597
~~~~~~~~~~

0 commit comments

Comments
 (0)