Skip to content

Commit 89bbf73

Browse files
committed
simplify
1 parent 7eabd9b commit 89bbf73

1 file changed

Lines changed: 46 additions & 94 deletions

File tree

dte_adj/__init__.py

Lines changed: 46 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ def predict_dte(
5050
control_treatment_arm (int): The index of the treatment arm of the control group.
5151
locations (np.ndarray): Scalar values to be used for computing the cumulative distribution.
5252
alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
53-
variance_type (str, optional): Variance type to be used to compute confidence intervals.
53+
variance_type (str, optional): Variance type to be used to compute confidence intervals.
5454
Available values are "moment", "simple", and "uniform". Defaults to "moment".
5555
n_bootstrap (int, optional): Number of bootstrap samples. Defaults to 500.
5656
@@ -65,16 +65,16 @@ def predict_dte(
6565
6666
import numpy as np
6767
from dte_adj import SimpleDistributionEstimator
68-
68+
6969
# Generate sample data
7070
X = np.random.randn(1000, 5)
7171
D = np.random.binomial(1, 0.5, 1000)
7272
Y = X[:, 0] + 2 * D + np.random.randn(1000)
73-
73+
7474
# Fit estimator
7575
estimator = SimpleDistributionEstimator()
7676
estimator.fit(X, D, Y)
77-
77+
7878
# Compute DTE
7979
locations = np.linspace(Y.min(), Y.max(), 20)
8080
dte, lower, upper = estimator.predict_dte(
@@ -83,7 +83,7 @@ def predict_dte(
8383
locations=locations,
8484
variance_type="moment"
8585
)
86-
86+
8787
print(f"DTE shape: {dte.shape}") # Should match locations.shape
8888
print(f"Average DTE: {dte.mean():.3f}")
8989
"""
@@ -118,13 +118,13 @@ def predict_pte(
118118
locations (np.ndarray): Scalar values defining interval boundaries for probability computation.
119119
For each interval (locations[i], locations[i+1]], the PTE is computed.
120120
alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
121-
variance_type (str, optional): Variance type to be used to compute confidence intervals.
121+
variance_type (str, optional): Variance type to be used to compute confidence intervals.
122122
Available values are "moment", "simple", and "uniform". Defaults to "moment".
123123
n_bootstrap (int, optional): Number of bootstrap samples. Defaults to 500.
124124
125125
Returns:
126126
Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
127-
- Expected PTEs (np.ndarray): Treatment effect estimates for each interval,
127+
- Expected PTEs (np.ndarray): Treatment effect estimates for each interval,
128128
shape (len(locations)-1,)
129129
- Lower bounds (np.ndarray): Lower confidence interval bounds
130130
- Upper bounds (np.ndarray): Upper confidence interval bounds
@@ -134,27 +134,27 @@ def predict_pte(
134134
135135
import numpy as np
136136
from dte_adj import SimpleDistributionEstimator
137-
137+
138138
# Generate sample data
139139
X = np.random.randn(1000, 5)
140140
D = np.random.binomial(1, 0.5, 1000)
141141
Y = X[:, 0] + 2 * D + np.random.randn(1000)
142-
142+
143143
# Fit estimator
144144
estimator = SimpleDistributionEstimator()
145145
estimator.fit(X, D, Y)
146-
146+
147147
# Define interval boundaries
148148
locations = np.array([-2, -1, 0, 1, 2]) # Creates intervals: (-2,-1], (-1,0], (0,1], (1,2]
149-
149+
150150
# Compute PTE
151151
pte, lower, upper = estimator.predict_pte(
152152
target_treatment_arm=1,
153153
control_treatment_arm=0,
154154
locations=locations,
155155
variance_type="moment"
156156
)
157-
157+
158158
print(f"PTE shape: {pte.shape}") # Should be (4,) for 4 intervals
159159
print(f"Interval effects: {pte}")
160160
"""
@@ -809,27 +809,27 @@ class SimpleDistributionEstimator(SimpleStratifiedDistributionEstimator):
809809
"""
810810
A class for computing the empirical distribution function and distributional treatment effects
811811
using simple (unadjusted) estimation methods.
812-
812+
813813
This estimator computes Distribution Treatment Effects (DTE), Probability Treatment Effects (PTE),
814814
and Quantile Treatment Effects (QTE) without using machine learning models for adjustment.
815815
It provides a baseline approach suitable when treatment assignment is random or when
816816
covariate adjustment is not needed.
817-
817+
818818
Example:
819819
.. code-block:: python
820820
821821
import numpy as np
822822
from dte_adj import SimpleDistributionEstimator
823-
823+
824824
# Generate sample data
825825
X = np.random.randn(1000, 5)
826826
D = np.random.binomial(1, 0.5, 1000) # Random treatment
827827
Y = X[:, 0] + 2 * D + np.random.randn(1000)
828-
828+
829829
# Fit simple estimator
830830
estimator = SimpleDistributionEstimator()
831831
estimator.fit(X, D, Y)
832-
832+
833833
# Compute treatment effects
834834
locations = np.linspace(Y.min(), Y.max(), 20)
835835
dte, lower, upper = estimator.predict_dte(1, 0, locations)
@@ -875,30 +875,30 @@ def fit(
875875
class AdjustedDistributionEstimator(AdjustedStratifiedDistributionEstimator):
876876
"""
877877
A class for computing distribution treatment effects using machine learning adjustment.
878-
878+
879879
This estimator uses cross-fitting with ML models to adjust for confounding when computing
880880
Distribution Treatment Effects (DTE), Probability Treatment Effects (PTE), and
881881
Quantile Treatment Effects (QTE). It provides more precise estimates when treatment
882882
assignment depends on observed covariates.
883-
883+
884884
Example:
885885
.. code-block:: python
886886
887887
import numpy as np
888888
from sklearn.ensemble import RandomForestClassifier
889889
from dte_adj import AdjustedDistributionEstimator
890-
890+
891891
# Generate confounded data
892892
X = np.random.randn(1000, 5)
893893
treatment_prob = 1 / (1 + np.exp(-(X[:, 0] + X[:, 1])))
894894
D = np.random.binomial(1, treatment_prob, 1000)
895895
Y = X.sum(axis=1) + 2 * D + np.random.randn(1000)
896-
896+
897897
# Fit adjusted estimator
898898
base_model = RandomForestClassifier(n_estimators=100)
899899
estimator = AdjustedDistributionEstimator(base_model, folds=3)
900900
estimator.fit(X, D, Y)
901-
901+
902902
# Compute adjusted treatment effects
903903
locations = np.linspace(Y.min(), Y.max(), 20)
904904
dte, lower, upper = estimator.predict_dte(1, 0, locations, variance_type="moment")
@@ -934,34 +934,12 @@ def fit(
934934

935935
class SimpleLocalDistributionEstimator(SimpleStratifiedDistributionEstimator):
936936
"""
937-
A class for computing Local Distribution Treatment Effects (LDTE) and Local Probability
937+
A class for computing Local Distribution Treatment Effects (LDTE) and Local Probability
938938
Treatment Effects (LPTE) using simple empirical estimation.
939-
939+
940940
This estimator computes treatment effects that are weighted by treatment propensity
941941
within each stratum, providing estimates that are locally robust to treatment assignment
942942
heterogeneity across strata. It uses empirical methods without ML adjustment.
943-
944-
Example:
945-
.. code-block:: python
946-
947-
import numpy as np
948-
from dte_adj import SimpleLocalDistributionEstimator
949-
950-
# Generate stratified data
951-
X = np.random.randn(1000, 5)
952-
strata = np.random.choice([0, 1, 2], size=1000)
953-
# Treatment probability varies by stratum
954-
D = np.random.binomial(1, 0.2 + 0.3 * (strata == 1) + 0.4 * (strata == 2), 1000)
955-
Y = X[:, 0] + 2 * D + 0.5 * strata + np.random.randn(1000)
956-
957-
# Fit local estimator
958-
estimator = SimpleLocalDistributionEstimator()
959-
estimator.fit(X, D, D, Y, strata) # treatment_arms = treatment_indicator for binary
960-
961-
# Compute local treatment effects
962-
locations = np.linspace(Y.min(), Y.max(), 15)
963-
ldte, lower, upper = estimator.predict_ldte(1, 0, locations)
964-
lpte, lpte_lower, lpte_upper = estimator.predict_lpte(1, 0, locations)
965943
"""
966944

967945
def __init__(self):
@@ -1031,27 +1009,27 @@ def predict_ldte(
10311009
import numpy as np
10321010
from sklearn.linear_model import LogisticRegression
10331011
from dte_adj import AdjustedLocalDistributionEstimator
1034-
1012+
10351013
# Generate sample data with strata
10361014
np.random.seed(42)
10371015
X = np.random.randn(1000, 5)
10381016
strata = np.random.choice([0, 1], size=1000) # Binary strata
10391017
D = np.random.binomial(1, 0.3 + 0.4 * strata, 1000) # Treatment depends on strata
10401018
Y = X[:, 0] + 2 * D + strata + np.random.randn(1000)
1041-
1019+
10421020
# Fit local estimator
10431021
base_model = LogisticRegression()
10441022
estimator = AdjustedLocalDistributionEstimator(base_model)
10451023
estimator.fit(X, D, D, Y, strata) # treatment_arms = treatment_indicator for binary case
1046-
1024+
10471025
# Compute LDTE
10481026
locations = np.linspace(Y.min(), Y.max(), 20)
10491027
ldte, lower, upper = estimator.predict_ldte(
10501028
target_treatment_arm=1,
10511029
control_treatment_arm=0,
10521030
locations=locations
10531031
)
1054-
1032+
10551033
print(f"LDTE shape: {ldte.shape}") # Should match locations.shape
10561034
print(f"Average LDTE: {ldte.mean():.3f}")
10571035
"""
@@ -1093,28 +1071,28 @@ def predict_lpte(
10931071
import numpy as np
10941072
from sklearn.linear_model import LogisticRegression
10951073
from dte_adj import SimpleLocalDistributionEstimator
1096-
1074+
10971075
# Generate sample data with strata
10981076
np.random.seed(42)
10991077
X = np.random.randn(1000, 5)
11001078
strata = np.random.choice([0, 1, 2], size=1000) # Multiple strata
11011079
D = np.random.binomial(1, 0.2 + 0.3 * (strata == 1) + 0.4 * (strata == 2), 1000)
11021080
Y = X[:, 0] + 1.5 * D + 0.5 * strata + np.random.randn(1000)
1103-
1081+
11041082
# Fit simple local estimator
11051083
estimator = SimpleLocalDistributionEstimator()
11061084
estimator.fit(X, D, D, Y, strata)
1107-
1108-
# Define interval boundaries
1085+
1086+
# Define interval boundaries
11091087
locations = np.array([-2, -1, 0, 1, 2]) # Creates 4 intervals
1110-
1088+
11111089
# Compute LPTE
11121090
lpte, lower, upper = estimator.predict_lpte(
11131091
target_treatment_arm=1,
11141092
control_treatment_arm=0,
11151093
locations=locations
11161094
)
1117-
1095+
11181096
print(f"LPTE shape: {lpte.shape}") # Should be (4,) for 4 intervals
11191097
print(f"Interval effects: {lpte}")
11201098
"""
@@ -1125,39 +1103,13 @@ def predict_lpte(
11251103

11261104
class AdjustedLocalDistributionEstimator(AdjustedStratifiedDistributionEstimator):
11271105
"""
1128-
A class for computing Local Distribution Treatment Effects (LDTE) and Local Probability
1106+
A class for computing Local Distribution Treatment Effects (LDTE) and Local Probability
11291107
Treatment Effects (LPTE) using ML-adjusted estimation.
1130-
1108+
11311109
This estimator combines local treatment effect estimation with machine learning adjustment,
11321110
providing treatment effects that are both locally robust to treatment assignment heterogeneity
1133-
and adjusted for confounding through observed covariates. It uses cross-fitting for
1111+
and adjusted for confounding through observed covariates. It uses cross-fitting for
11341112
more precise estimates in complex treatment assignment scenarios.
1135-
1136-
Example:
1137-
.. code-block:: python
1138-
1139-
import numpy as np
1140-
from sklearn.ensemble import GradientBoostingClassifier
1141-
from dte_adj import AdjustedLocalDistributionEstimator
1142-
1143-
# Generate complex stratified and confounded data
1144-
X = np.random.randn(1000, 5)
1145-
strata = np.random.choice([0, 1], size=1000)
1146-
# Treatment depends on both covariates and strata
1147-
logit_score = X[:, 0] + 0.5 * X[:, 1] + 2 * strata
1148-
treatment_prob = 1 / (1 + np.exp(-logit_score))
1149-
D = np.random.binomial(1, treatment_prob, 1000)
1150-
Y = X.sum(axis=1) + 2 * D + strata + np.random.randn(1000)
1151-
1152-
# Fit adjusted local estimator
1153-
base_model = GradientBoostingClassifier(n_estimators=100)
1154-
estimator = AdjustedLocalDistributionEstimator(base_model, folds=5)
1155-
estimator.fit(X, D, D, Y, strata)
1156-
1157-
# Compute ML-adjusted local treatment effects
1158-
locations = np.linspace(Y.min(), Y.max(), 15)
1159-
ldte, lower, upper = estimator.predict_ldte(1, 0, locations)
1160-
lpte, lpte_lower, lpte_upper = estimator.predict_lpte(1, 0, locations)
11611113
"""
11621114

11631115
def __init__(self, base_model: Any, folds=3, is_multi_task=False):
@@ -1222,7 +1174,7 @@ def predict_ldte(
12221174
Returns:
12231175
Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
12241176
- Expected LDTEs (np.ndarray): ML-adjusted local treatment effect estimates
1225-
- Lower bounds (np.ndarray): Lower confidence interval bounds
1177+
- Lower bounds (np.ndarray): Lower confidence interval bounds
12261178
- Upper bounds (np.ndarray): Upper confidence interval bounds
12271179
12281180
Example:
@@ -1231,7 +1183,7 @@ def predict_ldte(
12311183
import numpy as np
12321184
from sklearn.ensemble import RandomForestClassifier
12331185
from dte_adj import AdjustedLocalDistributionEstimator
1234-
1186+
12351187
# Generate sample data with complex treatment assignment
12361188
np.random.seed(42)
12371189
X = np.random.randn(1000, 5)
@@ -1240,20 +1192,20 @@ def predict_ldte(
12401192
treatment_prob = 0.2 + 0.3 * (X[:, 0] > 0) + 0.2 * strata
12411193
D = np.random.binomial(1, treatment_prob, 1000)
12421194
Y = X.sum(axis=1) + 2 * D + strata + np.random.randn(1000)
1243-
1195+
12441196
# Fit adjusted local estimator
12451197
base_model = RandomForestClassifier(n_estimators=50, random_state=42)
12461198
estimator = AdjustedLocalDistributionEstimator(base_model, folds=3)
12471199
estimator.fit(X, D, D, Y, strata)
1248-
1200+
12491201
# Compute LDTE
12501202
locations = np.linspace(Y.min(), Y.max(), 15)
12511203
ldte, lower, upper = estimator.predict_ldte(
12521204
target_treatment_arm=1,
12531205
control_treatment_arm=0,
12541206
locations=locations
12551207
)
1256-
1208+
12571209
print(f"ML-adjusted LDTE shape: {ldte.shape}")
12581210
print(f"Average LDTE: {ldte.mean():.3f}")
12591211
"""
@@ -1294,7 +1246,7 @@ def predict_lpte(
12941246
import numpy as np
12951247
from sklearn.ensemble import GradientBoostingClassifier
12961248
from dte_adj import AdjustedLocalDistributionEstimator
1297-
1249+
12981250
# Generate sample data with confounding
12991251
np.random.seed(42)
13001252
X = np.random.randn(1000, 5)
@@ -1304,20 +1256,20 @@ def predict_lpte(
13041256
treatment_prob = 1 / (1 + np.exp(-logit_score))
13051257
D = np.random.binomial(1, treatment_prob, 1000)
13061258
Y = X.sum(axis=1) + 1.5 * D + 0.3 * strata + np.random.randn(1000)
1307-
1259+
13081260
# Fit adjusted estimator with gradient boosting
13091261
base_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
13101262
estimator = AdjustedLocalDistributionEstimator(base_model, folds=5)
13111263
estimator.fit(X, D, D, Y, strata)
1312-
1264+
13131265
# Define intervals and compute LPTE
13141266
locations = np.array([-3, -1, 0, 1, 3]) # 4 intervals
13151267
lpte, lower, upper = estimator.predict_lpte(
13161268
target_treatment_arm=1,
13171269
control_treatment_arm=0,
13181270
locations=locations
13191271
)
1320-
1272+
13211273
print(f"ML-adjusted LPTE shape: {lpte.shape}") # Should be (4,)
13221274
print(f"Interval effects: {lpte}")
13231275
"""

0 commit comments

Comments
 (0)