|
| 1 | +Get Started |
| 2 | +======================= |
| 3 | + |
| 4 | +This page covers the basic usage of the dte_adj library. |
| 5 | + |
| 6 | +First, generate data to train a cumulative distribution function. |
| 7 | + |
| 8 | +.. code-block:: python |
| 9 | +
|
| 10 | + import numpy as np |
| 11 | +
|
| 12 | + def generate_data(n, d_x=100, rho=0.5): |
| 13 | + """ |
| 14 | + Generate data according to the described data generating process (DGP). |
| 15 | +
|
| 16 | + Args: |
| 17 | + n (int): Number of samples. |
| 18 | + d_x (int): Number of covariates. Default is 100. |
| 19 | + rho (float): Success probability for the Bernoulli distribution. Default is 0.5. |
| 20 | +
|
| 21 | + Returns: |
| 22 | + X (np.ndarray): Covariates matrix of shape (n, d_x). |
| 23 | + D (np.ndarray): Treatment variable array of shape (n,). |
| 24 | + Y (np.ndarray): Outcome variable array of shape (n,). |
| 25 | + """ |
| 26 | + # Generate covariates X from a uniform distribution on (0, 1) |
| 27 | + X = np.random.uniform(0, 1, (n, d_x)) |
| 28 | + |
| 29 | + # Generate treatment variable D from a Bernoulli distribution with success probability rho |
| 30 | + D = np.random.binomial(1, rho, n) |
| 31 | + |
| 32 | + # Define beta_j and gamma_j according to the problem statement |
| 33 | + beta = np.zeros(d_x) |
| 34 | + gamma = np.zeros(d_x) |
| 35 | + |
| 36 | + # Set the first 50 values of beta and gamma to 1 |
| 37 | + beta[:50] = 1 |
| 38 | + gamma[:50] = 1 |
| 39 | + |
| 40 | + # Compute the outcome Y |
| 41 | + U = np.random.normal(0, 1, n) # Error term |
| 42 | + linear_term = np.dot(X, beta) |
| 43 | + quadratic_term = np.dot(X**2, gamma) |
| 44 | + |
| 45 | + # Outcome equation |
| 46 | + Y = D + linear_term + quadratic_term + U |
| 47 | + |
| 48 | + return X, D, Y |
| 49 | +
|
| 50 | + n = 100 # Sample size |
| 51 | + X, D, Y = generate_data(n) |
| 52 | +
|
| 53 | +Then, let's build an empirical cumulative distribution function (CDF). |
| 54 | + |
| 55 | +.. code-block:: python |
| 56 | +
|
| 57 | + import dte_adj |
| 58 | + estimator = dte_adj.SimpleDistributionEstimator() |
| 59 | + estimator.fit(X, D, Y) |
| 60 | + cdf = estimator.predict(D, Y) |
| 61 | +
|
| 62 | +The distributional treatment effect (DTE) can be computed easily with the following code. |
| 63 | + |
| 64 | +.. code-block:: python |
| 65 | +
|
| 66 | + dte, lower_bound, upper_bound = estimator.predict_dte(target_treatment_arm=1, control_treatment_arm=0, locations=np.sort(Y), variance_type="simple") |
| 67 | +
|
| 68 | +A convenience function is available to visualize distribution effects. This function can also be used for other distribution parameters, including the Probability Treatment Effect (PTE) and the Quantile Treatment Effect (QTE). |
| 69 | + |
| 70 | +.. code-block:: python |
| 71 | +
|
| 72 | + plot(np.sort(Y), dte, lower_bound, upper_bound, title="DTE of simple estimator") |
| 73 | +
|
| 74 | +.. image:: _static/dte_empirical.png |
| 75 | + :alt: DTE of empirical estimator |
| 76 | + :height: 300px |
| 77 | + :width: 450px |
| 78 | + :align: center |
| 79 | + |
| 80 | +To initialize the adjusted distribution function, a base model for the conditional distribution function needs to be passed. |
| 81 | +In the following example, we use Logistic Regression. |
| 82 | + |
| 83 | +.. code-block:: python |
| 84 | +
|
| 85 | + from sklearn.linear_model import LogisticRegression |
| 86 | + logit = LogisticRegression() |
| 87 | + estimator = dte_adj.AdjustedDistributionEstimator(logit, folds=3) |
| 88 | + estimator.fit(X, D, Y) |
| 89 | + cdf = estimator.predict(D, Y) |
| 90 | +
|
| 91 | +The DTE can be computed and visualized with the following code. |
| 92 | + |
| 93 | +.. code-block:: python |
| 94 | +
|
| 95 | + dte, lower_bound, upper_bound = estimator.predict_dte(target_treatment_arm=1, control_treatment_arm=0, locations=np.sort(Y), variance_type="simple") |
| 96 | + plot(np.sort(Y), dte, lower_bound, upper_bound, title="DTE of adjusted estimator with simple confidence band") |
| 97 | +
|
| 98 | +.. image:: _static/dte_simple.png |
| 99 | + :alt: DTE of adjusted estimator with simple confidence band |
| 100 | + :height: 300px |
| 101 | + :width: 450px |
| 102 | + :align: center |
| 103 | + |
| 104 | +Confidence bands can be computed in different ways. In the following code, we use the moment method to calculate the confidence bands. |
| 105 | + |
| 106 | +.. code-block:: python |
| 107 | +
|
| 108 | + dte, lower_bound, upper_bound = estimator.predict_dte(target_treatment_arm=1, control_treatment_arm=0, locations=np.sort(Y), variance_type="moment") |
| 109 | + plot(np.sort(Y), dte, lower_bound, upper_bound, title="DTE of adjusted estimator with moment confidence band") |
| 110 | +
|
| 111 | +.. image:: _static/dte_moment.png |
| 112 | + :alt: DTE of adjusted estimator with moment confidence band |
| 113 | + :height: 300px |
| 114 | + :width: 450px |
| 115 | + :align: center |
| 116 | + |
| 117 | +A uniform confidence band is used when "uniform" is specified for the "variance_type" argument. |
| 118 | + |
| 119 | +.. code-block:: python |
| 120 | +
|
| 121 | + dte, lower_bound, upper_bound = estimator.predict_dte(target_treatment_arm=1, control_treatment_arm=0, locations=np.sort(Y), variance_type="uniform") |
| 122 | + plot(np.sort(Y), dte, lower_bound, upper_bound, title="DTE of adjusted estimator with uniform confidence band") |
| 123 | +
|
| 124 | +.. image:: _static/dte_uniform.png |
| 125 | + :alt: DTE of adjusted estimator with uniform confidence band |
| 126 | + :height: 300px |
| 127 | + :width: 450px |
| 128 | + :align: center |
| 129 | + |
| 130 | +To compute the PTE, we can use the "predict_pte" method. |
| 131 | + |
| 132 | +.. code-block:: python |
| 133 | +
|
| 134 | + locations = np.linspace(Y.min(), Y.max(), 20) |
| 135 | + pte, lower_bound, upper_bound = estimator.predict_pte(target_treatment_arm=1, control_treatment_arm=0, width=1, locations=locations, variance_type="simple") |
| 136 | + plot(locations, pte, lower_bound, upper_bound, chart_type="bar", title="PTE of adjusted estimator with simple confidence band") |
| 137 | +
|
| 138 | +.. image:: _static/pte_simple.png |
| 139 | + :alt: PTE of adjusted estimator with simple confidence band |
| 140 | + :height: 300px |
| 141 | + :width: 450px |
| 142 | + :align: center |
| 143 | + |
| 144 | +To compute the QTE, we can use the "predict_qte" method. The confidence band is computed by the bootstrap method. |
| 145 | + |
| 146 | +.. code-block:: python |
| 147 | +
|
| 148 | + quantiles = np.array([0.1 * i for i in range(1, 10)], dtype=np.float32) |
| 149 | + qte, lower_bound, upper_bound = estimator.predict_qte(target_treatment_arm=1, control_treatment_arm=0, quantiles=quantiles, n_bootstrap=30) |
| 150 | + plot(quantiles, qte, lower_bound, upper_bound, title="QTE of adjusted estimator") |
| 151 | +
|
| 152 | +.. image:: _static/qte.png |
| 153 | + :alt: QTE of adjusted estimator |
| 154 | + :height: 300px |
| 155 | + :width: 450px |
| 156 | + :align: center |
0 commit comments