CyberAgentAILab
diff --git a/dte_adj/base.py b/dte_adj/base.py
Lines changed: 26 additions & 2 deletions
diff --git a/dte_adj/local.py b/dte_adj/local.py
Lines changed: 12 additions & 0 deletions
diff --git a/dte_adj/stratified.py b/dte_adj/stratified.py
Lines changed: 21 additions & 3 deletions
@@ -2,6 +2,7 @@
 from typing import Tuple, Optional
 from scipy.stats import norm
 from abc import ABC
+from tqdm.auto import tqdm
 import dte_adj
 
 
@@ -27,6 +28,7 @@ def predict_dte(
         alpha: float = 0.05,
         variance_type="moment",
         n_bootstrap=500,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Distribution Treatment Effects (DTE) based on the estimator for the distribution function.
@@ -43,6 +45,7 @@ def predict_dte(
             variance_type (str, optional): Variance type to be used to compute confidence intervals.
                 Available values are "moment", "simple", and "uniform". Defaults to "moment".
             n_bootstrap (int, optional): Number of bootstrap samples. Defaults to 500.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -84,6 +87,7 @@ def predict_dte(
             alpha,
             variance_type,
             n_bootstrap,
+            display_progress,
         )
 
     def predict_pte(
@@ -94,6 +98,7 @@ def predict_pte(
         alpha: float = 0.05,
         variance_type="moment",
         n_bootstrap=500,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Probability Treatment Effects (PTE) based on the estimator for the distribution function.
@@ -111,6 +116,7 @@ def predict_pte(
             variance_type (str, optional): Variance type to be used to compute confidence intervals.
                 Available values are "moment", "simple", and "uniform". Defaults to "moment".
             n_bootstrap (int, optional): Number of bootstrap samples. Defaults to 500.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -155,6 +161,7 @@ def predict_pte(
             alpha,
             variance_type,
             n_bootstrap,
+            display_progress,
         )
 
     def predict_qte(
@@ -164,6 +171,7 @@ def predict_qte(
         quantiles: Optional[np.ndarray] = None,
         alpha: float = 0.05,
         n_bootstrap=500,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Quantile Treatment Effects (QTE) based on the estimator for the distribution function.
@@ -178,6 +186,7 @@ def predict_qte(
             quantiles (np.ndarray, optional): Quantiles used for QTE. Defaults to [0.1, 0.2, ..., 0.9].
             alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
             n_bootstrap (int, optional): Number of bootstrap samples. Defaults to 500.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -226,7 +235,10 @@ def predict_qte(
         indexes = np.arange(n_obs)
 
         qtes = np.zeros((n_bootstrap, qte.shape[0]))
-        for b in range(n_bootstrap):
+        bootstrap_iter = range(n_bootstrap)
+        if display_progress:
+            bootstrap_iter = tqdm(bootstrap_iter, desc="Bootstrap QTE")
+        for b in bootstrap_iter:
             bootstrap_indexes = np.random.choice(indexes, size=n_obs, replace=True)
 
             qtes[b] = self._compute_qtes(
@@ -254,6 +266,7 @@ def _compute_dtes(
         alpha: float,
         variance_type: str,
         n_bootstrap: int,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Compute expected DTEs."""
         treatment_cdf, treatment_cdf_mat, _ = self._compute_cumulative_distribution(
@@ -262,13 +275,15 @@ def _compute_dtes(
             self.covariates,
             self.treatment_arms,
             self.outcomes,
+            display_progress=display_progress,
         )
         control_cdf, control_cdf_mat, _ = self._compute_cumulative_distribution(
             control_treatment_arm,
             locations,
             self.covariates,
             self.treatment_arms,
             self.outcomes,
+            display_progress=display_progress,
         )
 
         dte = treatment_cdf - control_cdf
@@ -305,6 +320,7 @@ def _compute_ptes(
         alpha: float,
         variance_type: str,
         n_bootstrap: int,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Compute expected PTEs."""
         treatment_pdf, treatment_pdf_mat, _ = self._compute_interval_probability(
@@ -313,13 +329,15 @@ def _compute_ptes(
             self.covariates,
             self.treatment_arms,
             self.outcomes,
+            display_progress=display_progress,
         )
         control_pdf, control_pdf_mat, _ = self._compute_interval_probability(
             control_treatment_arm,
             locations,
             self.covariates,
             self.treatment_arms,
             self.outcomes,
+            display_progress=display_progress,
         )
 
         pte = treatment_pdf - control_pdf
@@ -398,13 +416,16 @@ def find_quantile(quantile, arm):
 
         return result
 
-    def predict(self, treatment_arm: int, locations: np.ndarray) -> np.ndarray:
+    def predict(
+        self, treatment_arm: int, locations: np.ndarray, display_progress: bool = True
+    ) -> np.ndarray:
         """
         Compute cumulative distribution values.
 
         Args:
             treatment_arm (int): The index of the treatment arm.
             outcomes (np.ndarray): Scalar values to be used for computing the cumulative distribution.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             np.ndarray: Estimated cumulative distribution values for the input.
@@ -425,6 +446,7 @@ def predict(self, treatment_arm: int, locations: np.ndarray) -> np.ndarray:
             self.covariates,
             self.treatment_arms,
             self.outcomes,
+            display_progress=display_progress,
         )[0]
 
     def _compute_cumulative_distribution(
@@ -434,6 +456,7 @@ def _compute_cumulative_distribution(
         covariates: np.ndarray,
         treatment_arms: np.ndarray,
         outcomes: np.array,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute the cumulative distribution values.
@@ -444,6 +467,7 @@ def _compute_cumulative_distribution(
             covariates: (np.ndarray): An array of covariates variables in the observed data.
             treatment_arms (np.ndarray): An array of treatment arms in the observed data.
             outcomes (np.ndarray): An array of outcomes in the observed data.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to False.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: Estimated cumulative distribution values, prediction for each observation, and superset prediction for each observation.
 
@@ -61,6 +61,7 @@ def predict_ldte(
         control_treatment_arm: int,
         locations: np.ndarray,
         alpha: float = 0.05,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Local Distribution Treatment Effects (LDTE).
@@ -74,6 +75,7 @@ def predict_ldte(
             control_treatment_arm (int): The index of the treatment arm of the control group.
             locations (np.ndarray): Scalar values to be used for computing the cumulative distribution.
             alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -117,6 +119,7 @@ def predict_ldte(
             control_treatment_arm,
             locations,
             alpha,
+            display_progress,
         )
 
     def predict_lpte(
@@ -125,6 +128,7 @@ def predict_lpte(
         control_treatment_arm: int,
         locations: np.ndarray,
         alpha: float = 0.05,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Local Probability Treatment Effects (LPTE).
@@ -139,6 +143,7 @@ def predict_lpte(
             locations (np.ndarray): Scalar values defining interval boundaries for probability computation.
                 For each interval (locations[i], locations[i+1]], the LPTE is computed.
             alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -184,6 +189,7 @@ def predict_lpte(
             control_treatment_arm,
             locations,
             alpha,
+            display_progress,
         )
 
 
@@ -230,6 +236,7 @@ def predict_ldte(
         control_treatment_arm: int,
         locations: np.ndarray,
         alpha: float = 0.05,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Local Distribution Treatment Effects (LDTE) using ML adjustment.
@@ -242,6 +249,7 @@ def predict_ldte(
             control_treatment_arm (int): The index of the treatment arm of the control group.
             locations (np.ndarray): Scalar values to be used for computing the cumulative distribution.
             alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -287,6 +295,7 @@ def predict_ldte(
             control_treatment_arm,
             locations,
             alpha,
+            display_progress,
         )
 
     def predict_lpte(
@@ -295,6 +304,7 @@ def predict_lpte(
         control_treatment_arm: int,
         locations: np.ndarray,
         alpha: float = 0.05,
+        display_progress: bool = True,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute Local Probability Treatment Effects (LPTE) using ML adjustment.
@@ -308,6 +318,7 @@ def predict_lpte(
             locations (np.ndarray): Scalar values defining interval boundaries for probability computation.
                 For each interval (locations[i], locations[i+1]], the LPTE is computed.
             alpha (float, optional): Significance level of the confidence bound. Defaults to 0.05.
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to True.
 
         Returns:
             Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
@@ -356,4 +367,5 @@ def predict_lpte(
             control_treatment_arm,
             locations,
             alpha,
+            display_progress,
         )
@@ -3,6 +3,7 @@
 import numpy as np
 from typing import Tuple, Any
 from copy import deepcopy
+from tqdm.auto import tqdm
 from dte_adj.base import DistributionEstimatorBase
 from dte_adj.util import ArrayLike, _convert_to_ndarray
 
@@ -54,6 +55,7 @@ def _compute_cumulative_distribution(
         covariates: np.ndarray,
         treatment_arms: np.ndarray,
         outcomes: np.array,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute the cumulative distribution values.
@@ -64,6 +66,7 @@ def _compute_cumulative_distribution(
             covariates: (np.ndarray): An array of covariates variables in the observed data.
             treatment_arm (np.ndarray): An array of treatment arms in the observed data.
             outcomes (np.ndarray): An array of outcomes in the observed data
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to False.
 
         Returns:
             Tuple of numpy arrays:
@@ -102,6 +105,7 @@ def _compute_interval_probability(
         covariates: np.ndarray,
         treatment_arms: np.ndarray,
         outcomes: np.array,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Compute the interval probabilities.
 
@@ -111,6 +115,7 @@ def _compute_interval_probability(
             covariates: (np.ndarray): An array of covariates variables in the observed data.
             treatment_arm (np.ndarray): An array of treatment arms in the observed data.
             outcomes (np.ndarray): An array of outcomes in the observed data
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to False.
 
         Returns:
             Tuple of numpy arrays:
@@ -219,6 +224,7 @@ def _compute_cumulative_distribution(
         covariates: np.ndarray,
         treatment_arms: np.ndarray,
         outcomes: np.array,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute the cumulative distribution values.
@@ -229,6 +235,7 @@ def _compute_cumulative_distribution(
             covariates: (np.ndarray): An array of covariates variables in the observed data.
             treatment_arm (np.ndarray): An array of treatment arms in the observed data.
             outcomes (np.ndarray): An array of outcomes in the observed data
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to False.
 
         Returns:
             Tuple of numpy arrays:
@@ -246,7 +253,10 @@ def _compute_cumulative_distribution(
         s_list = np.unique(strata)
         if self.is_multi_task:
             binomial = (outcomes.reshape(-1, 1) <= locations) * 1  # (n_records, n_loc)
-            for fold in range(self.folds):
+            fold_iter = range(self.folds)
+            if display_progress:
+                fold_iter = tqdm(fold_iter, desc="Cross-fitting (multi-task)")
+            for fold in fold_iter:
                 fold_mask = (folds != fold) & treatment_mask
                 for s in s_list:
                     s_mask = strata == s
@@ -270,7 +280,10 @@ def _compute_cumulative_distribution(
                     )
                     superset_prediction[superset_mask] = pred
         else:
-            for i, location in enumerate(locations):
+            loc_iter = enumerate(locations)
+            if display_progress:
+                loc_iter = tqdm(loc_iter, total=len(locations), desc="Computing CDF")
+            for i, location in loc_iter:
                 binomial = (outcomes <= location) * 1  # (n_records)
                 for fold in range(self.folds):
                     fold_mask = (folds != fold) & treatment_mask
@@ -322,6 +335,7 @@ def _compute_interval_probability(
         covariates: np.ndarray,
         treatment_arms: np.ndarray,
         outcomes: np.array,
+        display_progress: bool = False,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Compute the interval probabilities.
@@ -332,6 +346,7 @@ def _compute_interval_probability(
             covariates: (np.ndarray): An array of covariates variables in the observed data.
             treatment_arm (np.ndarray): An array of treatment arms in the observed data.
             outcomes (np.ndarray): An array of outcomes in the observed data
+            display_progress (bool, optional): Whether to display a progress bar. Defaults to False.
 
         Returns:
             Tuple of numpy arrays:
@@ -348,7 +363,10 @@ def _compute_interval_probability(
         strata = self.strata
         s_list = np.unique(strata)
         binominals = (outcomes[:, np.newaxis] <= locations) * 1  # (n_records, n_loc)
-        for i in range(len(locations) - 1):
+        interval_iter = range(len(locations) - 1)
+        if display_progress:
+            interval_iter = tqdm(interval_iter, desc="Computing interval prob.")
+        for i in interval_iter:
             binomial = binominals[:, i + 1] - binominals[:, i]
             for fold in range(self.folds):
                 fold_mask = (folds != fold) & treatment_mask