test: add unit tests for AdjustedDistributionEstimator

TomeHirata · TomeHirata · commit bceda6afe11c · 2024-07-21T20:54:29.000+09:00
diff --git a/dte_adj/__init__.py b/dte_adj/__init__.py
@@ -311,7 +311,7 @@ def find_quantile(quantile, arm):
             )
 
         return result
-    
+
     def fit(
         self, confoundings: np.ndarray, treatment_arms: np.ndarray, outcomes: np.ndarray
     ) -> "DistributionEstimatorBase":
@@ -446,8 +446,12 @@ def __init__(self, base_model, folds=3):
         Returns:
             AdjustedDistributionEstimator: An instance of the estimator.
         """
-        if (not hasattr(base_model, 'predict')) and (not hasattr(base_model, 'predict_proba')):
-            raise ValueError('base_model should implement either predict_proba or predict')
+        if (not hasattr(base_model, "predict")) and (
+            not hasattr(base_model, "predict_proba")
+        ):
+            raise ValueError(
+                "Base model should implement either predict_proba or predict"
+            )
         self.base_model = base_model
         self.folds = folds
         super().__init__()
@@ -496,13 +500,19 @@ def _compute_cumulative_distribution(
                     continue
                 model = deepcopy(self.base_model)
                 model.fit(confounding_train, binominal_train)
-                subset_prediction[subset_mask] = model.predict_proba(confounding_fit)[
-                    :, 1
-                ]
-                superset_prediction[superset_mask, i] = model.predict_proba(
-                    confoundings[superset_mask]
-                )[:, 1]
+                subset_prediction[subset_mask] = self._compute_model_prediction(
+                    model, confounding_fit
+                )
+                superset_prediction[superset_mask, i] = self._compute_model_prediction(
+                    model, confoundings[superset_mask]
+                )
             cumulative_distribution[i] = (
                 cdf - subset_prediction.mean() + superset_prediction[:, i].mean()
             )
         return cumulative_distribution, superset_prediction
+
+    def _compute_model_prediction(self, model, confoundings: np.ndarray) -> np.ndarray:
+        """Use column 1 of predict_proba when the model has it, else predict."""
+        if hasattr(model, "predict_proba"):
+            return model.predict_proba(confoundings)[:, 1]
+        return model.predict(confoundings)
diff --git a/tests/test_adjusted_estimator.py b/tests/test_adjusted_estimator.py
@@ -5,11 +5,25 @@
 
 
 class TestAdjustedEstimator(unittest.TestCase):
-    def test_prediction_success(self):
-        # TODO!
-        return
+    def setUp(self):
+        base_model = MagicMock()
+        base_model.predict_proba.side_effect = lambda x: x  # called with X only
+        self.estimator = AdjustedDistributionEstimator(base_model, folds=1)
+        self.confoundings = np.zeros((20, 5))
+        self.treatment_arms = np.hstack([np.zeros(10), np.ones(10)])  # 10 per arm
+        self.outcomes = np.arange(20)
+        self.estimator.fit(self.confoundings, self.treatment_arms, self.outcomes)
+
+    def test_init_fail_incorrect_base_model(self):
+        """A str has neither predict nor predict_proba, so __init__ must raise."""
+        expected = "Base model should implement either predict_proba or predict"
+        # Act
+        with self.assertRaises(ValueError) as raised:
+            AdjustedDistributionEstimator("dummy")
+        # Assert
+        self.assertEqual(str(raised.exception), expected)
 
-    def test_prediction_fail_before_fit(self):
+    def test_predict_fail_before_fit(self):
         # Arrange
         D = np.zeros(20)
         D[:10] = 1
@@ -41,3 +55,32 @@ def test_fit_fail_invalid_input(self):
             str(cm.exception),
             "The shape of confounding and treatment_arm should be same",
         )
+
+    def test_compute_cumulative_distribution(self):
+        """Check output shapes and values when the mocked model always says 0.5."""
+        # Arrange: the mock returns probability 0.5 in both columns for every row.
+        model = self.estimator.base_model
+        model.predict_proba.side_effect = lambda x: np.ones((x.shape[0], 2)) * 0.5
+        target_arms = np.zeros(10)
+        thresholds = np.arange(10)
+
+        # Act
+        cdf_values, predictions = self.estimator._compute_cumulative_distribution(
+            target_arms,
+            thresholds,
+            self.confoundings,
+            self.treatment_arms,
+            self.outcomes,
+        )
+
+        # Assert
+        self.assertEqual(cdf_values.shape, (10,))
+        self.assertEqual(predictions.shape, (20, 10))
+
+        # Arm 0 holds outcomes 0..9, so location i is expected at (i + 1) / 10.
+        for i, value in enumerate(cdf_values):
+            self.assertAlmostEqual(value, (i + 1) / 10, places=2)
+
+        # Every entry of the prediction matrix should be the mocked probability.
+        for value in predictions.ravel():
+            self.assertAlmostEqual(value, 0.5, places=2)
diff --git a/tests/test_distribution_estimator_base.py b/tests/test_distribution_estimator_base.py
@@ -133,7 +133,7 @@ def test_fit_success(self):
             np.array_equal(self.estimator.treatment_arms, self.treatment_arms)
         )
         self.assertTrue(np.array_equal(self.estimator.outcomes, self.outcomes))
-      
+
     def test_fit_invalid_shapes(self):
         # Arrange
         confoundings_invalid = np.array([[1, 2], [3, 4]])
@@ -149,7 +149,7 @@ def test_fit_invalid_shapes(self):
 
         with self.assertRaises(ValueError):
             self.estimator.fit(self.confoundings, self.treatment_arms, outcomes_invalid)
-    
+
     def test_predict_success(self):
         # Arrange
         treatment_arms_test = np.array([0, 1])
@@ -174,7 +174,7 @@ def test_predict_fail_before_fit(self):
         self.assertEqual(
             str(cm.exception),
             "This estimator has not been trained yet. Please call fit first",
-        )    
+        )
 
     def test_predict_fail_invalid_arm(self):
         # Arrange