test: improve unittest for simple estimator

TomeHirata · TomeHirata · commit d9417029b4af · 2024-07-20T20:42:13.000+09:00
diff --git a/dte_adj/__init__.py b/dte_adj/__init__.py
@@ -311,7 +311,7 @@ def find_quantile(quantile, arm):
 
         return result
 
-    def predict(self, treatment_arms: np.ndarray, outcomes: np.ndarray) -> np.ndarray:
+    def predict(self, treatment_arms: np.ndarray, locations: np.ndarray) -> np.ndarray:
         """Compute cumulative distribution values.
 
         Args:
@@ -321,7 +321,25 @@ def predict(self, treatment_arms: np.ndarray, outcomes: np.ndarray) -> np.ndarra
         Returns:
             np.ndarray: Estimated cumulative distribution values for the input.
         """
-        raise NotImplementedError()
+        if self.outcomes is None:
+            raise ValueError(
+                "This estimator has not been trained yet. Please call fit first"
+            )
+        # Compare Python scalars so the message shows {2}, not {np.int64(2)} (NumPy 2).
+        known_arms = set(np.asarray(self.treatment_arms).tolist())
+        unincluded_arms = set(np.asarray(treatment_arms).tolist()) - known_arms
+        if unincluded_arms:
+            raise ValueError(
+                f"This treatment_arms argument contains arms not included in the training data: {unincluded_arms}"
+            )
+
+        return self._compute_cumulative_distribution(
+            treatment_arms,
+            locations,
+            self.confoundings,
+            self.treatment_arms,
+            self.outcomes,
+        )[0]
 
     def _compute_cumulative_distribution(
         self,
@@ -375,29 +393,6 @@ def fit(
 
         return self
 
-    def predict(self, treatment_arms: np.ndarray, locations: np.ndarray) -> np.ndarray:
-        """Compute cumulative distribution values.
-
-        Args:
-            treatment_arms (np.ndarray): The index of the treatment arm.
-            locations (np.ndarray): Scalar values to be used for computing the cumulative distribution.
-
-        Returns:
-            np.ndarray: Estimated cumulative distribution values for the input.
-        """
-        if self.outcomes is None:
-            raise ValueError(
-                "This estimator has not been trained yet. Please call fit first"
-            )
-
-        return self._compute_cumulative_distribution(
-            treatment_arms,
-            locations,
-            self.confoundings,
-            self.treatment_arms,
-            self.outcomes,
-        )[0]
-
     def _compute_cumulative_distribution(
         self,
         target_treatment_arms: np.ndarray,
diff --git a/tests/test_simple_estimator.py b/tests/test_simple_estimator.py
@@ -5,51 +5,71 @@
 
 
 class TestSimpleEstimator(unittest.TestCase):
-    def test_prediction_success(self):
+    def setUp(self):
+        self.estimator = SimpleDistributionEstimator()
+        self.confoundings = np.zeros((20, 5))
+        self.treatment_arms = np.hstack([np.zeros(10), np.ones(10)])
+        self.outcomes = np.arange(20)
+        self.estimator.fit(self.confoundings, self.treatment_arms, self.outcomes)
+
+    def test_fit(self):
+        self.assertTrue(np.array_equal(self.estimator.confoundings, self.confoundings))
+        self.assertTrue(
+            np.array_equal(self.estimator.treatment_arms, self.treatment_arms)
+        )
+        self.assertTrue(np.array_equal(self.estimator.outcomes, self.outcomes))
+
+    def test_fit_invalid_shapes(self):
         # Arrange
-        X = np.arange(20)
-        D = np.zeros(20)
-        D[:10] = 1
-        Y = np.arange(20)
-        subject = SimpleDistributionEstimator()
-        subject.fit(X, D, Y)
+        confoundings_invalid = np.array([[1, 2], [3, 4]])
+        treatment_arms_invalid = np.array([0, 1])
+        outcomes_invalid = np.array([0.5, 0.7])
+
+        # Assert
+        with self.assertRaises(ValueError):
+            self.estimator.fit(confoundings_invalid, self.treatment_arms, self.outcomes)
+
+        with self.assertRaises(ValueError):
+            self.estimator.fit(self.confoundings, treatment_arms_invalid, self.outcomes)
+
+        with self.assertRaises(ValueError):
+            self.estimator.fit(self.confoundings, self.treatment_arms, outcomes_invalid)
+
+    def test_predict(self):
+        # Arrange
+        treatment_arms_test = np.array([0, 1])
+        locations_test = np.array([3, 6])
+        expected_output = np.array([0.4, 0])
 
         # Act
-        actual = subject.predict(D, Y)
+        output = self.estimator.predict(treatment_arms_test, locations_test)
 
         # Assert
-        expected = np.array(
-            [0.1 * i for i in range(1, 11)] + [0.1 * i for i in range(1, 11)]
-        )
-        npt.assert_allclose(actual, expected)
+        npt.assert_allclose(output, expected_output, atol=1e-8)
 
     def test_prediction_fail_before_fit(self):
         # Arrange
-        D = np.zeros(20)
-        D[:10] = 1
-        Y = np.arange(20)
+        treatment_arms_test = np.array([0, 1])
+        locations_test = np.array([3, 6])
         subject = SimpleDistributionEstimator()
 
         # Act, Assert
         with self.assertRaises(ValueError) as cm:
-            subject.predict(D, Y)
+            subject.predict(treatment_arms_test, locations_test)
         self.assertEqual(
             str(cm.exception),
             "This estimator has not been trained yet. Please call fit first",
         )
 
-    def test_fit_fail_invalid_input(self):
+    def test_prediction_fail_invalid_arm(self):
         # Arrange
-        X = np.arange(20)
-        D = np.zeros(10)
-        D[:10] = 1
-        Y = np.arange(20)
-        subject = SimpleDistributionEstimator()
+        treatment_arms_invalid = np.array([2])
+        locations_test = np.array([3])
 
         # Act, Assert
         with self.assertRaises(ValueError) as cm:
-            subject.fit(X, D, Y)
+            self.estimator.predict(treatment_arms_invalid, locations_test)
         self.assertEqual(
             str(cm.exception),
-            "The shape of confounding and treatment_arm should be same",
+            "This treatment_arms argument contains arms not included in the training data: {2}",
         )