Merge pull request #369 from CITCOM-project/jmafoster1/named-confidence-intervals

jmafoster1 · web-flow · commit f3d2915e0336 · 2025-12-11T14:12:51.000+01:00
Jmafoster1/named confidence intervals
diff --git a/causal_testing/estimation/logistic_regression_estimator.py b/causal_testing/estimation/logistic_regression_estimator.py
@@ -40,10 +40,20 @@ def estimate_unit_odds_ratio(self) -> EffectEstimate:
         :return: The odds ratio. Confidence intervals are not yet supported.
         """
         model = self.fit_model(self.df)
-        ci_low, ci_high = np.exp(model.conf_int(self.alpha).loc[self.base_test_case.treatment_variable.name])
-        return EffectEstimate(
+
+        treatment_columns = [
+            param
+            for param in model.params.index
+            if param == self.base_test_case.treatment_variable.name
+            or param.startswith(self.base_test_case.treatment_variable.name + "[")
+        ]
+
+        confidence_intervals = np.exp(model.conf_int(self.alpha).loc[treatment_columns])
+
+        result = EffectEstimate(
             "unit_odds_ratio",
-            pd.Series(np.exp(model.params[self.base_test_case.treatment_variable.name])),
-            pd.Series(ci_low),
-            pd.Series(ci_high),
+            pd.Series(np.exp(model.params[treatment_columns])),
+            pd.Series(confidence_intervals[0]),
+            pd.Series(confidence_intervals[1]),
         )
+        return result
diff --git a/causal_testing/main.py b/causal_testing/main.py
@@ -508,7 +508,7 @@ def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
 
     # Generation
     parser_generate = subparsers.add_parser(Command.GENERATE.value, help="Generate causal tests from a DAG")
-    parser_generate.add_argument("-D", "--dag_path", help="Path to the DAG file (.dot)", required=True)
+    parser_generate.add_argument("-D", "--dag-path", help="Path to the DAG file (.dot)", required=True)
     parser_generate.add_argument("-o", "--output", help="Path for output file (.json)", required=True)
     parser_generate.add_argument(
         "-e",
@@ -518,13 +518,13 @@ def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
     )
     parser_generate.add_argument(
         "-T",
-        "--effect_type",
+        "--effect-type",
         help="The effect type to estimate {direct, total}",
         default="direct",
     )
     parser_generate.add_argument(
         "-E",
-        "--estimate_type",
+        "--estimate-type",
         help="The estimate type to use when evaluating tests (defaults to coefficient)",
         default="coefficient",
     )
@@ -537,11 +537,11 @@ def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
 
     # Testing
     parser_test = subparsers.add_parser(Command.TEST.value, help="Run causal tests")
-    parser_test.add_argument("-D", "--dag_path", help="Path to the DAG file (.dot)", required=True)
+    parser_test.add_argument("-D", "--dag-path", help="Path to the DAG file (.dot)", required=True)
     parser_test.add_argument("-o", "--output", help="Path for output file (.json)", required=True)
     parser_test.add_argument("-i", "--ignore-cycles", help="Ignore cycles in DAG", action="store_true", default=False)
-    parser_test.add_argument("-d", "--data_paths", help="Paths to data files (.csv)", nargs="+", required=True)
-    parser_test.add_argument("-t", "--test_config", help="Path to test configuration file (.json)", required=True)
+    parser_test.add_argument("-d", "--data-paths", help="Paths to data files (.csv)", nargs="+", required=True)
+    parser_test.add_argument("-t", "--test-config", help="Path to test configuration file (.json)", required=True)
     parser_test.add_argument("-v", "--verbose", help="Enable verbose logging", action="store_true", default=False)
     parser_test.add_argument("-q", "--query", help="Query string to filter data (e.g. 'age > 18')", type=str)
     parser_test.add_argument(
@@ -553,7 +553,6 @@ def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
         dest="bootstrap_size",
         help="Number of bootstrap samples for causal test adequacy. Defaults to 100",
         type=int,
-        default=100,
     )
     parser_test.add_argument(
         "-s",
@@ -572,8 +571,12 @@ def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
     args = main_parser.parse_args(args)
 
     # Assume the user wants test adequacy if they're setting bootstrap_size
-    if hasattr(args, "bootstrap_size") and args.bootstrap_size:
+    print(args)
+    if getattr(args, "bootstrap_size", None) is not None:
         args.adequacy = True
+    if getattr(args, "adequacy", False) and getattr(args, "bootstrap_size", None) is None:
+        # Set the default here, not in argparse: a parser-level default would make the check above always enable adequacy
+        args.bootstrap_size = 100
 
     args.command = Command(args.command)
     return args
diff --git a/tests/estimation_tests/test_logistic_regression_estimator.py b/tests/estimation_tests/test_logistic_regression_estimator.py
@@ -20,4 +20,4 @@ def test_odds_ratio(self):
             BaseTestCase(Input("length_in", float), Output("completed", bool)), 65, 55, set(), df
         )
         effect_estimate = logistic_regression_estimator.estimate_unit_odds_ratio()
-        self.assertEqual(round(effect_estimate.value[0], 4), 0.8948)
+        self.assertEqual(round(effect_estimate.value.iloc[0], 4), 0.8948)
diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py
@@ -128,6 +128,19 @@ def test_create_base_test_case_missing_outcome(self):
             framework.create_base_test({"treatment_variable": "test_input", "expected_effect": {"missing": "NoEffect"}})
         self.assertEqual("\"Outcome variable 'missing' not found in inputs or outputs\"", str(e.exception))
 
+    def test_unloaded_tests(self):
+        framework = CausalTestingFramework(self.paths)
+        with self.assertRaises(ValueError) as e:
+            framework.run_tests()
+        self.assertEqual("No tests loaded. Call load_tests() first.", str(e.exception))
+
+    def test_unloaded_tests_batches(self):
+        framework = CausalTestingFramework(self.paths)
+        with self.assertRaises(ValueError) as e:
+            # run_tests_in_batches contains a yield (it is a generator), so next() is needed to run it and raise
+            next(framework.run_tests_in_batches())
+        self.assertEqual("No tests loaded. Call load_tests() first.", str(e.exception))
+
     def test_ctf(self):
         framework = CausalTestingFramework(self.paths)
         framework.setup()
@@ -136,8 +149,6 @@ def test_ctf(self):
         framework.load_tests()
         results = framework.run_tests()
 
-        print(results)
-
         # Save results
         framework.save_results(results)
 
@@ -205,7 +216,30 @@ def test_ctf_batches_exception_silent(self):
                     all_results.extend(json.load(f))
 
         self.assertEqual([result["passed"] for result in all_results], [False])
-        self.assertIsNotNone([result["result"].get("error") for result in all_results])
+        self.assertIsNotNone([result.get("error") for result in all_results])
+
+    def test_ctf_exception_silent(self):
+        framework = CausalTestingFramework(self.paths, query="test_input < 0")
+        framework.setup()
+
+        # Load and run tests
+        framework.load_tests()
+
+        results = framework.run_tests(silent=True)
+
+        with open(self.test_config_path, "r", encoding="utf-8") as f:
+            test_configs = json.load(f)
+
+        tests_passed = [
+            test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False
+            for test_config, test_case, result in zip(test_configs["tests"], framework.test_cases, results)
+        ]
+
+        self.assertEqual(tests_passed, [False])
+        self.assertEqual(
+            [result.error_message for result in results],
+            ["zero-size array to reduction operation maximum which has no identity"],
+        )
 
     def test_ctf_batches_exception(self):
         framework = CausalTestingFramework(self.paths, query="test_input < 0")
@@ -214,7 +248,7 @@ def test_ctf_batches_exception(self):
         # Load and run tests
         framework.load_tests()
         with self.assertRaises(ValueError):
-            list(framework.run_tests_in_batches())
+            next(framework.run_tests_in_batches())
 
     def test_ctf_batches_matches_run_tests(self):
         # Run the tests normally
@@ -318,11 +352,11 @@ def test_parse_args(self):
             [
                 "causal_testing",
                 "test",
-                "--dag_path",
+                "--dag-path",
                 str(self.dag_path),
-                "--data_paths",
+                "--data-paths",
                 str(self.data_paths[0]),
-                "--test_config",
+                "--test-config",
                 str(self.test_config_path),
                 "--output",
                 str(self.output_path.parent / "main.json"),
@@ -331,17 +365,110 @@ def test_parse_args(self):
             main()
             self.assertTrue((self.output_path.parent / "main.json").exists())
 
+    def test_parse_args_adequacy(self):
+        with patch(
+            "sys.argv",
+            [
+                "causal_testing",
+                "test",
+                "--dag-path",
+                str(self.dag_path),
+                "--data-paths",
+                str(self.data_paths[0]),
+                "--test-config",
+                str(self.test_config_path),
+                "--output",
+                str(self.output_path.parent / "main.json"),
+                "-a",
+            ],
+        ):
+            main()
+            with open(self.output_path.parent / "main.json") as f:
+                log = json.load(f)
+            assert all(test["result"]["bootstrap_size"] == 100 for test in log)
+
+    def test_parse_args_adequacy_batches(self):
+        with patch(
+            "sys.argv",
+            [
+                "causal_testing",
+                "test",
+                "--dag-path",
+                str(self.dag_path),
+                "--data-paths",
+                str(self.data_paths[0]),
+                "--test-config",
+                str(self.test_config_path),
+                "--output",
+                str(self.output_path.parent / "main.json"),
+                "-a",
+                "--batch-size",
+                "5",
+            ],
+        ):
+            main()
+            with open(self.output_path.parent / "main.json") as f:
+                log = json.load(f)
+            assert all(test["result"]["bootstrap_size"] == 100 for test in log)
+
+    def test_parse_args_bootstrap_size(self):
+        with patch(
+            "sys.argv",
+            [
+                "causal_testing",
+                "test",
+                "--dag-path",
+                str(self.dag_path),
+                "--data-paths",
+                str(self.data_paths[0]),
+                "--test-config",
+                str(self.test_config_path),
+                "--output",
+                str(self.output_path.parent / "main.json"),
+                "-b",
+                "50",
+            ],
+        ):
+            main()
+            with open(self.output_path.parent / "main.json") as f:
+                log = json.load(f)
+            assert all(test["result"]["bootstrap_size"] == 50 for test in log)
+
+    def test_parse_args_bootstrap_size_explicit_adequacy(self):
+        with patch(
+            "sys.argv",
+            [
+                "causal_testing",
+                "test",
+                "--dag-path",
+                str(self.dag_path),
+                "--data-paths",
+                str(self.data_paths[0]),
+                "--test-config",
+                str(self.test_config_path),
+                "--output",
+                str(self.output_path.parent / "main.json"),
+                "-a",
+                "-b",
+                "50",
+            ],
+        ):
+            main()
+            with open(self.output_path.parent / "main.json") as f:
+                log = json.load(f)
+            assert all(test["result"]["bootstrap_size"] == 50 for test in log)
+
     def test_parse_args_batches(self):
         with patch(
             "sys.argv",
             [
                 "causal_testing",
                 "test",
-                "--dag_path",
+                "--dag-path",
                 str(self.dag_path),
-                "--data_paths",
+                "--data-paths",
                 str(self.data_paths[0]),
-                "--test_config",
+                "--test-config",
                 str(self.test_config_path),
                 "--output",
                 str(self.output_path.parent / "main_batch.json"),
@@ -359,7 +486,7 @@ def test_parse_args_generation(self):
                 [
                     "causal_testing",
                     "generate",
-                    "--dag_path",
+                    "--dag-path",
                     str(self.dag_path),
                     "--output",
                     os.path.join(tmp, "tests.json"),
@@ -375,15 +502,15 @@ def test_parse_args_generation_non_default(self):
                 [
                     "causal_testing",
                     "generate",
-                    "--dag_path",
+                    "--dag-path",
                     str(self.dag_path),
                     "--output",
                     os.path.join(tmp, "tests_non_default.json"),
                     "--estimator",
                     "LogisticRegressionEstimator",
-                    "--estimate_type",
+                    "--estimate-type",
                     "unit_odds_ratio",
-                    "--effect_type",
+                    "--effect-type",
                     "total",
                 ],
             ):

Original file line number	Diff line number	Diff line change
`@@ -20,4 +20,4 @@ def test_odds_ratio(self):`
`20`	`20`	`BaseTestCase(Input("length_in", float), Output("completed", bool)), 65, 55, set(), df`
`21`	`21`	`)`
`22`	`22`	`effect_estimate = logistic_regression_estimator.estimate_unit_odds_ratio()`
`23`		`- self.assertEqual(round(effect_estimate.value[0], 4), 0.8948)`
	`23`	`+ self.assertEqual(round(effect_estimate.value.iloc[0], 4), 0.8948)`