WIP Refactor data path deprecation test

popescu-v · popescu-v · commit 36e3d9ce3fa6 · 2025-03-31T20:40:20.000+02:00
TODO:
- Fix the loose ends (the failure)
- Add reference scenarios and compare the output scenarios with them
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -267,6 +267,107 @@ def test_dictionary(self):
                 domain_copy.export_khiops_dictionary_file(copy_output_kdic)
                 assert_files_equal(self, ref_kdic, copy_output_kdic)
 
+    def _build_mock_deprecated_data_path_api_method_parameters(self):
+        # Pseudo-mock data to test the creation of scenarios
+        ref_additional_data_tables = {
+            "Services": "ServicesBidon.csv",
+            "Services/Usages": "UsagesBidon.csv",
+            "Address": "AddressBidon.csv",
+        }
+        ref_output_additional_data_tables = {
+            "Services": "TransferServicesBidon.csv",
+            "Services/Usages": "TransferUsagesBidon.csv",
+            "Address": "TransferAddressBidon.csv",
+        }
+        additional_data_tables = {
+            "Customer`Services": "ServicesBidon.csv",
+            "Customer`Services`Usages": "UsagesBidon.csv",
+            "Customer`Address": "AddressBidon.csv",
+        }
+        output_additional_data_tables = {
+            "Customer`Services": "TransferServicesBidon.csv",
+            "Customer`Services`Usages": "TransferUsagesBidon.csv",
+            "Customer`Address": "TransferAddressBidon.csv",
+        }
+
+        # Map method_name -> mock args and kwargs (Customer dataset only)
+        method_test_args = {
+            "check_database": {
+                "args": ["Customer.kdic", "Customer", "Customer.csv"],
+                "kwargs": {"additional_data_tables": copy(additional_data_tables)},
+            },
+            # Take the opportunity to test byte strings in the deploy_model test
+            "deploy_model": {
+                "args": [
+                    bytes("Customer.kdic", encoding="ascii"),
+                    bytes("Customer", encoding="ascii"),
+                    bytes("Customer.csv", encoding="ascii"),
+                    bytes("CustomerDeployed.csv", encoding="ascii"),
+                ],
+                "kwargs": {
+                    "additional_data_tables": (
+                        {
+                            bytes(key, encoding="ascii"): bytes(value, encoding="ascii")
+                            for key, value in additional_data_tables.items()
+                        }
+                    ),
+                    "output_additional_data_tables": (
+                        {
+                            bytes(key, encoding="ascii"): bytes(value, encoding="ascii")
+                            for key, value in output_additional_data_tables.items()
+                        }
+                    ),
+                },
+            },
+            "evaluate_predictor": {
+                "args": [
+                    "ModelingCustomer.kdic",
+                    "Customer",
+                    "Customer.csv",
+                    "CustomerResults/CustomerAnalysisResults.khj",
+                ],
+                "kwargs": {"additional_data_tables": copy(additional_data_tables)},
+            },
+            "train_coclustering": {
+                "args": [
+                    "Customer.kdic",
+                    "Customer",
+                    "Customer.csv",
+                    ["id_customer", "Name"],
+                    "CustomerResults/CustomerCoclusteringResults._khcj",
+                ],
+                "kwargs": {
+                    "additional_data_tables": copy(additional_data_tables),
+                },
+            },
+            "train_predictor": {
+                "args": [
+                    "Customer.kdic",
+                    "Customer",
+                    "Customer.csv",
+                    "",
+                    "CustomerResults/CustomerAnalysisResults._khj",
+                ],
+                "kwargs": {
+                    "additional_data_tables": copy(additional_data_tables),
+                },
+            },
+            "train_recoder": {
+                "args": [
+                    "Customer.kdic",
+                    "Customer",
+                    "Customer.csv",
+                    "",
+                    "CustomerResults/CustomerAnalysisResults._khj",
+                ],
+                "kwargs": {
+                    "additional_data_tables": copy(additional_data_tables),
+                },
+            },
+        }
+
+        return method_test_args
+
     def _build_mock_api_method_parameters(self):
         # Pseudo-mock data to test the creation of scenarios
         datasets = ["Adult", "SpliceJunction", "Customer"]
@@ -607,118 +708,77 @@ def test_data_path_deprecation_in_api_method(self):
         kh.set_runner(test_runner)
 
         # Obtain mock arguments for each API call
-        method_test_args = self._build_mock_api_method_parameters()
-
-        # Define legacy additional data tables path
-        str_legacy_additional_data_tables = {
-            "Customer`Services": "ServicesBidon.csv",
-            "Customer`Services`Usages": "UsagesBidon.csv",
-            "Customer`Address": "AddressBidon.csv",
-        }
+        method_test_args = self._build_mock_deprecated_data_path_api_method_parameters()
 
         # Test for each dataset mock parameters
         for method_name, method_full_args in method_test_args.items():
-            # Use bytes for deploy_model's additional_data_tables
-            if method_name == "deploy_model":
-                legacy_additional_data_tables = {
-                    bytes(key, encoding="ascii"): bytes(value, encoding="ascii")
-                    for key, value in str_legacy_additional_data_tables.items()
-                }
-            else:
-                legacy_additional_data_tables = str_legacy_additional_data_tables
             # Set the runners test name
             test_runner.test_name = method_name
 
             # Clean the directory for this method's tests
             cleanup_dir(test_runner.output_scenario_dir, "*/output/*._kh", verbose=True)
-            for dataset, dataset_method_args in method_full_args.items():
-                # Test only for the Customer dataset
-                if dataset != "Customer":
-                    continue
-
-                test_runner.subtest_name = dataset
-                with self.subTest(method=method_name):
-                    # Get the API function and its args and kwargs
-                    method = getattr(kh, method_name)
-                    dataset_args = dataset_method_args["args"]
-                    dataset_kwargs = dataset_method_args["kwargs"]
-
-                    # Skip the test if `additional_data_tables` is not an
-                    # API call kwarg
-                    if "additional_data_tables" not in dataset_kwargs:
-                        continue
-
-                    # Store current additional data_tables
-                    current_additional_data_tables = copy(
-                        dataset_kwargs["additional_data_tables"]
-                    )
-
-                    # Update the `additional_data_tables` kwargs to use
-                    # legacy paths
-                    dataset_kwargs["additional_data_tables"] = copy(
-                        legacy_additional_data_tables
+            test_runner.subtest_name = "Customer"
+            with self.subTest(method=method_name):
+                # Get the API function and its args and kwargs
+                method = getattr(kh, method_name)
+                args = method_full_args["args"]
+                kwargs = method_full_args["kwargs"]
+
+                # Test that using legacy paths entails a deprecation warning
+                with warnings.catch_warnings(record=True) as warning_list:
+                    method(*args, **kwargs)
+
+                # Check the number of warnings: one per legacy data path
+                if "output_additional_data_tables" in kwargs:
+                    self.assertEqual(
+                        len(warning_list),
+                        len(kwargs["additional_data_tables"])
+                        + len(kwargs["output_additional_data_tables"]),
                     )
-
-                    # Test that using legacy paths entails a deprecation warning
-                    with warnings.catch_warnings(record=True) as warning_list:
-                        method(*dataset_args, **dataset_kwargs)
-
-                    # Build current-legacy data path map
-                    legacy_to_current_data_paths = {}
-                    for (
-                        data_path,
-                        data_file_path,
-                    ) in current_additional_data_tables.items():
-                        for (
-                            leg_data_path,
-                            leg_data_file_path,
-                        ) in legacy_additional_data_tables.items():
-                            if leg_data_file_path == data_file_path:
-                                legacy_to_current_data_paths[leg_data_path] = data_path
-                                break
-
-                    # Check the warning message
+                else:
                     self.assertEqual(
-                        len(warning_list), len(legacy_additional_data_tables)
+                        len(warning_list), len(kwargs["additional_data_tables"])
                     )
-                    warning = warning_list[0]
-                    for warning in warning_list:
-                        self.assertTrue(issubclass(warning.category, UserWarning))
-                        warning_message = warning.message
-                        self.assertEqual(len(warning_message.args), 1)
-                        message = warning_message.args[0]
-                        self.assertTrue(
-                            "'`'-based dictionary data path" in message
-                            and "deprecated" in message
-                        )
-
-                        # Check legacy data path is replaced with the current
-                        # data path
-                        for legacy_data_path in legacy_additional_data_tables:
-                            expected_legacy_data_path = legacy_to_current_data_paths[
-                                legacy_data_path
-                            ]
-                            if f"'{legacy_data_path}'" in message:
-                                self.assertTrue(
-                                    f"'{expected_legacy_data_path}'" in message
-                                )
-                                break
 
+                warning = warning_list[0]
+                for warning in warning_list:
+                    self.assertTrue(issubclass(warning.category, UserWarning))
+                    warning_message = warning.message
+                    self.assertEqual(len(warning_message.args), 1)
+                    message = warning_message.args[0]
+                    self.assertTrue(
+                        "'`'-based dictionary data path" in message
+                        and "deprecated" in message
+                    )
         # Restore the default runner
         kh.set_runner(default_runner)
 
     def test_unknown_argument_in_api_method(self):
         """Tests if core.api raises ValueError when an unknown argument is passed"""
+        # Set the root directory of these tests
+        test_resources_dir = os.path.join(resources_dir(), "scenario_generation", "api")
+
+        # Use the test runner that only compares the scenarios
+        default_runner = kh.get_runner()
+        test_runner = ScenarioWriterRunner(self, test_resources_dir)
+        kh.set_runner(test_runner)
+
         # Obtain mock arguments for each API call
         method_test_args = self._build_mock_api_method_parameters()
 
         # Test for each dataset mock parameters
         for method_name, method_full_args in method_test_args.items():
+            # Set the runner's test name
+            test_runner.test_name = method_name
+
+            # Clean the directory for this method's tests
+            cleanup_dir(test_runner.output_scenario_dir, "*/output/*._kh", verbose=True)
             for dataset, dataset_method_args in method_full_args.items():
                 # Test only for the Adult dataset
                 if dataset != "Adult":
                     continue
 
+                test_runner.subtest_name = dataset
                 with self.subTest(method=method_name):
                     # These methods do not have kwargs so they cannot have extra args
                     if method_name in [
@@ -742,6 +802,9 @@ def test_unknown_argument_in_api_method(self):
                     output_msg = str(context.exception)
                     self.assertEqual(output_msg, expected_msg)
 
+        # Restore the default runner
+        kh.set_runner(default_runner)
+
     def test_system_settings(self):
         """Test that the system settings are written to the scenario file"""
         # Create the root directory of these tests