From bb4c63914d75bb8127c9d0617d8fb3632b58e438 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:13:16 +0100 Subject: [PATCH 1/2] Rename transform_pairs parameter to transform_type_pairs --- doc/samples/samples_sklearn.rst | 2 +- khiops/samples/samples_sklearn.ipynb | 2 +- khiops/samples/samples_sklearn.py | 2 +- khiops/sklearn/estimators.py | 22 ++++++++++++---------- tests/test_sklearn.py | 10 +++++----- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/doc/samples/samples_sklearn.rst b/doc/samples/samples_sklearn.rst index 22fc4cfd..b280181c 100644 --- a/doc/samples/samples_sklearn.rst +++ b/doc/samples/samples_sklearn.rst @@ -723,7 +723,7 @@ Samples keep_initial_variables=True, transform_type_categorical="part_id", transform_type_numerical="part_id", - transform_pairs="part_id", + transform_type_pairs="part_id", ) khe.fit(X, y) diff --git a/khiops/samples/samples_sklearn.ipynb b/khiops/samples/samples_sklearn.ipynb index e2aec0f1..11057461 100644 --- a/khiops/samples/samples_sklearn.ipynb +++ b/khiops/samples/samples_sklearn.ipynb @@ -865,7 +865,7 @@ " keep_initial_variables=True,\n", " transform_type_categorical=\"part_id\",\n", " transform_type_numerical=\"part_id\",\n", - " transform_pairs=\"part_id\",\n", + " transform_type_pairs=\"part_id\",\n", ")\n", "khe.fit(X, y)\n", "\n", diff --git a/khiops/samples/samples_sklearn.py b/khiops/samples/samples_sklearn.py index da591810..ad431d9f 100644 --- a/khiops/samples/samples_sklearn.py +++ b/khiops/samples/samples_sklearn.py @@ -761,7 +761,7 @@ def khiops_encoder_with_hyperparameters(): keep_initial_variables=True, transform_type_categorical="part_id", transform_type_numerical="part_id", - transform_pairs="part_id", + transform_type_pairs="part_id", ) khe.fit(X, y) diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py index 3d5c122c..9e011ce0 100644 --- a/khiops/sklearn/estimators.py +++ 
b/khiops/sklearn/estimators.py @@ -2733,7 +2733,7 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator): See the documentation for the ``numerical_recoding_method`` parameter of the `~.api.train_recoder` function for more details. - transform_pairs: str, default "part_id" + transform_type_pairs : str, default "part_id" Type of transformation for bivariate features. Valid values: - "part_id" - "part_label" @@ -2811,7 +2811,7 @@ def __init__( keep_initial_variables=False, transform_type_categorical="part_id", transform_type_numerical="part_id", - transform_pairs="part_id", + transform_type_pairs="part_id", verbose=False, output_dir=None, auto_sort=True, @@ -2835,7 +2835,7 @@ def __init__( self.group_target_value = group_target_value self.transform_type_categorical = transform_type_categorical self.transform_type_numerical = transform_type_numerical - self.transform_pairs = transform_pairs + self.transform_type_pairs = transform_type_pairs self.informative_features_only = informative_features_only self.keep_initial_variables = keep_initial_variables self._khiops_model_prefix = "R_" @@ -2892,12 +2892,12 @@ def _pairs_transform_method(self): "conditional_info": "conditional info", None: "none", } - if self.transform_pairs not in _transform_types: + if self.transform_type_pairs not in _transform_types: raise ValueError( - "'transform_pairs' must be one of the following:" + "'transform_type_pairs' must be one of the following:" ",".join(_transform_types.keys) ) - return _transform_types[self.transform_pairs] + return _transform_types[self.transform_type_pairs] def _fit_check_params(self, ds, **kwargs): # Call parent method @@ -2931,10 +2931,12 @@ def _fit_check_params(self, ds, **kwargs): "transform_type_categorical and transform_type_numerical " "cannot be both None with n_trees == 0." 
) - # Check 'transform_pairs' parameter - if not isinstance(self.transform_pairs, str): + # Check 'transform_type_pairs' parameter + if not isinstance(self.transform_type_pairs, str): raise TypeError( - type_error_message("transform_pairs", self.transform_pairs, str) + type_error_message( + "transform_type_pairs", self.transform_type_pairs, str + ) ) self._pairs_transform_method() # Raises ValueError if invalid @@ -3036,7 +3038,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir): del kwargs["transform_type_categorical"] del kwargs["transform_type_numerical"] - del kwargs["transform_pairs"] + del kwargs["transform_type_pairs"] del kwargs["categorical_target"] return args, kwargs diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 6e6d6bd3..48a5784f 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -2267,7 +2267,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self): "keep_initial_variables": False, "transform_type_categorical": "part_id", "transform_type_numerical": "part_id", - "transform_pairs": "part_id", + "transform_type_pairs": "part_id", }, ) @@ -2291,7 +2291,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y( "keep_initial_variables": False, "transform_type_categorical": "part_id", "transform_type_numerical": "part_id", - "transform_pairs": "part_id", + "transform_type_pairs": "part_id", }, ) @@ -2313,7 +2313,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_file_dataset(self): "keep_initial_variables": False, "transform_type_categorical": "part_id", "transform_type_numerical": "part_id", - "transform_pairs": "part_id", + "transform_type_pairs": "part_id", }, ) @@ -2336,7 +2336,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self): "keep_initial_variables": False, "transform_type_categorical": "part_id", "transform_type_numerical": "part_id", - "transform_pairs": "part_id", + "transform_type_pairs": "part_id", }, ) @@ -2359,7 
+2359,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_file_dataset(self): "keep_initial_variables": False, "transform_type_categorical": "part_id", "transform_type_numerical": "part_id", - "transform_pairs": "part_id", + "transform_type_pairs": "part_id", }, ) From 3bf819ff756d68d03274d3b53f5b749478e56414 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:01:40 +0100 Subject: [PATCH 2/2] Control the types on sklearn internal read table We do this only for KhiopsClassifier and KhiopsRegressor: It is critical for KhiopsClassifier as it accepts many target types and it is trivial in the case of KhiopsRegressor. For KhiopsEncoder and KhiopsCoclustering it is less critical and for the first one it is very complex. We left them as TODO's. Additionally, we now also check in the "output type" tests that the result of predict is correct. Before we only checked that the classes_ attribute was ok. This is to further ensure correctness.
--- khiops/sklearn/dataset.py | 32 ++++- khiops/sklearn/estimators.py | 210 +++++++++++++++-------------- tests/test_sklearn_output_types.py | 93 ++++++++++--- 3 files changed, 213 insertions(+), 122 deletions(-) diff --git a/khiops/sklearn/dataset.py b/khiops/sklearn/dataset.py index 26da745f..75b2fb5f 100644 --- a/khiops/sklearn/dataset.py +++ b/khiops/sklearn/dataset.py @@ -346,7 +346,7 @@ def get_khiops_variable_name(column_id): return variable_name -def read_internal_data_table(file_path_or_stream): +def read_internal_data_table(file_path_or_stream, column_dtypes=None): """Reads into a DataFrame a data table file with the internal format settings The table is read with the following settings: @@ -357,18 +357,34 @@ def read_internal_data_table(file_path_or_stream): - Use `csv.QUOTE_MINIMAL` - double quoting enabled (quotes within quotes can be escaped with '""') - UTF-8 encoding + - User-specified dtypes (optional) Parameters ---------- file_path_or_stream : str or file object The path of the internal data table file to be read or a readable file object. + column_dtypes : dict, optional + Dictionary linking column names with dtypes. See ``dtype`` parameter of the + `pandas.read_csv` function. If not set, then the column types are detected + automatically by pandas. Returns ------- `pandas.DataFrame` - The dataframe representation. + The dataframe representation of the data table. 
""" + # Change the 'U' types (Unicode strings) to 'O' because pandas does not support them + # in read_csv + if column_dtypes is not None: + execution_column_dtypes = {} + for column_name, dtype in column_dtypes.items(): + if hasattr(dtype, "kind") and dtype.kind == "U": + execution_column_dtypes[column_name] = np.dtype("O") + else: + execution_column_dtypes = None + + # Read and return the dataframe return pd.read_csv( file_path_or_stream, sep="\t", @@ -377,6 +393,7 @@ def read_internal_data_table(file_path_or_stream): quoting=csv.QUOTE_MINIMAL, doublequote=True, encoding="utf-8", + dtype=execution_column_dtypes, ) @@ -1132,6 +1149,11 @@ def __repr__(self): f"dtypes={dtypes_str}>" ) + def get_column_dtype(self, column_id): + if column_id not in self.data_source.dtypes: + raise KeyError(f"Column '{column_id}' not found in the dtypes field") + return self.data_source.dtypes[column_id] + def create_table_file_for_khiops( self, output_dir, sort=True, target_column=None, target_column_id=None ): @@ -1214,6 +1236,9 @@ def __repr__(self): f"dtype={dtype_str}; target={self.target_column_id}>" ) + def get_column_dtype(self, _): + return self.data_source.dtype + def create_table_file_for_khiops( self, output_dir, sort=True, target_column=None, target_column_id=None ): @@ -1300,6 +1325,9 @@ def __repr__(self): f"dtype={dtype_str}>" ) + def get_column_dtype(self, _): + return self.data_source.dtype + def create_khiops_dictionary(self): """Creates a Khiops dictionary representing this sparse table diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py index 9e011ce0..c69baff4 100644 --- a/khiops/sklearn/estimators.py +++ b/khiops/sklearn/estimators.py @@ -232,7 +232,7 @@ class KhiopsEstimator(ABC, BaseEstimator): The name of the column to be used as key. **Deprecated** will be removed in Khiops 11. 
internal_sort : bool, optional - *Advanced.*: See concrete estimator classes for information about this + *Advanced*: See concrete estimator classes for information about this parameter. **Deprecated** will be removed in Khiops 11. Use the ``auto_sort`` estimator parameter instead. @@ -470,7 +470,7 @@ def _transform( self, ds, computation_dir, - _transform_create_deployment_model_fun, + _transform_prepare_deployment_fun, drop_key, transformed_file_name, ): @@ -482,11 +482,13 @@ def _transform( self._transform_check_dataset(ds) # Create a deployment dataset - # Note: The input dataset is not necessarily ready to be deployed + # Note: The input dataset isn't ready for deployment in the case of coclustering deployment_ds = self._transform_create_deployment_dataset(ds, computation_dir) - # Create a deployment dictionary - deployment_dictionary_domain = _transform_create_deployment_model_fun(ds) + # Create a deployment dictionary and the internal table column dtypes + deployment_dictionary_domain, internal_table_column_dtypes = ( + _transform_prepare_deployment_fun(ds) + ) # Deploy the model output_table_path = self._transform_deploy_model( @@ -497,10 +499,36 @@ def _transform( transformed_file_name, ) - # Post-process to return the correct output type - return self._transform_deployment_post_process( - deployment_ds, output_table_path, drop_key - ) + # Post-process to return the correct output type and order + if deployment_ds.is_in_memory: + # Load the table as a dataframe + with io.BytesIO(fs.read(output_table_path)) as output_table_stream: + output_table_df = read_internal_data_table( + output_table_stream, column_dtypes=internal_table_column_dtypes + ) + + # On multi-table: + # - Reorder the table to the original table order + # - Because transformed data table file is sorted by key + # - Drop the key columns if specified + if deployment_ds.is_multitable: + key_df = deployment_ds.main_table.data_source[ + deployment_ds.main_table.key + ] + output_table_df_or_path = 
key_df.merge( + output_table_df, on=deployment_ds.main_table.key + ) + if drop_key: + output_table_df_or_path.drop( + deployment_ds.main_table.key, axis=1, inplace=True + ) + # On mono-table: Return the read dataframe as-is + else: + output_table_df_or_path = output_table_df + else: + output_table_df_or_path = output_table_path + + return output_table_df_or_path def _transform_create_deployment_dataset(self, ds, _): """Creates if necessary a new dataset to execute the model deployment @@ -609,44 +637,6 @@ def _transform_check_dataset(self, ds): if ds.table_type == FileTable and self.output_dir is None: raise ValueError("'output_dir' is not set but dataset is file-based") - def _transform_deployment_post_process( - self, deployment_ds, output_table_path, drop_key - ): - # Return a dataframe for dataframe based datasets - if deployment_ds.is_in_memory: - # Read the transformed table with the internal table settings - with io.BytesIO(fs.read(output_table_path)) as output_table_stream: - output_table_df = read_internal_data_table(output_table_stream) - - # On multi-table: - # - Reorder the table to the original table order - # - Because transformed data table file is sorted by key - # - Drop the key columns if specified - if deployment_ds.is_multitable: - key_df = deployment_ds.main_table.data_source[ - deployment_ds.main_table.key - ] - output_table_df_or_path = key_df.merge( - output_table_df, on=deployment_ds.main_table.key - ) - if drop_key: - output_table_df_or_path.drop( - deployment_ds.main_table.key, axis=1, inplace=True - ) - # On mono-table: Return the read dataframe as-is - else: - output_table_df_or_path = output_table_df - # Return a file path for file based datasets - else: - output_table_df_or_path = output_table_path - - assert isinstance( - output_table_df_or_path, (str, pd.DataFrame) - ), type_error_message( - "output_table_df_or_path", output_table_df_or_path, str, pd.DataFrame - ) - return output_table_df_or_path - def _create_computation_dir(self, 
method_name): """Creates a temporary computation directory""" return kh.get_runner().create_temp_dir( @@ -1266,7 +1256,7 @@ def predict(self, X): y_pred = super()._transform( ds, computation_dir, - self._transform_prepare_deployment_model_for_predict, + self._transform_prepare_deployment_for_predict, False, "predict.txt", ) @@ -1372,16 +1362,11 @@ def _transform_create_deployment_dataset(self, ds, computation_dir): return Dataset(deploy_dataset_spec) - def _transform_prepare_deployment_model_for_predict(self, _): - return self.model_.copy() - - def _transform_deployment_post_process( - self, deployment_ds, output_table_path, drop_key - ): - assert deployment_ds.is_multitable - return super()._transform_deployment_post_process( - deployment_ds, output_table_path, drop_key - ) + def _transform_prepare_deployment_for_predict(self, _): + # TODO: Replace the second return value (the output columns' dtypes) with a + # proper value instead of `None`. In the current state, it will use pandas + # type auto-detection to load the internal table into memory. 
+ return self.model_.copy(), None def fit_predict(self, X, y=None, **kwargs): """Performs clustering on X and returns result (instead of labels)""" @@ -1418,6 +1403,7 @@ def __init__( self.specific_pairs = specific_pairs self.all_possible_pairs = all_possible_pairs self.construction_rules = construction_rules + self._original_target_dtype = None self._predicted_target_meta_data_tag = None # Deprecation message for 'key' constructor parameter @@ -1625,6 +1611,22 @@ def _fit_training_post_process(self, ds): # Call parent method super()._fit_training_post_process(ds) + # Save the target and key column dtype's + if ds.is_in_memory: + if self._original_target_dtype is None: + self._original_target_dtype = ds.target_column.dtype + if ds.main_table.key is not None: + self._original_key_dtypes = {} + for column_id in ds.main_table.key: + self._original_key_dtypes[column_id] = ( + ds.main_table.get_column_dtype(column_id) + ) + else: + self._original_key_dtypes = None + else: + self._original_target_dtype = None + self._original_key_dtypes = None + # Set the target variable name self.model_target_variable_name_ = get_khiops_variable_name(ds.target_column_id) @@ -1800,6 +1802,7 @@ def __init__( ) # Data to be specified by inherited classes self._predicted_target_meta_data_tag = None + self._predicted_target_name_prefix = None self.n_evaluated_features = n_evaluated_features self.n_selected_features = n_selected_features @@ -1827,7 +1830,7 @@ def predict(self, X): y_pred = super()._transform( ds, computation_dir, - self._transform_prepare_deployment_model_for_predict, + self._transform_prepare_deployment_for_predict, True, "predict.txt", ) @@ -1855,7 +1858,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir): return args, kwargs - def _transform_prepare_deployment_model_for_predict(self, ds): + def _transform_prepare_deployment_for_predict(self, ds): assert ( self._predicted_target_meta_data_tag is not None ), "Predicted target metadata tag is not set" @@ 
-1880,7 +1883,20 @@ def _transform_prepare_deployment_model_for_predict(self, ds): if self.model_target_variable_name_ not in list(ds.main_table.column_ids): model_dictionary.remove_variable(self.model_target_variable_name_) - return model_copy + # Create the output column dtype dict + if ds.is_in_memory: + predicted_target_column_name = ( + self._predicted_target_name_prefix + self.model_target_variable_name_ + ) + output_columns_dtype = { + predicted_target_column_name: self._original_target_dtype + } + if self.is_multitable_model_: + output_columns_dtype.update(self._original_key_dtypes) + else: + output_columns_dtype = None + + return model_copy, output_columns_dtype def get_feature_used_statistics(self, modeling_report): # Extract, from the modeling report, names, levels, weights and importances @@ -1895,7 +1911,7 @@ def get_feature_used_statistics(self, modeling_report): for var in modeling_report.selected_variables ] ) - # Return empty arrays if not selected_variables is available + # Return empty arrays if no selected variables are available else: feature_used_names_ = np.array([], dtype=np.dtype("