Skip to content

Commit 7e7f044

Browse files
committed
add conditional code for compatibility sklearn < 1.2
1 parent 173d5fe commit 7e7f044

File tree

5 files changed

+21
-268
lines changed

5 files changed

+21
-268
lines changed

doc/samples/samples_sklearn.rst

Lines changed: 0 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -98,82 +98,6 @@ Samples
9898
# If you have Khiops Visualization installed you may open the report as follows
9999
# khc.export_report_file("report.khj")
100100
# kh.visualize_report("report.khj")
101-
.. autofunction:: khiops_classifier_boolean_target
102-
.. code-block:: python
103-
104-
# Imports
105-
import os
106-
import pandas as pd
107-
from khiops import core as kh
108-
from khiops.sklearn import KhiopsClassifier
109-
from sklearn.model_selection import train_test_split
110-
111-
# Load the dataset into a pandas dataframe
112-
adult_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt")
113-
adult_df = pd.read_csv(adult_path, sep="\t")
114-
115-
# Split the whole dataframe into train and test (70%-30%)
116-
adult_train_df, adult_test_df = train_test_split(
117-
adult_df, test_size=0.3, random_state=1
118-
)
119-
120-
# Split the dataset into:
121-
# - the X feature table
122-
# - the y target vector ("class" column)
123-
X_train = adult_train_df.drop("class", axis=1)
124-
X_test = adult_test_df.drop("class", axis=1)
125-
y_train = adult_train_df["class"]
126-
y_train.replace({"less": False, "more": True}, inplace=True)
127-
128-
# Create the classifier object
129-
khc = KhiopsClassifier()
130-
131-
# Train the classifier
132-
khc.fit(X_train, y_train)
133-
134-
# Predict the classes on the test dataset
135-
y_test_pred = khc.predict(X_test)
136-
print("Predicted classes (first 10):")
137-
print(y_test_pred[0:10])
138-
print("---")
139-
.. autofunction:: khiops_classifier_float_target
140-
.. code-block:: python
141-
142-
# Imports
143-
import os
144-
import pandas as pd
145-
from khiops import core as kh
146-
from khiops.sklearn import KhiopsClassifier
147-
from sklearn.model_selection import train_test_split
148-
149-
# Load the dataset into a pandas dataframe
150-
adult_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt")
151-
adult_df = pd.read_csv(adult_path, sep="\t")
152-
153-
# Split the whole dataframe into train and test (70%-30%)
154-
adult_train_df, adult_test_df = train_test_split(
155-
adult_df, test_size=0.3, random_state=1
156-
)
157-
158-
# Split the dataset into:
159-
# - the X feature table
160-
# - the y target vector ("class" column)
161-
X_train = adult_train_df.drop("class", axis=1)
162-
X_test = adult_test_df.drop("class", axis=1)
163-
y_train = adult_train_df["class"]
164-
y_train.replace({"less": 0.0, "more": 1.0}, inplace=True)
165-
166-
# Create the classifier object
167-
khc = KhiopsClassifier()
168-
169-
# Train the classifier
170-
khc.fit(X_train, y_train)
171-
172-
# Predict the classes on the test dataset
173-
y_test_pred = khc.predict(X_test)
174-
print("Predicted classes (first 10):")
175-
print(y_test_pred[0:10])
176-
print("---")
177101
.. autofunction:: khiops_classifier_multiclass
178102
.. code-block:: python
179103

khiops/samples/samples_sklearn.ipynb

Lines changed: 0 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -86,108 +86,6 @@
8686
"# kh.visualize_report(\"report.khj\")"
8787
]
8888
},
89-
{
90-
"cell_type": "markdown",
91-
"metadata": {},
92-
"source": [
93-
"### `khiops_classifier_boolean_target()`\n\n",
94-
"Trains a `.KhiopsClassifier` on a monotable dataframe\n where the target is boolean\n"
95-
]
96-
},
97-
{
98-
"cell_type": "code",
99-
"execution_count": null,
100-
"metadata": {},
101-
"outputs": [],
102-
"source": [
103-
"# Imports\n",
104-
"import os\n",
105-
"import pandas as pd\n",
106-
"from khiops import core as kh\n",
107-
"from khiops.sklearn import KhiopsClassifier\n",
108-
"from sklearn.model_selection import train_test_split\n",
109-
"\n",
110-
"# Load the dataset into a pandas dataframe\n",
111-
"adult_path = os.path.join(kh.get_samples_dir(), \"Adult\", \"Adult.txt\")\n",
112-
"adult_df = pd.read_csv(adult_path, sep=\"\\t\")\n",
113-
"\n",
114-
"# Split the whole dataframe into train and test (70%-30%)\n",
115-
"adult_train_df, adult_test_df = train_test_split(\n",
116-
" adult_df, test_size=0.3, random_state=1\n",
117-
")\n",
118-
"\n",
119-
"# Split the dataset into:\n",
120-
"# - the X feature table\n",
121-
"# - the y target vector (\"class\" column)\n",
122-
"X_train = adult_train_df.drop(\"class\", axis=1)\n",
123-
"X_test = adult_test_df.drop(\"class\", axis=1)\n",
124-
"y_train = adult_train_df[\"class\"]\n",
125-
"y_train.replace({\"less\": False, \"more\": True}, inplace=True)\n",
126-
"\n",
127-
"# Create the classifier object\n",
128-
"khc = KhiopsClassifier()\n",
129-
"\n",
130-
"# Train the classifier\n",
131-
"khc.fit(X_train, y_train)\n",
132-
"\n",
133-
"# Predict the classes on the test dataset\n",
134-
"y_test_pred = khc.predict(X_test)\n",
135-
"print(\"Predicted classes (first 10):\")\n",
136-
"print(y_test_pred[0:10])\n",
137-
"print(\"---\")"
138-
]
139-
},
140-
{
141-
"cell_type": "markdown",
142-
"metadata": {},
143-
"source": [
144-
"### `khiops_classifier_float_target()`\n\n",
145-
"Trains a `.KhiopsClassifier` on a monotable dataframe\n where the target is float\n"
146-
]
147-
},
148-
{
149-
"cell_type": "code",
150-
"execution_count": null,
151-
"metadata": {},
152-
"outputs": [],
153-
"source": [
154-
"# Imports\n",
155-
"import os\n",
156-
"import pandas as pd\n",
157-
"from khiops import core as kh\n",
158-
"from khiops.sklearn import KhiopsClassifier\n",
159-
"from sklearn.model_selection import train_test_split\n",
160-
"\n",
161-
"# Load the dataset into a pandas dataframe\n",
162-
"adult_path = os.path.join(kh.get_samples_dir(), \"Adult\", \"Adult.txt\")\n",
163-
"adult_df = pd.read_csv(adult_path, sep=\"\\t\")\n",
164-
"\n",
165-
"# Split the whole dataframe into train and test (70%-30%)\n",
166-
"adult_train_df, adult_test_df = train_test_split(\n",
167-
" adult_df, test_size=0.3, random_state=1\n",
168-
")\n",
169-
"\n",
170-
"# Split the dataset into:\n",
171-
"# - the X feature table\n",
172-
"# - the y target vector (\"class\" column)\n",
173-
"X_train = adult_train_df.drop(\"class\", axis=1)\n",
174-
"X_test = adult_test_df.drop(\"class\", axis=1)\n",
175-
"y_train = adult_train_df[\"class\"]\n",
176-
"y_train.replace({\"less\": 0.0, \"more\": 1.0}, inplace=True)\n",
177-
"\n",
178-
"# Create the classifier object\n",
179-
"khc = KhiopsClassifier()\n",
180-
"\n",
181-
"# Train the classifier\n",
182-
"khc.fit(X_train, y_train)\n",
183-
"\n",
184-
"# Predict the classes on the test dataset\n",
185-
"y_test_pred = khc.predict(X_test)\n",
186-
"print(\"Predicted classes (first 10):\")\n",
187-
"print(y_test_pred[0:10])\n",
188-
"print(\"---\")"
189-
]
190-
},
19189
{
19290
"cell_type": "markdown",
19391
"metadata": {},

khiops/samples/samples_sklearn.py

Lines changed: 0 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -89,86 +89,6 @@ def khiops_classifier():
8989
# kh.visualize_report("report.khj")
9090

9191

92-
def khiops_classifier_boolean_target():
    """Trains a `.KhiopsClassifier` on a monotable dataframe
    where the target is boolean

    The "class" column of the "Adult" sample table is recoded from
    "less"/"more" to ``False``/``True`` before training.
    """
    # Imports
    import os
    import pandas as pd
    from khiops import core as kh
    from khiops.sklearn import KhiopsClassifier
    from sklearn.model_selection import train_test_split

    # Load the dataset into a pandas dataframe
    adult_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt")
    adult_df = pd.read_csv(adult_path, sep="\t")

    # Split the whole dataframe into train and test (70%-30%)
    adult_train_df, adult_test_df = train_test_split(
        adult_df, test_size=0.3, random_state=1
    )

    # Split the dataset into:
    # - the X feature table
    # - the y target vector ("class" column)
    X_train = adult_train_df.drop("class", axis=1)
    X_test = adult_test_df.drop("class", axis=1)

    # Recode the target into a new Series rather than replacing in place:
    # an in-place replace on a column selected from `adult_train_df` mutates
    # data derived from another dataframe, which pandas may report with a
    # SettingWithCopyWarning.
    y_train = adult_train_df["class"].replace({"less": False, "more": True})

    # Create the classifier object
    khc = KhiopsClassifier()

    # Train the classifier
    khc.fit(X_train, y_train)

    # Predict the classes on the test dataset
    y_test_pred = khc.predict(X_test)
    print("Predicted classes (first 10):")
    print(y_test_pred[0:10])
    print("---")
130-
131-
132-
def khiops_classifier_float_target():
    """Trains a `.KhiopsClassifier` on a monotable dataframe
    where the target is float

    The "class" column of the "Adult" sample table is recoded from
    "less"/"more" to ``0.0``/``1.0`` before training.
    """
    # Imports
    import os
    import pandas as pd
    from khiops import core as kh
    from khiops.sklearn import KhiopsClassifier
    from sklearn.model_selection import train_test_split

    # Load the dataset into a pandas dataframe
    adult_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt")
    adult_df = pd.read_csv(adult_path, sep="\t")

    # Split the whole dataframe into train and test (70%-30%)
    adult_train_df, adult_test_df = train_test_split(
        adult_df, test_size=0.3, random_state=1
    )

    # Split the dataset into:
    # - the X feature table
    # - the y target vector ("class" column)
    X_train = adult_train_df.drop("class", axis=1)
    X_test = adult_test_df.drop("class", axis=1)

    # Recode the target into a new Series rather than replacing in place:
    # an in-place replace on a column selected from `adult_train_df` mutates
    # data derived from another dataframe, which pandas may report with a
    # SettingWithCopyWarning.
    y_train = adult_train_df["class"].replace({"less": 0.0, "more": 1.0})

    # Create the classifier object
    khc = KhiopsClassifier()

    # Train the classifier
    khc.fit(X_train, y_train)

    # Predict the classes on the test dataset
    y_test_pred = khc.predict(X_test)
    print("Predicted classes (first 10):")
    print(y_test_pred[0:10])
    print("---")
170-
171-
17292
def khiops_classifier_multiclass():
17393
"""Trains a multiclass `.KhiopsClassifier` on a monotable dataframe"""
17494
# Imports
@@ -1105,8 +1025,6 @@ def khiops_classifier_multitable_star_file():
11051025

11061026
exported_samples = [
11071027
khiops_classifier,
1108-
khiops_classifier_boolean_target,
1109-
khiops_classifier_float_target,
11101028
khiops_classifier_multiclass,
11111029
khiops_classifier_multitable_star,
11121030
khiops_classifier_multitable_snowflake,

khiops/sklearn/dataset.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import numpy as np
1616
import pandas as pd
17+
import sklearn
1718
from scipy import sparse as sp
1819
from sklearn.utils import check_array
1920
from sklearn.utils.validation import column_or_1d
@@ -430,6 +431,19 @@ def write_internal_data_table(dataframe, file_path_or_stream):
430431
)
431432

432433

434+
def _column_or_1d_with_dtype(y, dtype=None):
    """Calls scikit-learn's `column_or_1d`, forwarding ``dtype`` when supported

    The ``dtype`` parameter of `column_or_1d` was introduced in
    scikit-learn 1.2. When the installed `column_or_1d` does not accept it,
    the call is made without ``dtype`` and a warning is emitted if ``dtype``
    is a string dtype and ``y`` holds only the strings 'True'/'False'.

    Parameters
    ----------
    y : array-like
        The target values to validate.
    dtype : optional
        The data type forwarded to `column_or_1d` when it is supported.

    Returns
    -------
    The value returned by `column_or_1d` called with ``warn=True``.
    """
    # Local import: only this compatibility helper needs it
    import inspect

    # Detect the parameter instead of comparing version strings: a
    # lexicographic comparison is wrong for versions such as "1.10"
    # (as a string, "1.10" < "1.2").
    if "dtype" in inspect.signature(column_or_1d).parameters:
        return column_or_1d(y, warn=True, dtype=dtype)

    # Legacy path: `column_or_1d` has no 'dtype' parameter.
    # `np.isin` is used rather than `y.isin` so that NumPy arrays (which have
    # no `isin` method) are handled as well as pandas objects.
    if (
        pd.api.types.is_string_dtype(dtype)
        and np.isin(np.asarray(y), ["True", "False"]).all()
    ):
        warnings.warn(
            "'y' stores strings restricted to 'True'/'False' values: "
            "The predict method may return a bool vector."
        )
    return column_or_1d(y, warn=True)
445+
446+
433447
class Dataset:
434448
"""A representation of a dataset
435449
@@ -740,20 +754,20 @@ def _init_target_column(self, y):
740754
else:
741755
if hasattr(y, "dtype"):
742756
if isinstance(y.dtype, pd.CategoricalDtype):
743-
y_checked = column_or_1d(
744-
y, warn=True, dtype=y.dtype.categories.dtype
757+
y_checked = _column_or_1d_with_dtype(
758+
y, dtype=y.dtype.categories.dtype
745759
)
746760
else:
747-
y_checked = column_or_1d(y, warn=True, dtype=y.dtype)
761+
y_checked = _column_or_1d_with_dtype(y, dtype=y.dtype)
748762
elif hasattr(y, "dtypes"):
749763
if isinstance(y.dtypes[0], pd.CategoricalDtype):
750-
y_checked = column_or_1d(
751-
y, warn=True, dtype=y.dtypes[0].categories.dtype
764+
y_checked = _column_or_1d_with_dtype(
765+
y, dtype=y.dtypes[0].categories.dtype
752766
)
753767
else:
754-
y_checked = column_or_1d(y, warn=True)
768+
y_checked = _column_or_1d_with_dtype(y)
755769
else:
756-
y_checked = column_or_1d(y, warn=True)
770+
y_checked = _column_or_1d_with_dtype(y)
757771
# Check the target type coherence with those of X's tables
758772
if isinstance(
759773
self.main_table, (PandasTable, SparseTable, NumpyTable)

tests/test_sklearn_output_types.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,6 @@ def test_classifier_output_types(self):
195195

196196
# Check the return type of predict
197197
y_pred = khc.predict(X)
198-
199198
self.assertTrue(
200199
y_type_check(y_pred),
201200
f"'{y_type_check.__name__}' was False for "

0 commit comments

Comments
 (0)