Skip to content

Commit 63b4862

Browse files
committed
Add basic text features samples
1 parent 2afaa4c commit 63b4862

File tree

3 files changed

+238
-0
lines changed

3 files changed

+238
-0
lines changed

doc/samples/samples.rst

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,35 @@ Samples
212212
213213
# If you have Khiops Visualization installed you may open the report as follows
214214
# kh.visualize_report(report_file_path)
215+
.. autofunction:: train_predictor_text
216+
.. code-block:: python
217+
218+
# Imports
219+
import os
220+
from khiops import core as kh
221+
222+
# Set the file paths
223+
dictionary_file_path = os.path.join(
224+
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
225+
)
226+
data_table_path = os.path.join(
227+
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
228+
)
229+
report_file_path = os.path.join(
230+
"kh_samples", "train_predictor_text", "AnalysisResults.khj"
231+
)
232+
233+
# Train the predictor
234+
kh.train_predictor(
235+
dictionary_file_path,
236+
"FlightNegativeTweets",
237+
data_table_path,
238+
"negativereason",
239+
report_file_path,
240+
max_trees=5,
241+
max_text_features=1000,
242+
text_features="words",
243+
)
215244
.. autofunction:: train_predictor_error_handling
216245
.. code-block:: python
217246
@@ -948,6 +977,44 @@ Samples
948977
kh.deploy_model(
949978
model_dictionary_file_path, "SNB_Adult", data_table_path, output_data_table_path
950979
)
980+
.. autofunction:: deploy_model_text
981+
.. code-block:: python
982+
983+
# Imports
984+
import os
985+
from khiops import core as kh
986+
987+
# Set the file paths
988+
dictionary_file_path = os.path.join(
989+
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
990+
)
991+
data_table_path = os.path.join(
992+
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
993+
)
994+
output_dir = os.path.join("kh_samples", "deploy_model_text")
995+
report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
996+
output_data_table_path = os.path.join(output_dir, "ScoresNegativeAirlineTweets.txt")
997+
998+
# Train the predictor
999+
_, model_dictionary_file_path = kh.train_predictor(
1000+
dictionary_file_path,
1001+
"FlightNegativeTweets",
1002+
data_table_path,
1003+
"negativereason",
1004+
report_file_path,
1005+
max_trees=5,
1006+
max_text_features=1000,
1007+
text_features="words",
1008+
)
1009+
1010+
# Deploy the model on the database
1011+
# It will score it according to the trained predictor
1012+
kh.deploy_model(
1013+
model_dictionary_file_path,
1014+
"SNB_FlightNegativeTweets",
1015+
data_table_path,
1016+
output_data_table_path,
1017+
)
9511018
.. autofunction:: deploy_model_mt
9521019
.. code-block:: python
9531020

khiops/samples/samples.ipynb

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,48 @@
278278
"# kh.visualize_report(report_file_path)"
279279
]
280280
},
281+
{
282+
"cell_type": "markdown",
283+
"metadata": {},
284+
"source": [
285+
"### `train_predictor_text()`\n\n",
286+
"Trains a predictor on textual data, setting the text-specific parameters\n"
287+
]
288+
},
289+
{
290+
"cell_type": "code",
291+
"execution_count": null,
292+
"metadata": {},
293+
"outputs": [],
294+
"source": [
295+
"# Imports\n",
296+
"import os\n",
297+
"from khiops import core as kh\n",
298+
"\n",
299+
"# Set the file paths\n",
300+
"dictionary_file_path = os.path.join(\n",
301+
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.kdic\"\n",
302+
")\n",
303+
"data_table_path = os.path.join(\n",
304+
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.txt\"\n",
305+
")\n",
306+
"report_file_path = os.path.join(\n",
307+
" \"kh_samples\", \"train_predictor_text\", \"AnalysisResults.khj\"\n",
308+
")\n",
309+
"\n",
310+
"# Train the predictor\n",
311+
"kh.train_predictor(\n",
312+
" dictionary_file_path,\n",
313+
" \"FlightNegativeTweets\",\n",
314+
" data_table_path,\n",
315+
" \"negativereason\",\n",
316+
" report_file_path,\n",
317+
" max_trees=5,\n",
318+
" max_text_features=1000,\n",
319+
" text_features=\"words\",\n",
320+
")"
321+
]
322+
},
281323
{
282324
"cell_type": "markdown",
283325
"metadata": {},
@@ -1248,6 +1290,57 @@
12481290
")"
12491291
]
12501292
},
1293+
{
1294+
"cell_type": "markdown",
1295+
"metadata": {},
1296+
"source": [
1297+
"### `deploy_model_text()`\n\n",
1298+
"Deploys a model learned on textual data\n The predictor is first trained with `~.api.train_predictor` using the\n text-specific parameters. It is then deployed with a call to `~.api.deploy_model`\n with only its mandatory parameters.\n\n In this example, a Selective Naive Bayes (SNB) model is deployed by applying its\n associated dictionary to the input database. The model predictions are written to\n the output database.\n \n"
1299+
]
1300+
},
1301+
{
1302+
"cell_type": "code",
1303+
"execution_count": null,
1304+
"metadata": {},
1305+
"outputs": [],
1306+
"source": [
1307+
"# Imports\n",
1308+
"import os\n",
1309+
"from khiops import core as kh\n",
1310+
"\n",
1311+
"# Set the file paths\n",
1312+
"dictionary_file_path = os.path.join(\n",
1313+
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.kdic\"\n",
1314+
")\n",
1315+
"data_table_path = os.path.join(\n",
1316+
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.txt\"\n",
1317+
")\n",
1318+
"output_dir = os.path.join(\"kh_samples\", \"deploy_model_text\")\n",
1319+
"report_file_path = os.path.join(output_dir, \"AnalysisResults.khj\")\n",
1320+
"output_data_table_path = os.path.join(output_dir, \"ScoresNegativeAirlineTweets.txt\")\n",
1321+
"\n",
1322+
"# Train the predictor\n",
1323+
"_, model_dictionary_file_path = kh.train_predictor(\n",
1324+
" dictionary_file_path,\n",
1325+
" \"FlightNegativeTweets\",\n",
1326+
" data_table_path,\n",
1327+
" \"negativereason\",\n",
1328+
" report_file_path,\n",
1329+
" max_trees=5,\n",
1330+
" max_text_features=1000,\n",
1331+
" text_features=\"words\",\n",
1332+
")\n",
1333+
"\n",
1334+
"# Deploy the model on the database\n",
1335+
"# It will score it according to the trained predictor\n",
1336+
"kh.deploy_model(\n",
1337+
" model_dictionary_file_path,\n",
1338+
" \"SNB_FlightNegativeTweets\",\n",
1339+
" data_table_path,\n",
1340+
" output_data_table_path,\n",
1341+
")"
1342+
]
1343+
},
12511344
{
12521345
"cell_type": "markdown",
12531346
"metadata": {},

khiops/samples/samples.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,36 @@ def train_predictor_file_paths():
233233
# kh.visualize_report(report_file_path)
234234

235235

236+
def train_predictor_text():
    """Trains a predictor on textual data

    It is a call to `~.api.train_predictor` with its mandatory parameters, plus
    ``max_trees`` and the text-specific parameters ``max_text_features`` and
    ``text_features``.
    """
    # Imports
    import os
    from khiops import core as kh

    # Set the file paths
    dictionary_file_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
    )
    data_table_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
    )
    report_file_path = os.path.join(
        "kh_samples", "train_predictor_text", "AnalysisResults.khj"
    )

    # Train the predictor
    kh.train_predictor(
        dictionary_file_path,
        "FlightNegativeTweets",
        data_table_path,
        "negativereason",
        report_file_path,
        max_trees=5,
        max_text_features=1000,
        text_features="words",
    )
264+
265+
236266
def train_predictor_error_handling():
237267
"""Shows how to handle errors when training a predictor
238268
@@ -1059,6 +1089,52 @@ def deploy_model():
10591089
)
10601090

10611091

1092+
def deploy_model_text():
    """Deploys a model learned on textual data

    The predictor is first trained with `~.api.train_predictor` using the
    text-specific parameters ``max_text_features`` and ``text_features``. It is then
    deployed with a call to `~.api.deploy_model` with only its mandatory parameters.

    In this example, a Selective Naive Bayes (SNB) model is deployed by applying its
    associated dictionary to the input database. The model predictions are written to
    the output database.
    """
    # Imports
    import os
    from khiops import core as kh

    # Set the file paths
    dictionary_file_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
    )
    data_table_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
    )
    output_dir = os.path.join("kh_samples", "deploy_model_text")
    report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
    output_data_table_path = os.path.join(output_dir, "ScoresNegativeAirlineTweets.txt")

    # Train the predictor
    # Only the second returned value, the model dictionary file path, is needed here
    _, model_dictionary_file_path = kh.train_predictor(
        dictionary_file_path,
        "FlightNegativeTweets",
        data_table_path,
        "negativereason",
        report_file_path,
        max_trees=5,
        max_text_features=1000,
        text_features="words",
    )

    # Deploy the model on the database
    # It will score it according to the trained predictor
    kh.deploy_model(
        model_dictionary_file_path,
        "SNB_FlightNegativeTweets",
        data_table_path,
        output_data_table_path,
    )
1136+
1137+
10621138
def deploy_model_mt():
10631139
"""Deploys a multi-table classifier in the simplest way possible
10641140
@@ -1811,6 +1887,7 @@ def build_deployed_dictionary():
18111887
export_dictionary_files,
18121888
train_predictor,
18131889
train_predictor_file_paths,
1890+
train_predictor_text,
18141891
train_predictor_error_handling,
18151892
train_predictor_mt,
18161893
train_predictor_mt_with_specific_rules,
@@ -1829,6 +1906,7 @@ def build_deployed_dictionary():
18291906
train_recoder_with_multiple_parameters,
18301907
train_recoder_mt_flatten,
18311908
deploy_model,
1909+
deploy_model_text,
18321910
deploy_model_mt,
18331911
deploy_model_mt_with_interpretation,
18341912
deploy_model_mt_snowflake,

0 commit comments

Comments
 (0)