Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
---
name: Tests
env:
DEFAULT_SAMPLES_REVISION: 10.2.4
DEFAULT_SAMPLES_REVISION: 11.0.0
DEFAULT_KHIOPS_DESKTOP_REVISION: 10.6.0-b.0
on:
workflow_dispatch:
inputs:
samples-revision:
default: 10.2.4
default: 11.0.0
description: Git Tag/Branch/Commit for the khiops-samples Repo
image-tag:
default: 10.6.0-b.0.0
Expand Down
67 changes: 67 additions & 0 deletions doc/samples/samples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,35 @@ Samples

# If you have Khiops Visualization installed you may open the report as follows
# kh.visualize_report(report_file_path)
.. autofunction:: train_predictor_text
.. code-block:: python

# Imports
import os
from khiops import core as kh

# Set the file paths
dictionary_file_path = os.path.join(
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
)
data_table_path = os.path.join(
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
)
report_file_path = os.path.join(
"kh_samples", "train_predictor_text", "AnalysisResults.khj"
)

# Train the predictor
kh.train_predictor(
dictionary_file_path,
"FlightNegativeTweets",
data_table_path,
"negativereason",
report_file_path,
max_trees=5,
max_text_features=1000,
text_features="words",
)
.. autofunction:: train_predictor_error_handling
.. code-block:: python

Expand Down Expand Up @@ -948,6 +977,44 @@ Samples
kh.deploy_model(
model_dictionary_file_path, "SNB_Adult", data_table_path, output_data_table_path
)
.. autofunction:: deploy_model_text
.. code-block:: python

# Imports
import os
from khiops import core as kh

# Set the file paths
dictionary_file_path = os.path.join(
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
)
data_table_path = os.path.join(
kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
)
output_dir = os.path.join("kh_samples", "deploy_model_text")
report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
output_data_table_path = os.path.join(output_dir, "ScoresNegativeAirlineTweets.txt")

# Train the predictor
_, model_dictionary_file_path = kh.train_predictor(
dictionary_file_path,
"FlightNegativeTweets",
data_table_path,
"negativereason",
report_file_path,
max_trees=5,
max_text_features=1000,
text_features="words",
)

# Deploy the model on the database
# It will score it according to the trained predictor
kh.deploy_model(
model_dictionary_file_path,
"SNB_FlightNegativeTweets",
data_table_path,
output_data_table_path,
)
.. autofunction:: deploy_model_mt
.. code-block:: python

Expand Down
13 changes: 11 additions & 2 deletions khiops/core/dictionary.py
Comment thread
folmos-at-orange marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _format_name(name):
def _quote_value(value):
"""Double-quotes a string

Categorical and metadata values are quoted with this method.
Categorical, Text and metadata values are quoted with this method.
"""
if isinstance(value, str):
quoted_value = '"' + value.replace('"', '""') + '"'
Expand Down Expand Up @@ -1075,7 +1075,16 @@ def is_native(self):
``True`` if a variables comes directly from a data column.

"""
base_types = ["Categorical", "Numerical", "Time", "Date", "Timestamp"]
base_types = [
"Categorical",
"Numerical",
"Time",
"Date",
"Timestamp",
"TimestampTZ",
"Text",
"TextList",
]
if self.variable_block is None:
return self.rule == "" and self.type in base_types
return self.variable_block.rule == ""
Expand Down
93 changes: 93 additions & 0 deletions khiops/samples/samples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,48 @@
"# kh.visualize_report(report_file_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `train_predictor_text()`\n\n",
"Trains a predictor with just text-specific parameters\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import os\n",
"from khiops import core as kh\n",
"\n",
"# Set the file paths\n",
"dictionary_file_path = os.path.join(\n",
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.kdic\"\n",
")\n",
"data_table_path = os.path.join(\n",
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.txt\"\n",
")\n",
"report_file_path = os.path.join(\n",
" \"kh_samples\", \"train_predictor_text\", \"AnalysisResults.khj\"\n",
")\n",
"\n",
"# Train the predictor\n",
"kh.train_predictor(\n",
" dictionary_file_path,\n",
" \"FlightNegativeTweets\",\n",
" data_table_path,\n",
" \"negativereason\",\n",
" report_file_path,\n",
" max_trees=5,\n",
" max_text_features=1000,\n",
" text_features=\"words\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -1248,6 +1290,57 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `deploy_model_text()`\n\n",
"Deploys a model learned on textual data\n It is a call to `~.api.deploy_model` with its mandatory parameters, plus\n text-specific parameters.\n\n In this example, a Selective Naive Bayes (SNB) model is deployed by applying its\n associated dictionary to the input database. The model predictions are written to\n the output database.\n \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import os\n",
"from khiops import core as kh\n",
"\n",
"# Set the file paths\n",
"dictionary_file_path = os.path.join(\n",
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.kdic\"\n",
")\n",
"data_table_path = os.path.join(\n",
" kh.get_samples_dir(), \"NegativeAirlineTweets\", \"NegativeAirlineTweets.txt\"\n",
")\n",
"output_dir = os.path.join(\"kh_samples\", \"deploy_model_text\")\n",
"report_file_path = os.path.join(output_dir, \"AnalysisResults.khj\")\n",
"output_data_table_path = os.path.join(output_dir, \"ScoresNegativeAirlineTweets.txt\")\n",
"\n",
"# Train the predictor\n",
"_, model_dictionary_file_path = kh.train_predictor(\n",
" dictionary_file_path,\n",
" \"FlightNegativeTweets\",\n",
" data_table_path,\n",
" \"negativereason\",\n",
" report_file_path,\n",
" max_trees=5,\n",
" max_text_features=1000,\n",
" text_features=\"words\",\n",
")\n",
"\n",
"# Deploy the model on the database\n",
"# It will score it according to the trained predictor\n",
"kh.deploy_model(\n",
" model_dictionary_file_path,\n",
" \"SNB_FlightNegativeTweets\",\n",
" data_table_path,\n",
" output_data_table_path,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
78 changes: 78 additions & 0 deletions khiops/samples/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,36 @@ def train_predictor_file_paths():
# kh.visualize_report(report_file_path)


def train_predictor_text():
    """Trains a predictor setting only the text-specific parameters

    On top of the positional arguments of `~.api.train_predictor`, the call sets
    ``max_trees``, ``max_text_features`` and ``text_features``.
    """
    # Imports
    import os
    from khiops import core as kh

    # Choose where the analysis report is written
    report_path = os.path.join(
        "kh_samples", "train_predictor_text", "AnalysisResults.khj"
    )

    # Locate the dictionary file and the data table of the sample dataset
    kdic_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
    )
    tweets_table_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
    )

    # Optional training parameters, gathered in one place
    training_options = {
        "max_trees": 5,
        "max_text_features": 1000,
        "text_features": "words",
    }

    # Train the predictor for the "negativereason" target
    kh.train_predictor(
        kdic_path,
        "FlightNegativeTweets",
        tweets_table_path,
        "negativereason",
        report_path,
        **training_options,
    )


def train_predictor_error_handling():
"""Shows how to handle errors when training a predictor

Expand Down Expand Up @@ -1059,6 +1089,52 @@ def deploy_model():
)


def deploy_model_text():
    """Deploys a model learned on textual data

    It first trains a predictor with `~.api.train_predictor` using text-specific
    parameters, and then calls `~.api.deploy_model` with only its mandatory
    parameters.

    In this example, a Selective Naive Bayes (SNB) model is deployed by applying its
    associated dictionary to the input database. The model predictions are written to
    the output database.
    """
    # Imports
    import os
    from khiops import core as kh

    # Set the file paths
    dictionary_file_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.kdic"
    )
    data_table_path = os.path.join(
        kh.get_samples_dir(), "NegativeAirlineTweets", "NegativeAirlineTweets.txt"
    )
    output_dir = os.path.join("kh_samples", "deploy_model_text")
    report_file_path = os.path.join(output_dir, "AnalysisResults.khj")
    output_data_table_path = os.path.join(output_dir, "ScoresNegativeAirlineTweets.txt")

    # Train the predictor
    # Only the second returned value (the model dictionary file path) is needed
    _, model_dictionary_file_path = kh.train_predictor(
        dictionary_file_path,
        "FlightNegativeTweets",
        data_table_path,
        "negativereason",
        report_file_path,
        max_trees=5,
        max_text_features=1000,
        text_features="words",
    )

    # Deploy the model on the database
    # It will score it according to the trained predictor
    kh.deploy_model(
        model_dictionary_file_path,
        "SNB_FlightNegativeTweets",
        data_table_path,
        output_data_table_path,
    )


def deploy_model_mt():
"""Deploys a multi-table classifier in the simplest way possible

Expand Down Expand Up @@ -1811,6 +1887,7 @@ def build_deployed_dictionary():
export_dictionary_files,
train_predictor,
train_predictor_file_paths,
train_predictor_text,
train_predictor_error_handling,
train_predictor_mt,
train_predictor_mt_with_specific_rules,
Expand All @@ -1829,6 +1906,7 @@ def build_deployed_dictionary():
train_recoder_with_multiple_parameters,
train_recoder_mt_flatten,
deploy_model,
deploy_model_text,
deploy_model_mt,
deploy_model_mt_with_interpretation,
deploy_model_mt_snowflake,
Expand Down