From 060fff4f5e6c54e3c3b667870faa5b8c4dc057c3 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 27 May 2025 14:50:05 +0200 Subject: [PATCH 1/3] Fix typo in sample function output directory --- doc/samples/samples.rst | 2 +- khiops/samples/samples.ipynb | 2 +- khiops/samples/samples.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/samples/samples.rst b/doc/samples/samples.rst index 23e7a4c6..18295b39 100644 --- a/doc/samples/samples.rst +++ b/doc/samples/samples.rst @@ -128,7 +128,7 @@ Samples # Set the file paths dictionary_file_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.kdic") - output_dir = os.path.join("kh_samples", "export_dictionary_file") + output_dir = os.path.join("kh_samples", "export_dictionary_files") output_dictionary_file_path = os.path.join(output_dir, "ModifiedAdult.kdic") output_dictionary_json_path = os.path.join(output_dir, "ModifiedAdult.kdicj") alt_output_dictionary_json_path = os.path.join(output_dir, "AltModifiedAdult.kdicj") diff --git a/khiops/samples/samples.ipynb b/khiops/samples/samples.ipynb index f0b1feeb..6fa86326 100644 --- a/khiops/samples/samples.ipynb +++ b/khiops/samples/samples.ipynb @@ -166,7 +166,7 @@ "\n", "# Set the file paths\n", "dictionary_file_path = os.path.join(kh.get_samples_dir(), \"Adult\", \"Adult.kdic\")\n", - "output_dir = os.path.join(\"kh_samples\", \"export_dictionary_file\")\n", + "output_dir = os.path.join(\"kh_samples\", \"export_dictionary_files\")\n", "output_dictionary_file_path = os.path.join(output_dir, \"ModifiedAdult.kdic\")\n", "output_dictionary_json_path = os.path.join(output_dir, \"ModifiedAdult.kdicj\")\n", "alt_output_dictionary_json_path = os.path.join(output_dir, \"AltModifiedAdult.kdicj\")\n", diff --git a/khiops/samples/samples.py b/khiops/samples/samples.py index 532bb4dc..ddabd5e4 100644 --- a/khiops/samples/samples.py +++ b/khiops/samples/samples.py @@ -145,7 +145,7 @@ def export_dictionary_files(): 
# Set the file paths dictionary_file_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.kdic") - output_dir = os.path.join("kh_samples", "export_dictionary_file") + output_dir = os.path.join("kh_samples", "export_dictionary_files") output_dictionary_file_path = os.path.join(output_dir, "ModifiedAdult.kdic") output_dictionary_json_path = os.path.join(output_dir, "ModifiedAdult.kdicj") alt_output_dictionary_json_path = os.path.join(output_dir, "AltModifiedAdult.kdicj") From 0287d7d6d8b077e7fc72e7bf9382f8fc0e79896d Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 21 May 2025 20:07:19 +0200 Subject: [PATCH 2/3] Update interpret_predictor to the latest Core alpha 10.7.3-a.0 - drop lever variables - reinstate max_variable_importances - add global vs. individual variable importance computation support --- doc/samples/samples.rst | 5 ++- khiops/core/api.py | 37 +++++++------------ .../internals/tasks/interpret_predictor.py | 18 ++++----- khiops/samples/samples.ipynb | 7 ++-- khiops/samples/samples.py | 9 +++-- 5 files changed, 33 insertions(+), 43 deletions(-) diff --git a/doc/samples/samples.rst b/doc/samples/samples.rst index 18295b39..4780d318 100644 --- a/doc/samples/samples.rst +++ b/doc/samples/samples.rst @@ -1064,7 +1064,7 @@ Samples dictionary_file_path = os.path.join(accidents_dir, "Accidents.kdic") accidents_table_path = os.path.join(accidents_dir, "Accidents.txt") vehicles_table_path = os.path.join(accidents_dir, "Vehicles.txt") - output_dir = os.path.join("kh_samples", "deploy_model_mt") + output_dir = os.path.join("kh_samples", "deploy_model_mt_with_interpretation") report_file_path = os.path.join(output_dir, "AnalysisResults.khj") interpretor_file_path = os.path.join(output_dir, "InterpretationModel.kdic") output_data_table_path = os.path.join(output_dir, "InterpretedAccidents.txt") @@ -1088,7 +1088,8 @@ Samples model_dictionary_file_path, "SNB_Accident", interpretor_file_path, - 
reinforcement_target_value="NonLethal", + max_variable_importances=3, + importance_ranking="Individual", ) # Deploy the interpretation model on the database diff --git a/khiops/core/api.py b/khiops/core/api.py index a3a494a6..68a8ea38 100644 --- a/khiops/core/api.py +++ b/khiops/core/api.py @@ -375,16 +375,6 @@ def _preprocess_task_arguments(task_args): if isinstance(task_args["selection_value"], (int, float)): task_args["selection_value"] = str(task_args["selection_value"]) - # Discard the max_variable_importances interpretation parameters - if "max_variable_importances" in task_args: - if task_args["max_variable_importances"] is not None: - warnings.warn( - "The 'max_variable_importances' parameter of the " - "'khiops.core.api.interpret_predictor' function is not supported " - " yet. All model variables' importances are computed." - ) - del task_args["max_variable_importances"] - # Detect and replace deprecated data-path syntax on additional_data_tables # Mutate task_args in the process for data_path_task_arg_name in ( @@ -880,9 +870,8 @@ def interpret_predictor( dictionary_file_path_or_domain, predictor_dictionary_name, interpretor_file_path, - max_variable_importances=None, - reinforcement_target_value="", - reinforcement_lever_variables=None, + max_variable_importances=100, + importance_ranking="Global", log_file_path=None, output_scenario_path=None, task_file_path=None, @@ -905,18 +894,17 @@ def interpret_predictor( Name of the predictor dictionary used while building the interpretation model. interpretor_file_path : str Path to the interpretor dictionary file. - max_variable_importances : int, optional + max_variable_importances : int, default 100 Maximum number of variable importances to be selected in the interpretation - model. If not set, then all the variables in the prediction model are - considered. - ..note:: Not currently supported; not taken into account if set. 
- reinforcement_target_value : str, default "" - If this target value is specified, then its probability of occurrence is - tentatively increased. - reinforcement_lever_variables : list of str, optional - The names of variables to use as lever variables while building the - interpretation model. Min length: 0. Max length: the total number of variables - in the prediction model. If not specified, all variables are used. + model. If the predictor contains fewer variables than this number, then + all the variables of the predictor are considered. + importance_ranking : str, default "Global" + Ranking of the Shapley values produced by the interpretor. Can be one of: + + - "Global": predictor variables are ranked by decreasing global importance. + + - "Individual": predictor variables are ranked by decreasing individual + Shapley value. ... : See :ref:`core-api-common-params`. @@ -919,6 +931,7 @@ def interpret_predictor( -------- See the following functions of the ``samples.py`` documentation script: - `samples.interpret_predictor()` + - `samples.deploy_model_mt_with_interpretation()` """ # Save the task arguments # WARNING: Do not move this line, see the top of the "tasks" section for details diff --git a/khiops/core/internals/tasks/interpret_predictor.py b/khiops/core/internals/tasks/interpret_predictor.py index b466b8f2..bc2118e7 100644 --- a/khiops/core/internals/tasks/interpret_predictor.py +++ b/khiops/core/internals/tasks/interpret_predictor.py @@ -6,7 +6,7 @@ ###################################################################################### """interpret_predictor task family""" from khiops.core.internals import task as tm -from khiops.core.internals.types import ListType, StringLikeType +from khiops.core.internals.types import IntType, StringLikeType # Disable long lines to have readable scenarios # pylint: disable=line-too-long @@ -21,8 +21,8 @@ ("interpretor_file_path", StringLikeType), ], [ - ("reinforcement_target_value", StringLikeType, ""), - 
("reinforcement_lever_variables", ListType(StringLikeType), None), + ("max_variable_importances", IntType, 100), + ("importance_ranking", StringLikeType, "Global"), ], ["dictionary_file_path", "interpretor_file_path"], # pylint: disable=line-too-long @@ -38,14 +38,12 @@ // Interpret model LearningTools.InterpretPredictor - HowParameter.HowClass __reinforcement_target_value__ - __DICT__ - __reinforcement_lever_variables__ - HowParameter.leverVariablesSpecView.UnselectAll - HowParameter.leverVariablesSpecView.AttributeSpecs.List.Key - HowParameter.leverVariablesSpecView.AttributeSpecs.Used - __END_DICT__ + // Number of predictor variables exploited in the interpretation model + ContributionAttributeNumber __max_variable_importances__ + + // Ranking of the Shapley value produced by the interpretation model + ShapleyValueRanking __importance_ranking__ // Build interpretation dictionary BuildInterpretationClass diff --git a/khiops/samples/samples.ipynb b/khiops/samples/samples.ipynb index 6fa86326..fd7da16f 100644 --- a/khiops/samples/samples.ipynb +++ b/khiops/samples/samples.ipynb @@ -1396,7 +1396,7 @@ "metadata": {}, "source": [ "### `deploy_model_mt_with_interpretation()`\n\n", - "Deploys a multi-table interpretor in the simplest way possible\n\n It is a call to `~.api.deploy_model` with additional parameters to handle\n multi-table deployment.\n\n In this example, a Selective Naive Bayes (SNB) interpretation model is\n deployed by applying its associated dictionary to the input database.\n The model variable importances are written to the output data table.\n \n" + "Deploys a multi-table interpretor in the simplest way possible\n\n It is a call to `~.api.deploy_model` with additional parameters related to\n the variable importances.\n\n In this example, a Selective Naive Bayes (SNB) interpretation model is\n deployed by applying its associated dictionary to the input database.\n The model variable importances are written to the output data table.\n \n" ] }, { @@ 
-1414,7 +1414,7 @@ "dictionary_file_path = os.path.join(accidents_dir, \"Accidents.kdic\")\n", "accidents_table_path = os.path.join(accidents_dir, \"Accidents.txt\")\n", "vehicles_table_path = os.path.join(accidents_dir, \"Vehicles.txt\")\n", - "output_dir = os.path.join(\"kh_samples\", \"deploy_model_mt\")\n", + "output_dir = os.path.join(\"kh_samples\", \"deploy_model_mt_with_interpretation\")\n", "report_file_path = os.path.join(output_dir, \"AnalysisResults.khj\")\n", "interpretor_file_path = os.path.join(output_dir, \"InterpretationModel.kdic\")\n", "output_data_table_path = os.path.join(output_dir, \"InterpretedAccidents.txt\")\n", @@ -1438,7 +1438,8 @@ " model_dictionary_file_path,\n", " \"SNB_Accident\",\n", " interpretor_file_path,\n", - " reinforcement_target_value=\"NonLethal\",\n", + " max_variable_importances=3,\n", + " importance_ranking=\"Individual\",\n", ")\n", "\n", "# Deploy the interpretation model on the database\n", diff --git a/khiops/samples/samples.py b/khiops/samples/samples.py index ddabd5e4..0f2dc681 100644 --- a/khiops/samples/samples.py +++ b/khiops/samples/samples.py @@ -1184,8 +1184,8 @@ def deploy_model_mt(): def deploy_model_mt_with_interpretation(): """Deploys a multi-table interpretor in the simplest way possible - It is a call to `~.api.deploy_model` with additional parameters to handle - multi-table deployment. + It is a call to `~.api.deploy_model` with additional parameters related to + the variable importances. In this example, a Selective Naive Bayes (SNB) interpretation model is deployed by applying its associated dictionary to the input database. 
@@ -1200,7 +1200,7 @@ def deploy_model_mt_with_interpretation(): dictionary_file_path = os.path.join(accidents_dir, "Accidents.kdic") accidents_table_path = os.path.join(accidents_dir, "Accidents.txt") vehicles_table_path = os.path.join(accidents_dir, "Vehicles.txt") - output_dir = os.path.join("kh_samples", "deploy_model_mt") + output_dir = os.path.join("kh_samples", "deploy_model_mt_with_interpretation") report_file_path = os.path.join(output_dir, "AnalysisResults.khj") interpretor_file_path = os.path.join(output_dir, "InterpretationModel.kdic") output_data_table_path = os.path.join(output_dir, "InterpretedAccidents.txt") @@ -1224,7 +1224,8 @@ def deploy_model_mt_with_interpretation(): model_dictionary_file_path, "SNB_Accident", interpretor_file_path, - reinforcement_target_value="NonLethal", + max_variable_importances=3, + importance_ranking="Individual", ) # Deploy the interpretation model on the database From 4f0bc4d68b23cc078529affe0f4966b4482257f6 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 22 May 2025 16:47:52 +0200 Subject: [PATCH 3/3] Add reinforce_predictor Core API function as supported by 10.7.3-a.0 Also add relevant samples of its usage. --- doc/samples/samples.rst | 78 +++++++++++++ khiops/core/api.py | 63 +++++++++++ khiops/core/internals/tasks/__init__.py | 2 + .../internals/tasks/reinforce_predictor.py | 60 ++++++++++ khiops/samples/samples.ipynb | 104 +++++++++++++++++ khiops/samples/samples.py | 106 ++++++++++++++++++ 6 files changed, 413 insertions(+) create mode 100644 khiops/core/internals/tasks/reinforce_predictor.py diff --git a/doc/samples/samples.rst b/doc/samples/samples.rst index 4780d318..4a6c096f 100644 --- a/doc/samples/samples.rst +++ b/doc/samples/samples.rst @@ -686,6 +686,37 @@ Samples kh.interpret_predictor(predictor_file_path, "SNB_Adult", interpretor_file_path) print(f"The interpretation model is '{interpretor_file_path}'") +.. autofunction:: reinforce_predictor +.. 
code-block:: python + + # Imports + import os + from khiops import core as kh + + dictionary_file_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.kdic") + data_table_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt") + output_dir = os.path.join("kh_samples", "reinforce_predictor") + analysis_report_file_path = os.path.join(output_dir, "AnalysisResults.khj") + reinforced_predictor_file_path = os.path.join(output_dir, "ReinforcedAdultModel.kdic") + + # Build prediction model + _, predictor_file_path = kh.train_predictor( + dictionary_file_path, + "Adult", + data_table_path, + "class", + analysis_report_file_path, + ) + + # Build reinforced predictor + kh.reinforce_predictor( + predictor_file_path, + "SNB_Adult", + reinforced_predictor_file_path, + reinforcement_lever_variables=["occupation"], + ) + + print(f"The reinforced predictor is '{reinforced_predictor_file_path}'") .. autofunction:: multiple_train_predictor .. code-block:: python @@ -1102,6 +1133,53 @@ Samples output_data_table_path, additional_data_tables={"Vehicles": vehicles_table_path}, ) +.. autofunction:: deploy_reinforced_model_mt +.. 
code-block:: python + + # Imports + import os + from khiops import core as kh + + # Set the file paths + accidents_dir = os.path.join(kh.get_samples_dir(), "AccidentsSummary") + dictionary_file_path = os.path.join(accidents_dir, "Accidents.kdic") + accidents_table_path = os.path.join(accidents_dir, "Accidents.txt") + vehicles_table_path = os.path.join(accidents_dir, "Vehicles.txt") + output_dir = os.path.join("kh_samples", "deploy_reinforced_model_mt") + report_file_path = os.path.join(output_dir, "AnalysisResults.khj") + reinforced_predictor_file_path = os.path.join(output_dir, "ReinforcedModel.kdic") + output_data_table_path = os.path.join(output_dir, "ReinforcedAccidents.txt") + + # Train the predictor (see train_predictor_mt for details) + _, model_dictionary_file_path = kh.train_predictor( + dictionary_file_path, + "Accident", + accidents_table_path, + "Gravity", + report_file_path, + additional_data_tables={"Vehicles": vehicles_table_path}, + max_trees=0, + ) + + # Reinforce the predictor + kh.reinforce_predictor( + model_dictionary_file_path, + "SNB_Accident", + reinforced_predictor_file_path, + reinforcement_target_value="NonLethal", + reinforcement_lever_variables=["InAgglomeration", "CollisionType"], + ) + + # Deploy the reinforced model on the database + # Besides the mandatory parameters, it is specified: + # - A python dictionary linking data paths to file paths for non-root tables + kh.deploy_model( + reinforced_predictor_file_path, + "Reinforcement_SNB_Accident", + accidents_table_path, + output_data_table_path, + additional_data_tables={"Vehicles": vehicles_table_path}, + ) .. autofunction:: deploy_model_mt_snowflake .. 
code-block:: python diff --git a/khiops/core/api.py b/khiops/core/api.py index 68a8ea38..c0e04c4c 100644 --- a/khiops/core/api.py +++ b/khiops/core/api.py @@ -929,6 +929,69 @@ def interpret_predictor( _run_task("interpret_predictor", task_args) +def reinforce_predictor( + dictionary_file_path_or_domain, + predictor_dictionary_name, + reinforced_predictor_file_path, + reinforcement_target_value="", + reinforcement_lever_variables=None, + log_file_path=None, + output_scenario_path=None, + task_file_path=None, + trace=False, + stdout_file_path="", + stderr_file_path="", + max_cores=None, + memory_limit_mb=None, + temp_dir="", + scenario_prologue="", + **kwargs, +): + r"""Builds a reinforced predictor from a predictor + + A reinforced predictor is a model which increases the importance of specified lever + variables in order to increase the probability of occurrence of the specified target + value. + + Parameters + ---------- + dictionary_file_path_or_domain : str or `.DictionaryDomain` + Path of a Khiops dictionary file or a DictionaryDomain object. + predictor_dictionary_name : str + Name of the predictor dictionary used while building the reinforced predictor. + reinforced_predictor_file_path : str + Path to the reinforced predictor dictionary file. + reinforcement_target_value : str, default "" + If this target value is specified, then its probability of occurrence is + tentatively increased. + reinforcement_lever_variables : list of str + The names of variables to use as lever variables while building the + reinforced predictor. Min length: 1. Max length: the total number of variables + in the prediction model. + ... : + See :ref:`core-api-common-params`. 
+ + Raises + ------ + `ValueError` + Invalid values of an argument + `TypeError` + Invalid type of an argument + + Examples + -------- + See the following functions of the ``samples.py`` documentation script: + - `samples.reinforce_predictor()` + - `samples.deploy_reinforced_model_mt()` + """ + # Save the task arguments + # WARNING: Do not move this line, see the top of the "tasks" section for details + task_args = locals() + + # Run the task + _run_task("reinforce_predictor", task_args) + + def evaluate_predictor( dictionary_file_path_or_domain, train_dictionary_name, diff --git a/khiops/core/internals/tasks/__init__.py b/khiops/core/internals/tasks/__init__.py index a20a024f..2e48f39a 100644 --- a/khiops/core/internals/tasks/__init__.py +++ b/khiops/core/internals/tasks/__init__.py @@ -22,6 +22,7 @@ extract_keys_from_data_table, interpret_predictor, prepare_coclustering_deployment, + reinforce_predictor, simplify_coclustering, sort_data_table, train_coclustering, @@ -43,6 +44,7 @@ extract_clusters, extract_keys_from_data_table, interpret_predictor, + reinforce_predictor, prepare_coclustering_deployment, simplify_coclustering, sort_data_table, diff --git a/khiops/core/internals/tasks/reinforce_predictor.py b/khiops/core/internals/tasks/reinforce_predictor.py new file mode 100644 index 00000000..46e4cad7 --- /dev/null +++ b/khiops/core/internals/tasks/reinforce_predictor.py @@ -0,0 +1,60 @@ +###################################################################################### +# Copyright (c) 2023-2025 Orange. All rights reserved. # +# This software is distributed under the BSD 3-Clause-clear License, the text of # +# which is available at https://spdx.org/licenses/BSD-3-Clause-Clear.html or # +# see the "LICENSE.md" file for more details. 
# +###################################################################################### +"""reinforce_predictor task family""" +from khiops.core.internals import task as tm +from khiops.core.internals.types import ListType, StringLikeType + +# Disable long lines to have readable scenarios +# pylint: disable=line-too-long +TASKS = [ + tm.KhiopsTask( + "reinforce_predictor", + "khiops", + "10.7.3-a.0", + [ + ("dictionary_file_path", StringLikeType), + ("predictor_dictionary_name", StringLikeType), + ("reinforced_predictor_file_path", StringLikeType), + ], + [ + ("reinforcement_target_value", StringLikeType, ""), + ("reinforcement_lever_variables", ListType(StringLikeType), None), + ], + ["dictionary_file_path", "reinforced_predictor_file_path"], + # pylint: disable=line-too-long + # fmt: off + """ + // Dictionary file and class settings + ClassManagement.OpenFile + ClassFileName __dictionary_file_path__ + OK + + // Reinforcement settings + TrainDatabase.ClassName __predictor_dictionary_name__ + + // Reinforce model + LearningTools.ReinforcePredictor + ReinforcedTargetValue __reinforcement_target_value__ + + LeverAttributes.UnselectAll + __DICT__ + __reinforcement_lever_variables__ + LeverAttributes.List.Key + LeverAttributes.Used + __END_DICT__ + + // Build reinforced predictor + BuildReinforcementClass + + // Output settings + ClassFileName __reinforced_predictor_file_path__ + OK + Exit + """, + # fmt: on + ), +] diff --git a/khiops/samples/samples.ipynb b/khiops/samples/samples.ipynb index fd7da16f..24736d82 100644 --- a/khiops/samples/samples.ipynb +++ b/khiops/samples/samples.ipynb @@ -908,6 +908,50 @@ "print(f\"The interpretation model is '{interpretor_file_path}'\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `reinforce_predictor()`\n\n", + "Builds reinforced predictor for existing predictor\n\n The reinforced predictor produces the following reinforcement variables for the\n specified target value to reinforce (i.e. 
whose probability of occurrence is\n tentatively increased):\n\n - initial score, containing the conditional probability of the target value before\n reinforcement\n - four variables are output in decreasing reinforcement value: name of the lever\n variable, reinforcement part, final score after reinforcement, and class change\n tag.\n\n It calls `~.api.train_predictor` and `~.api.reinforce_predictor` only with\n their mandatory parameters.\n \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import os\n", + "from khiops import core as kh\n", + "\n", + "dictionary_file_path = os.path.join(kh.get_samples_dir(), \"Adult\", \"Adult.kdic\")\n", + "data_table_path = os.path.join(kh.get_samples_dir(), \"Adult\", \"Adult.txt\")\n", + "output_dir = os.path.join(\"kh_samples\", \"reinforce_predictor\")\n", + "analysis_report_file_path = os.path.join(output_dir, \"AnalysisResults.khj\")\n", + "reinforced_predictor_file_path = os.path.join(output_dir, \"ReinforcedAdultModel.kdic\")\n", + "\n", + "# Build prediction model\n", + "_, predictor_file_path = kh.train_predictor(\n", + " dictionary_file_path,\n", + " \"Adult\",\n", + " data_table_path,\n", + " \"class\",\n", + " analysis_report_file_path,\n", + ")\n", + "\n", + "# Build reinforced predictor\n", + "kh.reinforce_predictor(\n", + " predictor_file_path,\n", + " \"SNB_Adult\",\n", + " reinforced_predictor_file_path,\n", + " reinforcement_lever_variables=[\"occupation\"],\n", + ")\n", + "\n", + "print(f\"The reinforced predictor is '{reinforced_predictor_file_path}'\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1454,6 +1498,66 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `deploy_reinforced_model_mt()`\n\n", + "Deploys a multi-table reinforced model in the simplest way possible\n\n It is a call to `~.api.deploy_model` with additional parameters related to\n the lever variables.\n\n In this 
example, a reinforced Selective Naive Bayes (SNB) model is\n deployed by applying its associated dictionary to the input database.\n The reinforced model predictions are written to the output data table.\n \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import os\n", + "from khiops import core as kh\n", + "\n", + "# Set the file paths\n", + "accidents_dir = os.path.join(kh.get_samples_dir(), \"AccidentsSummary\")\n", + "dictionary_file_path = os.path.join(accidents_dir, \"Accidents.kdic\")\n", + "accidents_table_path = os.path.join(accidents_dir, \"Accidents.txt\")\n", + "vehicles_table_path = os.path.join(accidents_dir, \"Vehicles.txt\")\n", + "output_dir = os.path.join(\"kh_samples\", \"deploy_reinforced_model_mt\")\n", + "report_file_path = os.path.join(output_dir, \"AnalysisResults.khj\")\n", + "reinforced_predictor_file_path = os.path.join(output_dir, \"ReinforcedModel.kdic\")\n", + "output_data_table_path = os.path.join(output_dir, \"ReinforcedAccidents.txt\")\n", + "\n", + "# Train the predictor (see train_predictor_mt for details)\n", + "_, model_dictionary_file_path = kh.train_predictor(\n", + " dictionary_file_path,\n", + " \"Accident\",\n", + " accidents_table_path,\n", + " \"Gravity\",\n", + " report_file_path,\n", + " additional_data_tables={\"Vehicles\": vehicles_table_path},\n", + " max_trees=0,\n", + ")\n", + "\n", + "# Reinforce the predictor\n", + "kh.reinforce_predictor(\n", + " model_dictionary_file_path,\n", + " \"SNB_Accident\",\n", + " reinforced_predictor_file_path,\n", + " reinforcement_target_value=\"NonLethal\",\n", + " reinforcement_lever_variables=[\"InAgglomeration\", \"CollisionType\"],\n", + ")\n", + "\n", + "# Deploy the reinforced model on the database\n", + "# Besides the mandatory parameters, it is specified:\n", + "# - A python dictionary linking data paths to file paths for non-root tables\n", + "kh.deploy_model(\n", + " 
reinforced_predictor_file_path,\n", + " \"Reinforcement_SNB_Accident\",\n", + " accidents_table_path,\n", + " output_data_table_path,\n", + " additional_data_tables={\"Vehicles\": vehicles_table_path},\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/khiops/samples/samples.py b/khiops/samples/samples.py index 0f2dc681..76a01f53 100644 --- a/khiops/samples/samples.py +++ b/khiops/samples/samples.py @@ -845,6 +845,54 @@ def interpret_predictor(): print(f"The interpretation model is '{interpretor_file_path}'") +def reinforce_predictor(): + """Builds reinforced predictor for existing predictor + + The reinforced predictor produces the following reinforcement variables for the + specified target value to reinforce (i.e. whose probability of occurrence is + tentatively increased): + + - initial score, containing the conditional probability of the target value before + reinforcement + - four variables are output in decreasing reinforcement value: name of the lever + variable, reinforcement part, final score after reinforcement, and class change + tag. + + It calls `~.api.train_predictor` and `~.api.reinforce_predictor` only with + their mandatory parameters. 
+ """ + # Imports + import os + from khiops import core as kh + + dictionary_file_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.kdic") + data_table_path = os.path.join(kh.get_samples_dir(), "Adult", "Adult.txt") + output_dir = os.path.join("kh_samples", "reinforce_predictor") + analysis_report_file_path = os.path.join(output_dir, "AnalysisResults.khj") + reinforced_predictor_file_path = os.path.join( + output_dir, "ReinforcedAdultModel.kdic" + ) + + # Build prediction model + _, predictor_file_path = kh.train_predictor( + dictionary_file_path, + "Adult", + data_table_path, + "class", + analysis_report_file_path, + ) + + # Build reinforced predictor + kh.reinforce_predictor( + predictor_file_path, + "SNB_Adult", + reinforced_predictor_file_path, + reinforcement_lever_variables=["occupation"], + ) + + print(f"The reinforced predictor is '{reinforced_predictor_file_path}'") + + def evaluate_predictor(): """Evaluates a predictor in the simplest way possible @@ -1240,6 +1288,62 @@ def deploy_model_mt_with_interpretation(): ) +def deploy_reinforced_model_mt(): + """Deploys a multi-table reinforced model in the simplest way possible + + It is a call to `~.api.deploy_model` with additional parameters related to + the lever variables. + + In this example, a reinforced Selective Naive Bayes (SNB) model is + deployed by applying its associated dictionary to the input database. + The reinforced model predictions are written to the output data table. 
+ """ + # Imports + import os + from khiops import core as kh + + # Set the file paths + accidents_dir = os.path.join(kh.get_samples_dir(), "AccidentsSummary") + dictionary_file_path = os.path.join(accidents_dir, "Accidents.kdic") + accidents_table_path = os.path.join(accidents_dir, "Accidents.txt") + vehicles_table_path = os.path.join(accidents_dir, "Vehicles.txt") + output_dir = os.path.join("kh_samples", "deploy_reinforced_model_mt") + report_file_path = os.path.join(output_dir, "AnalysisResults.khj") + reinforced_predictor_file_path = os.path.join(output_dir, "ReinforcedModel.kdic") + output_data_table_path = os.path.join(output_dir, "ReinforcedAccidents.txt") + + # Train the predictor (see train_predictor_mt for details) + _, model_dictionary_file_path = kh.train_predictor( + dictionary_file_path, + "Accident", + accidents_table_path, + "Gravity", + report_file_path, + additional_data_tables={"Vehicles": vehicles_table_path}, + max_trees=0, + ) + + # Reinforce the predictor + kh.reinforce_predictor( + model_dictionary_file_path, + "SNB_Accident", + reinforced_predictor_file_path, + reinforcement_target_value="NonLethal", + reinforcement_lever_variables=["InAgglomeration", "CollisionType"], + ) + + # Deploy the reinforced model on the database + # Besides the mandatory parameters, it is specified: + # - A python dictionary linking data paths to file paths for non-root tables + kh.deploy_model( + reinforced_predictor_file_path, + "Reinforcement_SNB_Accident", + accidents_table_path, + output_data_table_path, + additional_data_tables={"Vehicles": vehicles_table_path}, + ) + + def deploy_model_mt_snowflake(): """Deploys a classifier model on a dataset with a snowflake schema""" # Imports @@ -1900,6 +2004,7 @@ def build_deployed_dictionary(): train_predictor_detect_format, train_predictor_with_cross_validation, interpret_predictor, + reinforce_predictor, multiple_train_predictor, evaluate_predictor, access_predictor_evaluation_report, @@ -1910,6 +2015,7 @@ def 
build_deployed_dictionary(): deploy_model_text, deploy_model_mt, deploy_model_mt_with_interpretation, + deploy_reinforced_model_mt, deploy_model_mt_snowflake, deploy_model_expert, deploy_classifier_for_metrics,