diff --git a/doc/samples/samples.rst b/doc/samples/samples.rst
index 4a6c096f..333f04dd 100644
--- a/doc/samples/samples.rst
+++ b/doc/samples/samples.rst
@@ -56,6 +56,83 @@ Samples
     kh.build_dictionary_from_data_table(
         data_table_path, dictionary_name, dictionary_file_path
     )
+.. autofunction:: create_dictionary_domain
+.. code-block:: python
+
+    # Imports
+    import os
+    from khiops import core as kh
+
+    # Create a Root dictionary
+    root_dictionary = kh.Dictionary(
+        json_data={"name": "dict_from_scratch", "root": True, "key": ["Id"]}
+    )
+
+    # Start with simple variables to declare
+    simple_variables = [
+        {"name": "Id", "type": "Categorical"},
+        {"name": "Num", "type": "Numerical"},
+        {"name": "text", "type": "Text"},
+        {"name": "hour", "type": "Time"},
+        {"name": "date", "type": "Date"},
+        {"name": "ambiguous_ts", "type": "Timestamp"},
+        {"name": "ts", "type": "TimestampTZ"},
+    ]
+    for var_spec in simple_variables:
+        var = kh.Variable()
+        var.name = var_spec["name"]
+        var.type = var_spec["type"]
+        root_dictionary.add_variable(var)
+
+    # Create a second dictionary
+    second_dictionary = kh.Dictionary(
+        json_data={"name": "Service", "key": ["Id", "id_product"]}
+    )
+    second_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Id", "type": "Categorical"})
+    )
+    second_dictionary.add_variable(
+        kh.Variable(json_data={"name": "id_product", "type": "Categorical"})
+    )
+
+    # Create a third dictionary, declaring its key variable like the others
+    third_dictionary = kh.Dictionary(json_data={"name": "Address", "key": ["Id"]})
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Id", "type": "Categorical"})
+    )
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "StreetNumber", "type": "Numerical"})
+    )
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "StreetName", "type": "Categorical"})
+    )
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "id_city", "type": "Categorical"})
+    )
+
+    # Add the variables used in a multi-table context in the first dictionary.
+    # They link the root dictionary to the additional ones
+    root_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Services", "type": "Table(Service)"})
+    )
+    root_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Address", "type": "Entity(Address)"})
+    )
+
+    # Create a DictionaryDomain (set of dictionaries)
+    dictionary_domain = kh.DictionaryDomain()
+    dictionary_domain.add_dictionary(root_dictionary)
+    dictionary_domain.add_dictionary(second_dictionary)
+    dictionary_domain.add_dictionary(third_dictionary)
+
+    output_dir = os.path.join("kh_samples", "create_dictionary_domain")
+    dictionary_file_path = os.path.join(output_dir, "dict_from_scratch.kdic")
+
+    # Create the output directory, and any missing parent, if needed
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Write the dictionary domain to a file
+    dictionary_domain.export_khiops_dictionary_file(dictionary_file_path)
 .. autofunction:: detect_data_table_format
 .. code-block:: python
 
diff --git a/khiops/core/dictionary.py b/khiops/core/dictionary.py
index 71fb63ef..007c6118 100644
--- a/khiops/core/dictionary.py
+++ b/khiops/core/dictionary.py
@@ -971,6 +971,17 @@ class Variable:
         True if the variable is used.
     type : str
         Variable type.
+        It can be either native (``Categorical``, ``Numerical``, ``Time``,
+        ``Date``, ``Timestamp``, ``TimestampTZ``, ``Text``),
+        internal (``TextList``, ``Structure``)
+
+        - See https://khiops.org/11.0.0-b.0/api-docs/kdic/text-list-rules/
+        - See https://khiops.org/11.0.0-b.0/api-docs/kdic/structures-introduction/
+
+        or relational (``Entity`` - 0-1 relationship, ``Table`` - 0-n relationship)
+
+        - See https://khiops.org/11.0.0-b.0/tutorials/kdic_multi_table/
+
     object_type : str
         Type complement for the ``Table`` and ``Entity`` types.
     structure_type : str
@@ -989,6 +1000,11 @@ class Variable:
         List of variable comments.
     meta_data : `MetaData`
         Variable metadata.
+
+    Examples
+    --------
+    See the following function of the ``samples.py`` documentation script:
+        - `samples.create_dictionary_domain()`
     """
 
     def __init__(self, json_data=None):
diff --git a/khiops/samples/samples.ipynb b/khiops/samples/samples.ipynb
index 24736d82..4a7cfc92 100644
--- a/khiops/samples/samples.ipynb
+++ b/khiops/samples/samples.ipynb
@@ -57,6 +57,96 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `create_dictionary_domain()`\n\n",
+    "Creates a dictionary domain from scratch\n\n    This dictionary domain contains a set of dictionaries,\n    with all possible variable types.\n    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports\n",
+    "import os\n",
+    "from khiops import core as kh\n",
+    "\n",
+    "# Create a Root dictionary\n",
+    "root_dictionary = kh.Dictionary(\n",
+    "    json_data={\"name\": \"dict_from_scratch\", \"root\": True, \"key\": [\"Id\"]}\n",
+    ")\n",
+    "\n",
+    "# Start with simple variables to declare\n",
+    "simple_variables = [\n",
+    "    {\"name\": \"Id\", \"type\": \"Categorical\"},\n",
+    "    {\"name\": \"Num\", \"type\": \"Numerical\"},\n",
+    "    {\"name\": \"text\", \"type\": \"Text\"},\n",
+    "    {\"name\": \"hour\", \"type\": \"Time\"},\n",
+    "    {\"name\": \"date\", \"type\": \"Date\"},\n",
+    "    {\"name\": \"ambiguous_ts\", \"type\": \"Timestamp\"},\n",
+    "    {\"name\": \"ts\", \"type\": \"TimestampTZ\"},\n",
+    "]\n",
+    "for var_spec in simple_variables:\n",
+    "    var = kh.Variable()\n",
+    "    var.name = var_spec[\"name\"]\n",
+    "    var.type = var_spec[\"type\"]\n",
+    "    root_dictionary.add_variable(var)\n",
+    "\n",
+    "# Create a second dictionary\n",
+    "second_dictionary = kh.Dictionary(\n",
+    "    json_data={\"name\": \"Service\", \"key\": [\"Id\", \"id_product\"]}\n",
+    ")\n",
+    "second_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"Id\", \"type\": \"Categorical\"})\n",
+    ")\n",
+    "second_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"id_product\", \"type\": \"Categorical\"})\n",
+    ")\n",
+    "\n",
+    "# Create a third dictionary, declaring its key variable like the others\n",
+    "third_dictionary = kh.Dictionary(json_data={\"name\": \"Address\", \"key\": [\"Id\"]})\n",
+    "third_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"Id\", \"type\": \"Categorical\"})\n",
+    ")\n",
+    "third_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"StreetNumber\", \"type\": \"Numerical\"})\n",
+    ")\n",
+    "third_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"StreetName\", \"type\": \"Categorical\"})\n",
+    ")\n",
+    "third_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"id_city\", \"type\": \"Categorical\"})\n",
+    ")\n",
+    "\n",
+    "# Add the variables used in a multi-table context in the first dictionary.\n",
+    "# They link the root dictionary to the additional ones\n",
+    "root_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"Services\", \"type\": \"Table(Service)\"})\n",
+    ")\n",
+    "root_dictionary.add_variable(\n",
+    "    kh.Variable(json_data={\"name\": \"Address\", \"type\": \"Entity(Address)\"})\n",
+    ")\n",
+    "\n",
+    "# Create a DictionaryDomain (set of dictionaries)\n",
+    "dictionary_domain = kh.DictionaryDomain()\n",
+    "dictionary_domain.add_dictionary(root_dictionary)\n",
+    "dictionary_domain.add_dictionary(second_dictionary)\n",
+    "dictionary_domain.add_dictionary(third_dictionary)\n",
+    "\n",
+    "output_dir = os.path.join(\"kh_samples\", \"create_dictionary_domain\")\n",
+    "dictionary_file_path = os.path.join(output_dir, \"dict_from_scratch.kdic\")\n",
+    "\n",
+    "# Create the output directory, and any missing parent, if needed\n",
+    "os.makedirs(output_dir, exist_ok=True)\n",
+    "\n",
+    "# Write the dictionary domain to a file\n",
+    "dictionary_domain.export_khiops_dictionary_file(dictionary_file_path)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/khiops/samples/samples.py b/khiops/samples/samples.py
index 76a01f53..57babefe 100644
--- a/khiops/samples/samples.py
+++ b/khiops/samples/samples.py
@@ -65,6 +65,88 @@ def build_dictionary_from_data_table():
     )
 
 
+def create_dictionary_domain():
+    """Creates a dictionary domain from scratch
+
+    This dictionary domain contains a set of dictionaries,
+    with all possible variable types.
+    """
+    # Imports
+    import os
+    from khiops import core as kh
+
+    # Create a Root dictionary
+    root_dictionary = kh.Dictionary(
+        json_data={"name": "dict_from_scratch", "root": True, "key": ["Id"]}
+    )
+
+    # Start with simple variables to declare
+    simple_variables = [
+        {"name": "Id", "type": "Categorical"},
+        {"name": "Num", "type": "Numerical"},
+        {"name": "text", "type": "Text"},
+        {"name": "hour", "type": "Time"},
+        {"name": "date", "type": "Date"},
+        {"name": "ambiguous_ts", "type": "Timestamp"},
+        {"name": "ts", "type": "TimestampTZ"},
+    ]
+    for var_spec in simple_variables:
+        var = kh.Variable()
+        var.name = var_spec["name"]
+        var.type = var_spec["type"]
+        root_dictionary.add_variable(var)
+
+    # Create a second dictionary
+    second_dictionary = kh.Dictionary(
+        json_data={"name": "Service", "key": ["Id", "id_product"]}
+    )
+    second_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Id", "type": "Categorical"})
+    )
+    second_dictionary.add_variable(
+        kh.Variable(json_data={"name": "id_product", "type": "Categorical"})
+    )
+
+    # Create a third dictionary, declaring its key variable like the others
+    third_dictionary = kh.Dictionary(json_data={"name": "Address", "key": ["Id"]})
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Id", "type": "Categorical"})
+    )
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "StreetNumber", "type": "Numerical"})
+    )
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "StreetName", "type": "Categorical"})
+    )
+    third_dictionary.add_variable(
+        kh.Variable(json_data={"name": "id_city", "type": "Categorical"})
+    )
+
+    # Add the variables used in a multi-table context in the first dictionary.
+    # They link the root dictionary to the additional ones
+    root_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Services", "type": "Table(Service)"})
+    )
+    root_dictionary.add_variable(
+        kh.Variable(json_data={"name": "Address", "type": "Entity(Address)"})
+    )
+
+    # Create a DictionaryDomain (set of dictionaries)
+    dictionary_domain = kh.DictionaryDomain()
+    dictionary_domain.add_dictionary(root_dictionary)
+    dictionary_domain.add_dictionary(second_dictionary)
+    dictionary_domain.add_dictionary(third_dictionary)
+
+    output_dir = os.path.join("kh_samples", "create_dictionary_domain")
+    dictionary_file_path = os.path.join(output_dir, "dict_from_scratch.kdic")
+
+    # Create the output directory, and any missing parent, if needed
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Write the dictionary domain to a file
+    dictionary_domain.export_khiops_dictionary_file(dictionary_file_path)
+
+
 def detect_data_table_format():
     """Detects the format of a data table with and without a dictionary file
 
@@ -1987,6 +2069,7 @@ def build_deployed_dictionary():
 exported_samples = [
     get_khiops_version,
     build_dictionary_from_data_table,
+    create_dictionary_domain,
     detect_data_table_format,
     check_database,
     export_dictionary_files,