Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 18 additions & 38 deletions doc/samples/samples_sklearn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,8 @@ Samples

# Create the dataset spec and the target
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
},
}
Expand Down Expand Up @@ -224,18 +223,12 @@ Samples

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
"Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, "AccidentId"),
"Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, ["AccidentId"], True),
},
"relations": [
("Accidents", "Vehicles"),
("Vehicles", "Users"),
("Accidents", "Places", True),
],
}

# Load the target variable "Gravity"
Expand Down Expand Up @@ -411,16 +404,14 @@ Samples
# Create the dataset multitable specification for the train/test split
# We specify each table with a name and a tuple (dataframe, key_columns)
X_train = {
"main_table": "Accidents",
"tables": {
"Accidents": (X_train_main, "AccidentId"),
"main_table": (X_train_main, ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (X_train_secondary, ["AccidentId", "VehicleId"]),
},
}
X_test = {
"main_table": "Accidents",
"tables": {
"Accidents": (X_test_main, "AccidentId"),
"main_table": (X_test_main, ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (X_test_secondary, ["AccidentId", "VehicleId"]),
},
}
Expand Down Expand Up @@ -557,9 +548,8 @@ Samples

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
},
}
Expand Down Expand Up @@ -596,18 +586,12 @@ Samples

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
"Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, "AccidentId"),
"Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, ["AccidentId"], True),
},
"relations": [
("Accidents", "Vehicles"),
("Vehicles", "Users"),
("Accidents", "Places", True),
],
}

# Load the target variable "Gravity"
Expand Down Expand Up @@ -701,14 +685,10 @@ Samples

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
},
"relations": [
("Accidents", "Vehicles"),
],
}

# Load the target variable "Gravity"
Expand Down
16 changes: 14 additions & 2 deletions khiops/core/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@


def _build_multi_table_dictionary_domain(
dictionary_domain, root_dictionary_name, secondary_table_variable_name
dictionary_domain,
root_dictionary_name,
secondary_table_variable_name,
update_secondary_table_name=False,
):
"""Builds a multi-table dictionary domain from a dictionary with a key
Parameters
Expand All @@ -34,6 +37,9 @@ def _build_multi_table_dictionary_domain(
Name for the new root dictionary
secondary_table_variable_name : str
Name, in the root dictionary, for the "table" variable of the secondary table.
update_secondary_table_name : bool, default `False`
If ``True``, then update the secondary table name according to the
secondary table variable name. If not set, keep original table name.

Returns
-------
Expand Down Expand Up @@ -103,11 +109,17 @@ def _build_multi_table_dictionary_domain(
target_variable = Variable()
target_variable.name = secondary_table_variable_name
target_variable.type = "Table"
target_variable.object_type = root_source_dictionary.name
if update_secondary_table_name:
target_variable.object_type = secondary_table_variable_name
else:
target_variable.object_type = root_source_dictionary.name
root_target_dictionary.add_variable(target_variable)

# Build secondary target dictionary, by copying root source dictionary
secondary_target_dictionary = root_source_dictionary.copy()
secondary_target_dictionary.root = False
if update_secondary_table_name:
secondary_target_dictionary.name = secondary_table_variable_name

# Build target domain and add dictionaries to it
target_domain = DictionaryDomain()
Expand Down
56 changes: 18 additions & 38 deletions khiops/samples/samples_sklearn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,8 @@
"\n",
"# Create the dataset spec and the target\n",
"X = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
" \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
" },\n",
"}\n",
Expand Down Expand Up @@ -249,18 +248,12 @@
"\n",
"# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
"X = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
" \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
" \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
" \"Places\": (places_df, \"AccidentId\"),\n",
" \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
" \"Places\": (places_df, [\"AccidentId\"], True),\n",
" },\n",
" \"relations\": [\n",
" (\"Accidents\", \"Vehicles\"),\n",
" (\"Vehicles\", \"Users\"),\n",
" (\"Accidents\", \"Places\", True),\n",
" ],\n",
"}\n",
"\n",
"# Load the target variable \"Gravity\"\n",
Expand Down Expand Up @@ -475,16 +468,14 @@
"# Create the dataset multitable specification for the train/test split\n",
"# We specify each table with a name and a tuple (dataframe, key_columns)\n",
"X_train = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (X_train_main, \"AccidentId\"),\n",
" \"main_table\": (X_train_main, [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (X_train_secondary, [\"AccidentId\", \"VehicleId\"]),\n",
" },\n",
"}\n",
"X_test = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (X_test_main, \"AccidentId\"),\n",
" \"main_table\": (X_test_main, [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (X_test_secondary, [\"AccidentId\", \"VehicleId\"]),\n",
" },\n",
"}\n",
Expand Down Expand Up @@ -660,9 +651,8 @@
"\n",
"# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
"X = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
" \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
" },\n",
"}\n",
Expand Down Expand Up @@ -712,18 +702,12 @@
"\n",
"# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
"X = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
" \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
" \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
" \"Places\": (places_df, \"AccidentId\"),\n",
" \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n",
" \"Places\": (places_df, [\"AccidentId\"], True),\n",
" },\n",
" \"relations\": [\n",
" (\"Accidents\", \"Vehicles\"),\n",
" (\"Vehicles\", \"Users\"),\n",
" (\"Accidents\", \"Places\", True),\n",
" ],\n",
"}\n",
"\n",
"# Load the target variable \"Gravity\"\n",
Expand Down Expand Up @@ -843,14 +827,10 @@
"\n",
"# Build the multi-table dataset spec (drop the target column \"Gravity\")\n",
"X = {\n",
" \"main_table\": \"Accidents\",\n",
" \"tables\": {\n",
" \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n",
" \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n",
" \"additional_data_tables\": {\n",
" \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n",
" },\n",
" \"relations\": [\n",
" (\"Accidents\", \"Vehicles\"),\n",
" ],\n",
"}\n",
"\n",
"# Load the target variable \"Gravity\"\n",
Expand Down
56 changes: 18 additions & 38 deletions khiops/samples/samples_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,8 @@ def khiops_classifier_multitable_star():

# Create the dataset spec and the target
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
},
}
Expand Down Expand Up @@ -224,18 +223,12 @@ def khiops_classifier_multitable_snowflake():

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
"Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, "AccidentId"),
"Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, ["AccidentId"], True),
},
"relations": [
("Accidents", "Vehicles"),
("Vehicles", "Users"),
("Accidents", "Places", True),
],
}

# Load the target variable "Gravity"
Expand Down Expand Up @@ -416,16 +409,14 @@ def khiops_classifier_with_hyperparameters():
# Create the dataset multitable specification for the train/test split
# We specify each table with a name and a tuple (dataframe, key_columns)
X_train = {
"main_table": "Accidents",
"tables": {
"Accidents": (X_train_main, "AccidentId"),
"main_table": (X_train_main, ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (X_train_secondary, ["AccidentId", "VehicleId"]),
},
}
X_test = {
"main_table": "Accidents",
"tables": {
"Accidents": (X_test_main, "AccidentId"),
"main_table": (X_test_main, ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (X_test_secondary, ["AccidentId", "VehicleId"]),
},
}
Expand Down Expand Up @@ -578,9 +569,8 @@ def khiops_encoder_multitable_star():

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
},
}
Expand Down Expand Up @@ -622,18 +612,12 @@ def khiops_encoder_multitable_snowflake():

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
"Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, "AccidentId"),
"Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]),
"Places": (places_df, ["AccidentId"], True),
},
"relations": [
("Accidents", "Vehicles"),
("Vehicles", "Users"),
("Accidents", "Places", True),
],
}

# Load the target variable "Gravity"
Expand Down Expand Up @@ -739,14 +723,10 @@ def khiops_encoder_with_hyperparameters():

# Build the multi-table dataset spec (drop the target column "Gravity")
X = {
"main_table": "Accidents",
"tables": {
"Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"),
"main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]),
"additional_data_tables": {
"Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]),
},
"relations": [
("Accidents", "Vehicles"),
],
}

# Load the target variable "Gravity"
Expand Down
Loading