KhiopsML
diff --git a/‎doc/samples/samples_sklearn.rst‎
Lines changed: 0 additions & 153 deletions b/‎doc/samples/samples_sklearn.rst‎
Lines changed: 0 additions & 153 deletions
diff --git a/‎khiops/samples/samples_sklearn.ipynb‎
Lines changed: 0 additions & 179 deletions b/‎khiops/samples/samples_sklearn.ipynb‎
Lines changed: 0 additions & 179 deletions
@@ -803,156 +803,3 @@ Samples
     print("Predicted clusters (only three at most)")
     print(X_clusters)
     print("---")
-.. autofunction:: khiops_classifier_multitable_list
-.. code-block:: python
-
-    # Imports
-    import os
-    import pandas as pd
-    from khiops import core as kh
-    from khiops.sklearn import KhiopsClassifier
-    from sklearn import metrics
-    from sklearn.model_selection import train_test_split
-
-    # Load the root table of the dataset into a pandas dataframe
-    accidents_data_dir = os.path.join(kh.get_samples_dir(), "AccidentsSummary")
-    accidents_df = pd.read_csv(
-        os.path.join(accidents_data_dir, "Accidents.txt"),
-        sep="\t",
-    )
-    X = accidents_df.drop("Gravity", axis=1)
-    y = accidents_df["Gravity"]
-
-    # Split the dataset into train and test
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
-
-    # Load the secondary table of the dataset into a pandas dataframe
-    vehicles_df = pd.read_csv(os.path.join(accidents_data_dir, "Vehicles.txt"), sep="\t")
-
-    # Split the secondary dataframe with the keys of the split root dataframe
-    X_train_ids = X_train["AccidentId"].to_frame()
-    X_test_ids = X_test["AccidentId"].to_frame()
-    X_train_secondary = X_train_ids.merge(vehicles_df, on="AccidentId")
-    X_test_secondary = X_test_ids.merge(vehicles_df, on="AccidentId")
-
-    # Create the classifier specifying the key column name
-    khc = KhiopsClassifier(key="AccidentId")
-
-    # Train the classifier
-    khc.fit([X_train, X_train_secondary], y_train)
-
-    # Predict the class on the test dataset
-    y_test_pred = khc.predict([X_test, X_test_secondary])
-    print("Predicted classes (first 10):")
-    print(y_test_pred[:10])
-    print("---")
-
-    # Predict the class probability on the test dataset
-    y_test_probas = khc.predict_proba([X_test, X_test_secondary])
-    print("Predicted class probabilities (first 10):")
-    print(y_test_probas[:10])
-    print("---")
-
-    # Evaluate accuracy and auc metrics on the test dataset
-    test_accuracy = metrics.accuracy_score(y_test, y_test_pred)
-    test_auc = metrics.roc_auc_score(y_test, y_test_probas[:, 1])
-    print(f"Test accuracy = {test_accuracy}")
-    print(f"Test auc      = {test_auc}")
-.. autofunction:: khiops_classifier_multitable_star_file
-.. code-block:: python
-
-    # Imports
-    import os
-    import pandas as pd
-    from khiops import core as kh
-    from khiops.sklearn import KhiopsClassifier
-    from sklearn import metrics
-    from sklearn.model_selection import train_test_split
-
-    # Create output directory
-    results_dir = os.path.join("kh_samples", "khiops_classifier_multitable_star_file")
-    if not os.path.exists("kh_samples"):
-        os.mkdir("kh_samples")
-        os.mkdir(results_dir)
-    else:
-        if not os.path.exists(results_dir):
-            os.mkdir(results_dir)
-
-    # Load the root table of the dataset into a pandas dataframe
-    accidents_dataset_path = os.path.join(kh.get_samples_dir(), "AccidentsSummary")
-    accidents_df = pd.read_csv(
-        os.path.join(accidents_dataset_path, "Accidents.txt"),
-        sep="\t",
-    )
-
-    # Split the root dataframe into train and test
-    X_train_main, X_test_main = train_test_split(
-        accidents_df, test_size=0.3, random_state=1
-    )
-
-    # Load the secondary table of the dataset into a pandas dataframe
-    vehicles_df = pd.read_csv(
-        os.path.join(accidents_dataset_path, "Vehicles.txt"), sep="\t"
-    )
-
-    # Split the secondary dataframe with the keys of the split root dataframe
-    X_train_ids = X_train_main["AccidentId"].to_frame()
-    X_test_ids = X_test_main["AccidentId"].to_frame()
-    X_train_secondary = X_train_ids.merge(vehicles_df, on="AccidentId")
-    X_test_secondary = X_test_ids.merge(vehicles_df, on="AccidentId")
-
-    # Write the train and test dataset sets to disk
-    # For the test file we remove the target column from the main table
-    X_train_main_path = os.path.join(results_dir, "X_train_main.txt")
-    X_train_main.to_csv(X_train_main_path, sep="\t", header=True, index=False)
-    X_train_secondary_path = os.path.join(results_dir, "X_train_secondary.txt")
-    X_train_secondary.to_csv(X_train_secondary_path, sep="\t", header=True, index=False)
-    X_test_main_path = os.path.join(results_dir, "X_test_main.txt")
-    y_test = X_test_main.sort_values("AccidentId")["Gravity"]
-    X_test_main.drop(columns="Gravity").to_csv(
-        X_test_main_path, sep="\t", header=True, index=False
-    )
-    X_test_secondary_path = os.path.join(results_dir, "X_test_secondary.txt")
-    X_test_secondary.to_csv(X_test_secondary_path, sep="\t", header=True, index=False)
-
-    # Define the dictionary of train
-    X_train = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (X_train_main_path, "AccidentId"),
-            "Vehicles": (X_train_secondary_path, ["AccidentId", "VehicleId"]),
-        },
-        "format": ("\t", True),
-    }
-    X_test = {
-        "main_table": "Accidents",
-        "tables": {
-            "Accidents": (X_test_main_path, "AccidentId"),
-            "Vehicles": (X_test_secondary_path, ["AccidentId", "VehicleId"]),
-        },
-        "format": ("\t", True),
-    }
-
-    # Create the classifier and fit it
-    khc = KhiopsClassifier(output_dir=results_dir)
-    khc.fit(X_train, y="Gravity")
-
-    # Predict the class in addition to the class probabilities on the test dataset
-    y_test_pred_path = khc.predict(X_test)
-    y_test_pred = pd.read_csv(y_test_pred_path, sep="\t")
-    print("Predicted classes (first 10):")
-    print(y_test_pred["PredictedGravity"].head(10))
-    print("---")
-
-    y_test_probas_path = khc.predict_proba(X_test)
-    y_test_probas = pd.read_csv(y_test_probas_path, sep="\t")
-    proba_columns = [col for col in y_test_probas if col.startswith("Prob")]
-    print("Predicted class probabilities (first 10):")
-    print(y_test_probas[proba_columns].head(10))
-    print("---")
-
-    # Evaluate accuracy and auc metrics on the test dataset
-    test_accuracy = metrics.accuracy_score(y_test, y_test_pred["PredictedGravity"])
-    test_auc = metrics.roc_auc_score(y_test, y_test_probas["ProbGravityLethal"])
-    print(f"Test accuracy = {test_accuracy}")
-    print(f"Test auc      = {test_auc}")
@@ -972,185 +972,6 @@
     "print(X_clusters)\n",
     "print(\"---\")"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### `khiops_classifier_multitable_list()`\n\n",
-    "Trains a KhiopsClassifier using a list dataset specification\n\n    .. warning::\n        This dataset input method is **Deprecated** and will be removed in Khiops 11.\n    \n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Imports\n",
-    "import os\n",
-    "import pandas as pd\n",
-    "from khiops import core as kh\n",
-    "from khiops.sklearn import KhiopsClassifier\n",
-    "from sklearn import metrics\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "# Load the root table of the dataset into a pandas dataframe\n",
-    "accidents_data_dir = os.path.join(kh.get_samples_dir(), \"AccidentsSummary\")\n",
-    "accidents_df = pd.read_csv(\n",
-    "    os.path.join(accidents_data_dir, \"Accidents.txt\"),\n",
-    "    sep=\"\\t\",\n",
-    ")\n",
-    "X = accidents_df.drop(\"Gravity\", axis=1)\n",
-    "y = accidents_df[\"Gravity\"]\n",
-    "\n",
-    "# Split the dataset into train and test\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)\n",
-    "\n",
-    "# Load the secondary table of the dataset into a pandas dataframe\n",
-    "vehicles_df = pd.read_csv(os.path.join(accidents_data_dir, \"Vehicles.txt\"), sep=\"\\t\")\n",
-    "\n",
-    "# Split the secondary dataframe with the keys of the split root dataframe\n",
-    "X_train_ids = X_train[\"AccidentId\"].to_frame()\n",
-    "X_test_ids = X_test[\"AccidentId\"].to_frame()\n",
-    "X_train_secondary = X_train_ids.merge(vehicles_df, on=\"AccidentId\")\n",
-    "X_test_secondary = X_test_ids.merge(vehicles_df, on=\"AccidentId\")\n",
-    "\n",
-    "# Create the classifier specifying the key column name\n",
-    "khc = KhiopsClassifier(key=\"AccidentId\")\n",
-    "\n",
-    "# Train the classifier\n",
-    "khc.fit([X_train, X_train_secondary], y_train)\n",
-    "\n",
-    "# Predict the class on the test dataset\n",
-    "y_test_pred = khc.predict([X_test, X_test_secondary])\n",
-    "print(\"Predicted classes (first 10):\")\n",
-    "print(y_test_pred[:10])\n",
-    "print(\"---\")\n",
-    "\n",
-    "# Predict the class probability on the test dataset\n",
-    "y_test_probas = khc.predict_proba([X_test, X_test_secondary])\n",
-    "print(\"Predicted class probabilities (first 10):\")\n",
-    "print(y_test_probas[:10])\n",
-    "print(\"---\")\n",
-    "\n",
-    "# Evaluate accuracy and auc metrics on the test dataset\n",
-    "test_accuracy = metrics.accuracy_score(y_test, y_test_pred)\n",
-    "test_auc = metrics.roc_auc_score(y_test, y_test_probas[:, 1])\n",
-    "print(f\"Test accuracy = {test_accuracy}\")\n",
-    "print(f\"Test auc      = {test_auc}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### `khiops_classifier_multitable_star_file()`\n\n",
-    "Trains a `.KhiopsClassifier` with a file path based dataset\n\n    .. warning::\n        This dataset input method is **Deprecated** and will be removed in Khiops 11.\n        If you need to handle large datasets that do not easily fit into memory then you\n        may use the `~.khiops.core` API directly, which allows to specify file paths\n        directly.\n    \n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Imports\n",
-    "import os\n",
-    "import pandas as pd\n",
-    "from khiops import core as kh\n",
-    "from khiops.sklearn import KhiopsClassifier\n",
-    "from sklearn import metrics\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "# Create output directory\n",
-    "results_dir = os.path.join(\"kh_samples\", \"khiops_classifier_multitable_star_file\")\n",
-    "if not os.path.exists(\"kh_samples\"):\n",
-    "    os.mkdir(\"kh_samples\")\n",
-    "    os.mkdir(results_dir)\n",
-    "else:\n",
-    "    if not os.path.exists(results_dir):\n",
-    "        os.mkdir(results_dir)\n",
-    "\n",
-    "# Load the root table of the dataset into a pandas dataframe\n",
-    "accidents_dataset_path = os.path.join(kh.get_samples_dir(), \"AccidentsSummary\")\n",
-    "accidents_df = pd.read_csv(\n",
-    "    os.path.join(accidents_dataset_path, \"Accidents.txt\"),\n",
-    "    sep=\"\\t\",\n",
-    ")\n",
-    "\n",
-    "# Split the root dataframe into train and test\n",
-    "X_train_main, X_test_main = train_test_split(\n",
-    "    accidents_df, test_size=0.3, random_state=1\n",
-    ")\n",
-    "\n",
-    "# Load the secondary table of the dataset into a pandas dataframe\n",
-    "vehicles_df = pd.read_csv(\n",
-    "    os.path.join(accidents_dataset_path, \"Vehicles.txt\"), sep=\"\\t\"\n",
-    ")\n",
-    "\n",
-    "# Split the secondary dataframe with the keys of the split root dataframe\n",
-    "X_train_ids = X_train_main[\"AccidentId\"].to_frame()\n",
-    "X_test_ids = X_test_main[\"AccidentId\"].to_frame()\n",
-    "X_train_secondary = X_train_ids.merge(vehicles_df, on=\"AccidentId\")\n",
-    "X_test_secondary = X_test_ids.merge(vehicles_df, on=\"AccidentId\")\n",
-    "\n",
-    "# Write the train and test dataset sets to disk\n",
-    "# For the test file we remove the target column from the main table\n",
-    "X_train_main_path = os.path.join(results_dir, \"X_train_main.txt\")\n",
-    "X_train_main.to_csv(X_train_main_path, sep=\"\\t\", header=True, index=False)\n",
-    "X_train_secondary_path = os.path.join(results_dir, \"X_train_secondary.txt\")\n",
-    "X_train_secondary.to_csv(X_train_secondary_path, sep=\"\\t\", header=True, index=False)\n",
-    "X_test_main_path = os.path.join(results_dir, \"X_test_main.txt\")\n",
-    "y_test = X_test_main.sort_values(\"AccidentId\")[\"Gravity\"]\n",
-    "X_test_main.drop(columns=\"Gravity\").to_csv(\n",
-    "    X_test_main_path, sep=\"\\t\", header=True, index=False\n",
-    ")\n",
-    "X_test_secondary_path = os.path.join(results_dir, \"X_test_secondary.txt\")\n",
-    "X_test_secondary.to_csv(X_test_secondary_path, sep=\"\\t\", header=True, index=False)\n",
-    "\n",
-    "# Define the dictionary of train\n",
-    "X_train = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (X_train_main_path, \"AccidentId\"),\n",
-    "        \"Vehicles\": (X_train_secondary_path, [\"AccidentId\", \"VehicleId\"]),\n",
-    "    },\n",
-    "    \"format\": (\"\\t\", True),\n",
-    "}\n",
-    "X_test = {\n",
-    "    \"main_table\": \"Accidents\",\n",
-    "    \"tables\": {\n",
-    "        \"Accidents\": (X_test_main_path, \"AccidentId\"),\n",
-    "        \"Vehicles\": (X_test_secondary_path, [\"AccidentId\", \"VehicleId\"]),\n",
-    "    },\n",
-    "    \"format\": (\"\\t\", True),\n",
-    "}\n",
-    "\n",
-    "# Create the classifier and fit it\n",
-    "khc = KhiopsClassifier(output_dir=results_dir)\n",
-    "khc.fit(X_train, y=\"Gravity\")\n",
-    "\n",
-    "# Predict the class in addition to the class probabilities on the test dataset\n",
-    "y_test_pred_path = khc.predict(X_test)\n",
-    "y_test_pred = pd.read_csv(y_test_pred_path, sep=\"\\t\")\n",
-    "print(\"Predicted classes (first 10):\")\n",
-    "print(y_test_pred[\"PredictedGravity\"].head(10))\n",
-    "print(\"---\")\n",
-    "\n",
-    "y_test_probas_path = khc.predict_proba(X_test)\n",
-    "y_test_probas = pd.read_csv(y_test_probas_path, sep=\"\\t\")\n",
-    "proba_columns = [col for col in y_test_probas if col.startswith(\"Prob\")]\n",
-    "print(\"Predicted class probabilities (first 10):\")\n",
-    "print(y_test_probas[proba_columns].head(10))\n",
-    "print(\"---\")\n",
-    "\n",
-    "# Evaluate accuracy and auc metrics on the test dataset\n",
-    "test_accuracy = metrics.accuracy_score(y_test, y_test_pred[\"PredictedGravity\"])\n",
-    "test_auc = metrics.roc_auc_score(y_test, y_test_probas[\"ProbGravityLethal\"])\n",
-    "print(f\"Test accuracy = {test_accuracy}\")\n",
-    "print(f\"Test auc      = {test_auc}\")"
-   ]
   }
  ],
  "metadata": {},