diff --git a/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb b/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb
index 13659c88..5b293086 100644
--- a/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb
+++ b/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb
@@ -208,7 +208,7 @@
"\n",
"* **n_estimators**: int, the number of estimators for bootstrap to compute subgroup stability metrics.\n",
"\n",
- "* **computation_mode**: str, 'default' or 'error_analysis'. Name of the computation mode. When a default computation mode measures metrics for sex_priv and sex_dis, an `error_analysis` mode measures metrics for (sex_priv, sex_priv_correct, sex_priv_incorrect) and (sex_dis, sex_dis_correct, sex_dis_incorrect). Therefore, a user can analyze how a model is certain about its incorrect predictions.\n",
+ "* **computation_mode**: str, 'default', 'error_analysis', or 'no_bootstrap'. Name of the computation mode. While the default computation mode measures metrics for sex_priv and sex_dis, the `error_analysis` mode measures metrics for (sex_priv, sex_priv_correct, sex_priv_incorrect) and (sex_dis, sex_dis_correct, sex_dis_incorrect). Therefore, a user can analyze how certain a model is about its incorrect predictions. The `no_bootstrap` mode disables the bootstrap approach: stability and uncertainty metrics are not measured, only accuracy and classic fairness metrics, which greatly speeds up metric computation.\n",
"\n",
"* **sensitive_attributes_dct**: dict, a dictionary where keys are sensitive attribute names (including intersectional attributes), and values are disadvantaged values for these attributes. Intersectional attributes must include '&' between sensitive attributes. You do not need to specify disadvantaged values for intersectional groups since they will be derived from disadvantaged values in sensitive_attributes_dct for each separate sensitive attribute in this intersectional pair.\n",
"\n",
diff --git a/docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb b/docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb
new file mode 100644
index 00000000..cb38cd86
--- /dev/null
+++ b/docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb
@@ -0,0 +1,1313 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "id": "248cbed8",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:01.510778Z",
+ "start_time": "2024-12-14T13:03:01.021557Z"
+ }
+ },
+ "source": [
+ "%matplotlib inline\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ],
+ "outputs": [],
+ "execution_count": 1
+ },
+ {
+ "cell_type": "code",
+ "id": "7ec6cd08",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:01.527295Z",
+ "start_time": "2024-12-14T13:03:01.518339Z"
+ }
+ },
+ "source": [
+ "import os\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "os.environ[\"PYTHONWARNINGS\"] = \"ignore\""
+ ],
+ "outputs": [],
+ "execution_count": 2
+ },
+ {
+ "cell_type": "code",
+ "id": "b8cb69f2",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:01.541905Z",
+ "start_time": "2024-12-14T13:03:01.532512Z"
+ }
+ },
+ "source": [
+ "cur_folder_name = os.path.basename(os.getcwd())\n",
+ "if cur_folder_name != \"Virny\":\n",
+ "    os.chdir(\"../..\")\n",
+ "\n",
+ "print('Current location: ', os.getcwd())"
+ ],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current location: /Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny\n"
+ ]
+ }
+ ],
+ "execution_count": 3
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a578f2ab",
+ "metadata": {},
+ "source": "# Multiple Models Interface With No Bootstrap"
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2251a923",
+ "metadata": {},
+ "source": [
+ "In this example, we are going to conduct a deep performance profiling for 4 models. The only difference from the multiple models interface tutorial is the use of the `no_bootstrap` computation mode. This mode is useful when we do not need to measure stability and uncertainty metrics, but only want to measure accuracy and classic fairness metrics. The computation mode disables the bootstrap approach and greatly speeds up metric computation.\n",
+ "\n",
+ "For that, we will use the `compute_metrics_with_config` interface that can compute metrics for multiple models. Thus, we will need to take the following steps:\n",
+ "\n",
+ "* Initialize input variables\n",
+ "\n",
+ "* Compute subgroup metrics\n",
+ "\n",
+ "* Perform disparity metrics composition using the Metric Composer\n",
+ "\n",
+ "* Create static visualizations using the Metric Visualizer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "606df34d",
+ "metadata": {},
+ "source": [
+ "## Import dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "id": "7a9241de",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.498252Z",
+ "start_time": "2024-12-14T13:03:01.672151Z"
+ }
+ },
+ "source": [
+ "import os\n",
+ "from pprint import pprint\n",
+ "from datetime import datetime, timezone\n",
+ "\n",
+ "from xgboost import XGBClassifier\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "\n",
+ "from sklearn.compose import ColumnTransformer\n",
+ "from sklearn.preprocessing import OneHotEncoder\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "from virny.utils.custom_initializers import create_config_obj, read_model_metric_dfs, create_models_config_from_tuned_params_df\n",
+ "from virny.user_interfaces.multiple_models_api import compute_metrics_with_config\n",
+ "from virny.preprocessing.basic_preprocessing import preprocess_dataset\n",
+ "from virny.custom_classes.metrics_composer import MetricsComposer\n",
+ "from virny.utils.model_tuning_utils import tune_ML_models\n",
+ "from virny.datasets import CompasWithoutSensitiveAttrsDataset"
+ ],
+ "outputs": [],
+ "execution_count": 4
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75699f5f",
+ "metadata": {},
+ "source": [
+ "## Initialize Input Variables"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e86f6556",
+ "metadata": {},
+ "source": [
+ "Based on the library flow, we need to create 3 input objects for a user interface:\n",
+ "\n",
+ "* A **config yaml** that is a file with configuration parameters for different user interfaces for metric computation.\n",
+ "\n",
+ "* A **dataset class** that is a wrapper around the user’s raw dataset that includes its descriptive attributes like a target column, numerical columns, categorical columns, etc. This class must inherit from the BaseDataset class, which was created for user convenience.\n",
+ "\n",
+ "* Finally, a **models config** that is a Python dictionary, where keys are model names and values are initialized models for analysis. This dictionary helps conduct audits for different analysis modes and analyze different types of models."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "DATASET_SPLIT_SEED = 42\n",
+ "MODELS_TUNING_SEED = 42\n",
+ "TEST_SET_FRACTION = 0.2"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.522770Z",
+ "start_time": "2024-12-14T13:03:04.502996Z"
+ }
+ },
+ "id": "ce359a052925eb3a",
+ "outputs": [],
+ "execution_count": 5
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "models_params_for_tuning = {\n",
+ " 'DecisionTreeClassifier': {\n",
+ " 'model': DecisionTreeClassifier(random_state=MODELS_TUNING_SEED),\n",
+ " 'params': {\n",
+ " \"max_depth\": [20, 30],\n",
+ " \"min_samples_split\" : [0.1],\n",
+ " \"max_features\": ['sqrt'],\n",
+ " \"criterion\": [\"gini\", \"entropy\"]\n",
+ " }\n",
+ " },\n",
+ " 'LogisticRegression': {\n",
+ " 'model': LogisticRegression(random_state=MODELS_TUNING_SEED),\n",
+ " 'params': {\n",
+ " 'penalty': ['l2'],\n",
+ " 'C' : [0.0001, 0.1, 1, 100],\n",
+ " 'solver': ['newton-cg', 'lbfgs'],\n",
+ " 'max_iter': [250],\n",
+ " }\n",
+ " },\n",
+ " 'RandomForestClassifier': {\n",
+ " 'model': RandomForestClassifier(random_state=MODELS_TUNING_SEED),\n",
+ " 'params': {\n",
+ " \"max_depth\": [6, 10],\n",
+ " \"min_samples_leaf\": [1],\n",
+ " \"n_estimators\": [50, 100],\n",
+ " \"max_features\": [0.6]\n",
+ " }\n",
+ " },\n",
+ " 'XGBClassifier': {\n",
+ " 'model': XGBClassifier(random_state=MODELS_TUNING_SEED, verbosity=0),\n",
+ " 'params': {\n",
+ " 'learning_rate': [0.1],\n",
+ " 'n_estimators': [200],\n",
+ " 'max_depth': [5, 7],\n",
+ " 'lambda': [10, 100]\n",
+ " }\n",
+ " }\n",
+ "}"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.545549Z",
+ "start_time": "2024-12-14T13:03:04.527336Z"
+ }
+ },
+ "id": "2ece07ab7e3a9acc",
+ "outputs": [],
+ "execution_count": 6
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Create a config object"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "1090a686532d96f5"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "`compute_metrics_with_config` interface requires that your **yaml file** includes the following parameters:\n",
+ "\n",
+ "* **dataset_name**: str, a name of your dataset; it will be used to name files with metrics.\n",
+ "\n",
+ "* **computation_mode**: str, 'default', 'error_analysis', or 'no_bootstrap'. Name of the computation mode.\n",
+ "\n",
+ "* **random_state**: int, a seed to control the randomness of the whole model evaluation pipeline.\n",
+ "\n",
+ "* **sensitive_attributes_dct**: dict, a dictionary where keys are sensitive attribute names (including intersectional attributes), and values are disadvantaged values for these attributes. Intersectional attributes must include '&' between sensitive attributes. You do not need to specify disadvantaged values for intersectional groups since they will be derived from disadvantaged values in sensitive_attributes_dct for each separate sensitive attribute in this intersectional pair.\n",
+ "\n",
+ "Note that the disadvantaged value in a sensitive attribute dictionary must be **the same as in the original dataset**. For example, when distinct values of the _sex_ column in the original dataset are 'F' and 'M', and after pre-processing they became 0 and 1 respectively, you still need to set a disadvantaged value as 'F' or 'M' in the sensitive attribute dictionary."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "d0a03b8f5c5d0ea7"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "ROOT_DIR = os.path.join('docs', 'examples')\n",
+ "config_yaml_path = os.path.join(ROOT_DIR, 'experiment_config.yaml')\n",
+ "config_yaml_content = \"\"\"\n",
+ "dataset_name: COMPAS_Without_Sensitive_Attributes\n",
+ "computation_mode: no_bootstrap\n",
+ "random_state: 42\n",
+ "sensitive_attributes_dct: {'sex': 1, 'race': 'African-American', 'sex&race': None}\n",
+ "\"\"\"\n",
+ "\n",
+ "with open(config_yaml_path, 'w', encoding='utf-8') as f:\n",
+ " f.write(config_yaml_content)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.571877Z",
+ "start_time": "2024-12-14T13:03:04.552978Z"
+ }
+ },
+ "id": "af22ee06f1e3eb1a",
+ "outputs": [],
+ "execution_count": 7
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "config = create_config_obj(config_yaml_path=config_yaml_path)\n",
+ "SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', f'{config.dataset_name}_Metrics_{datetime.now(timezone.utc).strftime(\"%Y%m%d__%H%M%S\")}')"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.601486Z",
+ "start_time": "2024-12-14T13:03:04.580723Z"
+ }
+ },
+ "id": "65181f72484bb92b",
+ "outputs": [],
+ "execution_count": 8
+ },
+ {
+ "cell_type": "markdown",
+ "id": "74f57422",
+ "metadata": {},
+ "source": [
+ "### Preprocess the dataset and create a BaseFlowDataset class"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eed149cd",
+ "metadata": {},
+ "source": [
+ "Based on the BaseDataset class, your **dataset class** should include the following attributes:\n",
+ "\n",
+ "* **Obligatory attributes**: dataset, target, features, numerical_columns, categorical_columns\n",
+ "\n",
+ "* **Optional attributes**: X_data, y_data, columns_with_nulls\n",
+ "\n",
+ "For more details, please refer to the library documentation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "id": "6c55c6a0",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.642822Z",
+ "start_time": "2024-12-14T13:03:04.606712Z"
+ }
+ },
+ "source": [
+ "data_loader = CompasWithoutSensitiveAttrsDataset()\n",
+ "data_loader.X_data[data_loader.X_data.columns[:5]].head()"
+ ],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " juv_fel_count juv_misd_count juv_other_count priors_count \\\n",
+ "0 0.0 -2.340451 1.0 -15.010999 \n",
+ "1 0.0 0.000000 0.0 0.000000 \n",
+ "2 0.0 0.000000 0.0 0.000000 \n",
+ "3 0.0 0.000000 0.0 6.000000 \n",
+ "4 0.0 0.000000 0.0 7.513697 \n",
+ "\n",
+ " age_cat_25 - 45 \n",
+ "0 1 \n",
+ "1 1 \n",
+ "2 0 \n",
+ "3 1 \n",
+ "4 1 "
+ ],
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " juv_fel_count | \n",
+ " juv_misd_count | \n",
+ " juv_other_count | \n",
+ " priors_count | \n",
+ " age_cat_25 - 45 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.0 | \n",
+ " -2.340451 | \n",
+ " 1.0 | \n",
+ " -15.010999 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ " 6.000000 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.0 | \n",
+ " 7.513697 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 9
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "column_transformer = ColumnTransformer(transformers=[\n",
+ " ('categorical_features', OneHotEncoder(handle_unknown='ignore', sparse_output=False), data_loader.categorical_columns),\n",
+ " ('numerical_features', StandardScaler(), data_loader.numerical_columns),\n",
+ "])"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.667830Z",
+ "start_time": "2024-12-14T13:03:04.649199Z"
+ }
+ },
+ "id": "ebbef5eaf9dc0943",
+ "outputs": [],
+ "execution_count": 10
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "base_flow_dataset = preprocess_dataset(data_loader=data_loader, \n",
+ " column_transformer=column_transformer,\n",
+ " sensitive_attributes_dct=config.sensitive_attributes_dct,\n",
+ " test_set_fraction=TEST_SET_FRACTION,\n",
+ " dataset_split_seed=DATASET_SPLIT_SEED)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:04.712448Z",
+ "start_time": "2024-12-14T13:03:04.683336Z"
+ }
+ },
+ "id": "97ed4609effbf53f",
+ "outputs": [],
+ "execution_count": 11
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Tune models and create a models config for metrics computation"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "d538119a04cb3d80"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "tuned_params_df, models_config = tune_ML_models(models_params_for_tuning, base_flow_dataset, config.dataset_name, n_folds=3)\n",
+ "tuned_params_df"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:07.249834Z",
+ "start_time": "2024-12-14T13:03:04.743779Z"
+ }
+ },
+ "id": "782741c190a4690b",
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2024/12/14, 15:03:04: Tuning DecisionTreeClassifier...\n",
+ "2024/12/14, 15:03:06: Tuning for DecisionTreeClassifier is finished [F1 score = 0.6554846983071245, Accuracy = 0.6575048862828714]\n",
+ "\n",
+ "2024/12/14, 15:03:06: Tuning LogisticRegression...\n",
+ "2024/12/14, 15:03:06: Tuning for LogisticRegression is finished [F1 score = 0.6483823116804864, Accuracy = 0.6520611566087312]\n",
+ "\n",
+ "2024/12/14, 15:03:06: Tuning RandomForestClassifier...\n",
+ "2024/12/14, 15:03:06: Tuning for RandomForestClassifier is finished [F1 score = 0.6582739175359259, Accuracy = 0.6601120816372682]\n",
+ "\n",
+ "2024/12/14, 15:03:06: Tuning XGBClassifier...\n",
+ "2024/12/14, 15:03:07: Tuning for XGBClassifier is finished [F1 score = 0.6649018515640065, Accuracy = 0.6669791262841636]\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ " Dataset_Name Model_Name F1_Score \\\n",
+ "0 COMPAS_Without_Sensitive_Attributes DecisionTreeClassifier 0.655485 \n",
+ "1 COMPAS_Without_Sensitive_Attributes LogisticRegression 0.648382 \n",
+ "2 COMPAS_Without_Sensitive_Attributes RandomForestClassifier 0.658274 \n",
+ "3 COMPAS_Without_Sensitive_Attributes XGBClassifier 0.664902 \n",
+ "\n",
+ " Accuracy_Score Model_Best_Params \n",
+ "0 0.657505 {'criterion': 'gini', 'max_depth': 20, 'max_fe... \n",
+ "1 0.652061 {'C': 1, 'max_iter': 250, 'penalty': 'l2', 'so... \n",
+ "2 0.660112 {'max_depth': 10, 'max_features': 0.6, 'min_sa... \n",
+ "3 0.666979 {'lambda': 100, 'learning_rate': 0.1, 'max_dep... "
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Dataset_Name | \n",
+ " Model_Name | \n",
+ " F1_Score | \n",
+ " Accuracy_Score | \n",
+ " Model_Best_Params | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " COMPAS_Without_Sensitive_Attributes | \n",
+ " DecisionTreeClassifier | \n",
+ " 0.655485 | \n",
+ " 0.657505 | \n",
+ " {'criterion': 'gini', 'max_depth': 20, 'max_fe... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " COMPAS_Without_Sensitive_Attributes | \n",
+ " LogisticRegression | \n",
+ " 0.648382 | \n",
+ " 0.652061 | \n",
+ " {'C': 1, 'max_iter': 250, 'penalty': 'l2', 'so... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " COMPAS_Without_Sensitive_Attributes | \n",
+ " RandomForestClassifier | \n",
+ " 0.658274 | \n",
+ " 0.660112 | \n",
+ " {'max_depth': 10, 'max_features': 0.6, 'min_sa... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " COMPAS_Without_Sensitive_Attributes | \n",
+ " XGBClassifier | \n",
+ " 0.664902 | \n",
+ " 0.666979 | \n",
+ " {'lambda': 100, 'learning_rate': 0.1, 'max_dep... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 12
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "now = datetime.now(timezone.utc)\n",
+ "date_time_str = now.strftime(\"%Y%m%d__%H%M%S\")\n",
+ "tuned_df_path = os.path.join(ROOT_DIR, 'results', 'models_tuning', f'tuning_results_{config.dataset_name}_{date_time_str}.csv')\n",
+ "tuned_params_df.to_csv(tuned_df_path, sep=\",\", columns=tuned_params_df.columns, float_format=\"%.4f\", index=False)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:07.325010Z",
+ "start_time": "2024-12-14T13:03:07.299713Z"
+ }
+ },
+ "id": "21ccc879c5c3e215",
+ "outputs": [],
+ "execution_count": 13
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Create models_config from the saved tuned_params_df for higher reliability"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "2da2057228e94ae5"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "models_config = create_models_config_from_tuned_params_df(models_params_for_tuning, tuned_df_path)\n",
+ "pprint(models_config)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:07.406748Z",
+ "start_time": "2024-12-14T13:03:07.380171Z"
+ }
+ },
+ "id": "3b15f202741fa2ae",
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'DecisionTreeClassifier': DecisionTreeClassifier(max_depth=20, max_features='sqrt', min_samples_split=0.1,\n",
+ " random_state=42),\n",
+ " 'LogisticRegression': LogisticRegression(C=1, max_iter=250, random_state=42, solver='newton-cg'),\n",
+ " 'RandomForestClassifier': RandomForestClassifier(max_depth=10, max_features=0.6, random_state=42),\n",
+ " 'XGBClassifier': XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+ " colsample_bylevel=None, colsample_bynode=None,\n",
+ " colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+ " enable_categorical=False, eval_metric=None, feature_types=None,\n",
+ " gamma=None, grow_policy=None, importance_type=None,\n",
+ " interaction_constraints=None, lambda=100, learning_rate=0.1,\n",
+ " max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n",
+ " max_delta_step=None, max_depth=5, max_leaves=None,\n",
+ " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+ " multi_strategy=None, n_estimators=200, n_jobs=None,\n",
+ " num_parallel_tree=None, ...)}\n"
+ ]
+ }
+ ],
+ "execution_count": 14
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f445b64a",
+ "metadata": {},
+ "source": [
+ "## Subgroup Metric Computation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3530f06",
+ "metadata": {},
+ "source": [
+ "After that, we need to pass the _BaseFlowDataset_ object, models config, and config yaml to a metric computation interface and execute it. The interface uses subgroup analyzers to compute different sets of metrics for each privileged and disadvantaged group. Currently, our library supports the **Subgroup Variance Analyzer** and the **Subgroup Error Analyzer**, but it is easily extensible to other analyzers. When the variance and error analyzers complete metric computation, their metrics are combined, returned in a matrix format, and stored in a file if defined."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "id": "197eadaa",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.416589Z",
+ "start_time": "2024-12-14T13:03:07.470832Z"
+ }
+ },
+ "source": [
+ "metrics_dct = compute_metrics_with_config(base_flow_dataset, config, models_config, SAVE_RESULTS_DIR_PATH,\n",
+ " notebook_logs_stdout=True,\n",
+ " with_predict_proba=False)"
+ ],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Analyze multiple models: 0%| | 0/4 [00:00, ?it/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "70ce67b59cb54a2189aca8c529681a17"
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "execution_count": 15
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d6a8625a",
+ "metadata": {},
+ "source": [
+ "Look at several columns in the top rows of the computed metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "id": "bea94683",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.479643Z",
+ "start_time": "2024-12-14T13:03:08.456241Z"
+ }
+ },
+ "source": [
+ "sample_model_metrics_df = metrics_dct[list(models_config)[0]]\n",
+ "sample_model_metrics_df.loc[:, sample_model_metrics_df.columns[:6]].head(20)"
+ ],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " Metric overall sex_priv sex_dis race_priv race_dis\n",
+ "0 TPR 0.615711 0.493333 0.638889 0.517007 0.660494\n",
+ "1 TNR 0.723077 0.830882 0.690423 0.786517 0.669811\n",
+ "2 PPV 0.641593 0.616667 0.645408 0.571429 0.670846\n",
+ "3 FNR 0.384289 0.506667 0.361111 0.482993 0.339506\n",
+ "4 FPR 0.276923 0.169118 0.309577 0.213483 0.330189\n",
+ "5 Accuracy 0.675189 0.710900 0.666272 0.690821 0.665109\n",
+ "6 F1 0.628386 0.548148 0.642132 0.542857 0.665630\n",
+ "7 Selection-Rate 0.428030 0.284360 0.463905 0.321256 0.496885\n",
+ "8 Sample_Size 1056.000000 211.000000 845.000000 414.000000 642.000000"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Metric | \n",
+ " overall | \n",
+ " sex_priv | \n",
+ " sex_dis | \n",
+ " race_priv | \n",
+ " race_dis | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " TPR | \n",
+ " 0.615711 | \n",
+ " 0.493333 | \n",
+ " 0.638889 | \n",
+ " 0.517007 | \n",
+ " 0.660494 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " TNR | \n",
+ " 0.723077 | \n",
+ " 0.830882 | \n",
+ " 0.690423 | \n",
+ " 0.786517 | \n",
+ " 0.669811 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " PPV | \n",
+ " 0.641593 | \n",
+ " 0.616667 | \n",
+ " 0.645408 | \n",
+ " 0.571429 | \n",
+ " 0.670846 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " FNR | \n",
+ " 0.384289 | \n",
+ " 0.506667 | \n",
+ " 0.361111 | \n",
+ " 0.482993 | \n",
+ " 0.339506 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " FPR | \n",
+ " 0.276923 | \n",
+ " 0.169118 | \n",
+ " 0.309577 | \n",
+ " 0.213483 | \n",
+ " 0.330189 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Accuracy | \n",
+ " 0.675189 | \n",
+ " 0.710900 | \n",
+ " 0.666272 | \n",
+ " 0.690821 | \n",
+ " 0.665109 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " F1 | \n",
+ " 0.628386 | \n",
+ " 0.548148 | \n",
+ " 0.642132 | \n",
+ " 0.542857 | \n",
+ " 0.665630 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Selection-Rate | \n",
+ " 0.428030 | \n",
+ " 0.284360 | \n",
+ " 0.463905 | \n",
+ " 0.321256 | \n",
+ " 0.496885 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Sample_Size | \n",
+ " 1056.000000 | \n",
+ " 211.000000 | \n",
+ " 845.000000 | \n",
+ " 414.000000 | \n",
+ " 642.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 16
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a7ff67e9",
+ "metadata": {},
+ "source": [
+ "## Disparity Metric Composition"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "274c97e2",
+ "metadata": {},
+ "source": [
+ "To compose disparity metrics, the Metric Composer should be applied. **Metric Composer** is responsible for the second stage of the model audit. Currently, it computes our custom error disparity, stability disparity, and uncertainty disparity metrics, but extending it for new disparity metrics is very simple. We noticed that more and more disparity metrics have appeared during the last decade, but most of them are based on the same group-specific metrics. Hence, such a separation of group-specific and disparity metrics computation allows us to experiment with different combinations of group-specific metrics and avoid recomputing group metrics for a new set of disparity metrics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "id": "f94a20dc",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.528522Z",
+ "start_time": "2024-12-14T13:03:08.504889Z"
+ }
+ },
+ "source": [
+ "models_metrics_dct = read_model_metric_dfs(SAVE_RESULTS_DIR_PATH, model_names=list(models_config.keys()))"
+ ],
+ "outputs": [],
+ "execution_count": 17
+ },
+ {
+ "cell_type": "code",
+ "id": "b04d06cf",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.565733Z",
+ "start_time": "2024-12-14T13:03:08.545162Z"
+ }
+ },
+ "source": [
+ "metrics_composer = MetricsComposer(models_metrics_dct, config.sensitive_attributes_dct)"
+ ],
+ "outputs": [],
+ "execution_count": 18
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1a23ece",
+ "metadata": {},
+ "source": [
+ "Compute composed metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "id": "be6ace22",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.607374Z",
+ "start_time": "2024-12-14T13:03:08.575786Z"
+ }
+ },
+ "source": [
+ "models_composed_metrics_df = metrics_composer.compose_metrics()"
+ ],
+ "outputs": [],
+ "execution_count": 19
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "models_composed_metrics_df"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.677252Z",
+ "start_time": "2024-12-14T13:03:08.653618Z"
+ }
+ },
+ "id": "a286da0406c6401d",
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " Metric sex race sex&race \\\n",
+ "0 Accuracy_Difference -0.044628 -0.025712 -0.052464 \n",
+ "1 Equalized_Odds_FNR -0.145556 -0.143487 -0.139463 \n",
+ "2 Equalized_Odds_FPR 0.140459 0.116706 0.172376 \n",
+ "3 Statistical_Parity_Difference 0.179545 0.175629 0.212669 \n",
+ "4 Disparate_Impact 1.631400 1.546694 1.661918 \n",
+ "5 Equalized_Odds_TNR -0.140459 -0.116706 -0.172376 \n",
+ "6 Equalized_Odds_TPR 0.145556 0.143487 0.139463 \n",
+ "7 Accuracy_Difference -0.004364 0.000226 -0.016544 \n",
+ "8 Equalized_Odds_FNR -0.187374 -0.264613 -0.242125 \n",
+ "9 Equalized_Odds_FPR 0.100583 0.156102 0.187169 \n",
+ "10 Statistical_Parity_Difference 0.172439 0.249560 0.267602 \n",
+ "11 Disparate_Impact 1.596469 1.906296 1.914016 \n",
+ "12 Equalized_Odds_TNR -0.100583 -0.156102 -0.187169 \n",
+ "13 Equalized_Odds_TPR 0.187374 0.264613 0.242125 \n",
+ "14 Accuracy_Difference -0.032822 -0.018872 -0.024306 \n",
+ "15 Equalized_Odds_FNR -0.071010 -0.167486 -0.155195 \n",
+ "16 Equalized_Odds_FPR 0.084321 0.132888 0.146995 \n",
+ "17 Statistical_Parity_Difference 0.125034 0.205224 0.218215 \n",
+ "18 Disparate_Impact 1.361401 1.638817 1.648481 \n",
+ "19 Equalized_Odds_TNR -0.084321 -0.132888 -0.146995 \n",
+ "20 Equalized_Odds_TPR 0.071010 0.167486 0.155195 \n",
+ "21 Accuracy_Difference 0.005093 0.008398 -0.005309 \n",
+ "22 Equalized_Odds_FNR -0.126869 -0.168115 -0.153447 \n",
+ "23 Equalized_Odds_FPR 0.057137 0.086602 0.111180 \n",
+ "24 Statistical_Parity_Difference 0.127401 0.176554 0.195473 \n",
+ "25 Disparate_Impact 1.368242 1.518393 1.558798 \n",
+ "26 Equalized_Odds_TNR -0.057137 -0.086602 -0.111180 \n",
+ "27 Equalized_Odds_TPR 0.126869 0.168115 0.153447 \n",
+ "\n",
+ " Model_Name \n",
+ "0 DecisionTreeClassifier \n",
+ "1 DecisionTreeClassifier \n",
+ "2 DecisionTreeClassifier \n",
+ "3 DecisionTreeClassifier \n",
+ "4 DecisionTreeClassifier \n",
+ "5 DecisionTreeClassifier \n",
+ "6 DecisionTreeClassifier \n",
+ "7 LogisticRegression \n",
+ "8 LogisticRegression \n",
+ "9 LogisticRegression \n",
+ "10 LogisticRegression \n",
+ "11 LogisticRegression \n",
+ "12 LogisticRegression \n",
+ "13 LogisticRegression \n",
+ "14 RandomForestClassifier \n",
+ "15 RandomForestClassifier \n",
+ "16 RandomForestClassifier \n",
+ "17 RandomForestClassifier \n",
+ "18 RandomForestClassifier \n",
+ "19 RandomForestClassifier \n",
+ "20 RandomForestClassifier \n",
+ "21 XGBClassifier \n",
+ "22 XGBClassifier \n",
+ "23 XGBClassifier \n",
+ "24 XGBClassifier \n",
+ "25 XGBClassifier \n",
+ "26 XGBClassifier \n",
+ "27 XGBClassifier "
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Metric | \n",
+ " sex | \n",
+ " race | \n",
+ " sex&race | \n",
+ " Model_Name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Accuracy_Difference | \n",
+ " -0.044628 | \n",
+ " -0.025712 | \n",
+ " -0.052464 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Equalized_Odds_FNR | \n",
+ " -0.145556 | \n",
+ " -0.143487 | \n",
+ " -0.139463 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Equalized_Odds_FPR | \n",
+ " 0.140459 | \n",
+ " 0.116706 | \n",
+ " 0.172376 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Statistical_Parity_Difference | \n",
+ " 0.179545 | \n",
+ " 0.175629 | \n",
+ " 0.212669 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Disparate_Impact | \n",
+ " 1.631400 | \n",
+ " 1.546694 | \n",
+ " 1.661918 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Equalized_Odds_TNR | \n",
+ " -0.140459 | \n",
+ " -0.116706 | \n",
+ " -0.172376 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Equalized_Odds_TPR | \n",
+ " 0.145556 | \n",
+ " 0.143487 | \n",
+ " 0.139463 | \n",
+ " DecisionTreeClassifier | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Accuracy_Difference | \n",
+ " -0.004364 | \n",
+ " 0.000226 | \n",
+ " -0.016544 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Equalized_Odds_FNR | \n",
+ " -0.187374 | \n",
+ " -0.264613 | \n",
+ " -0.242125 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " Equalized_Odds_FPR | \n",
+ " 0.100583 | \n",
+ " 0.156102 | \n",
+ " 0.187169 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " Statistical_Parity_Difference | \n",
+ " 0.172439 | \n",
+ " 0.249560 | \n",
+ " 0.267602 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " Disparate_Impact | \n",
+ " 1.596469 | \n",
+ " 1.906296 | \n",
+ " 1.914016 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " Equalized_Odds_TNR | \n",
+ " -0.100583 | \n",
+ " -0.156102 | \n",
+ " -0.187169 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " Equalized_Odds_TPR | \n",
+ " 0.187374 | \n",
+ " 0.264613 | \n",
+ " 0.242125 | \n",
+ " LogisticRegression | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " Accuracy_Difference | \n",
+ " -0.032822 | \n",
+ " -0.018872 | \n",
+ " -0.024306 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " Equalized_Odds_FNR | \n",
+ " -0.071010 | \n",
+ " -0.167486 | \n",
+ " -0.155195 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " Equalized_Odds_FPR | \n",
+ " 0.084321 | \n",
+ " 0.132888 | \n",
+ " 0.146995 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " Statistical_Parity_Difference | \n",
+ " 0.125034 | \n",
+ " 0.205224 | \n",
+ " 0.218215 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " Disparate_Impact | \n",
+ " 1.361401 | \n",
+ " 1.638817 | \n",
+ " 1.648481 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " Equalized_Odds_TNR | \n",
+ " -0.084321 | \n",
+ " -0.132888 | \n",
+ " -0.146995 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " Equalized_Odds_TPR | \n",
+ " 0.071010 | \n",
+ " 0.167486 | \n",
+ " 0.155195 | \n",
+ " RandomForestClassifier | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " Accuracy_Difference | \n",
+ " 0.005093 | \n",
+ " 0.008398 | \n",
+ " -0.005309 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " Equalized_Odds_FNR | \n",
+ " -0.126869 | \n",
+ " -0.168115 | \n",
+ " -0.153447 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " Equalized_Odds_FPR | \n",
+ " 0.057137 | \n",
+ " 0.086602 | \n",
+ " 0.111180 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " Statistical_Parity_Difference | \n",
+ " 0.127401 | \n",
+ " 0.176554 | \n",
+ " 0.195473 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " Disparate_Impact | \n",
+ " 1.368242 | \n",
+ " 1.518393 | \n",
+ " 1.558798 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " Equalized_Odds_TNR | \n",
+ " -0.057137 | \n",
+ " -0.086602 | \n",
+ " -0.111180 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " Equalized_Odds_TPR | \n",
+ " 0.126869 | \n",
+ " 0.168115 | \n",
+ " 0.153447 | \n",
+ " XGBClassifier | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 20
+ },
+ {
+ "cell_type": "code",
+ "id": "2326c129",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-14T13:03:08.707240Z",
+ "start_time": "2024-12-14T13:03:08.705849Z"
+ }
+ },
+ "source": [],
+ "outputs": [],
+ "execution_count": null
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/experiment_config.yaml b/docs/examples/experiment_config.yaml
index 6a134740..b0679c67 100644
--- a/docs/examples/experiment_config.yaml
+++ b/docs/examples/experiment_config.yaml
@@ -1,6 +1,5 @@
+dataset_name: COMPAS_Without_Sensitive_Attributes
+computation_mode: no_bootstrap
random_state: 42
-dataset_name: diabetes
-bootstrap_fraction: 0.8
-n_estimators: 10 # Better to input the higher number of estimators than 100; this is only for this use case example
-sensitive_attributes_dct: {'Gender': 'Female'}
+sensitive_attributes_dct: {'sex': 1, 'race': 'African-American', 'sex&race': None}
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv
new file mode 100644
index 00000000..1acd72f2
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.6157112526539278,0.49333333333333335,0.6388888888888888,0.5170068027210885,0.6604938271604939,0.5319148936170213,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+TNR,0.7230769230769231,0.8308823529411765,0.6904231625835189,0.7865168539325843,0.6698113207547169,0.7958579881656804,0.6234817813765182,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+PPV,0.6415929203539823,0.6166666666666667,0.6454081632653061,0.5714285714285714,0.670846394984326,0.591715976331361,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+FNR,0.3842887473460722,0.5066666666666667,0.3611111111111111,0.48299319727891155,0.3395061728395062,0.46808510638297873,0.3286219081272085,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+FPR,0.27692307692307694,0.16911764705882354,0.30957683741648107,0.21348314606741572,0.330188679245283,0.20414201183431951,0.3765182186234818,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+Accuracy,0.6751893939393939,0.7109004739336493,0.6662721893491125,0.6908212560386473,0.6651090342679128,0.7015209125475285,0.6490566037735849,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+F1,0.628385698808234,0.5481481481481482,0.6421319796954315,0.5428571428571428,0.6656298600311042,0.5602240896358543,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+Selection-Rate,0.42803030303030304,0.2843601895734597,0.463905325443787,0.321256038647343,0.4968847352024922,0.32129277566539927,0.5339622641509434,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv
new file mode 100644
index 00000000..b34d607c
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.6242038216560509,0.4666666666666667,0.6540404040404041,0.4421768707482993,0.7067901234567902,0.4787234042553192,0.7208480565371025,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+TNR,0.7316239316239316,0.8088235294117647,0.7082405345211581,0.8164794007490637,0.660377358490566,0.8106508875739645,0.6234817813765182,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+PPV,0.6518847006651884,0.5737704918032787,0.6641025641025641,0.5701754385964912,0.6795252225519288,0.5844155844155844,0.6868686868686869,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+FNR,0.37579617834394907,0.5333333333333333,0.34595959595959597,0.5578231292517006,0.2932098765432099,0.5212765957446809,0.2791519434628975,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+FPR,0.26837606837606837,0.19117647058823528,0.29175946547884185,0.18352059925093633,0.33962264150943394,0.1893491124260355,0.3765182186234818,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+Accuracy,0.6837121212121212,0.6872037914691943,0.6828402366863905,0.6835748792270532,0.6838006230529595,0.6920152091254753,0.6754716981132075,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+F1,0.6377440347071583,0.5147058823529411,0.6590330788804071,0.49808429118773945,0.6928895612708018,0.5263157894736842,0.7034482758620689,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+Selection-Rate,0.4270833333333333,0.2890995260663507,0.46153846153846156,0.2753623188405797,0.5249221183800623,0.29277566539923955,0.560377358490566,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv
new file mode 100644
index 00000000..b4a90a0b
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.673036093418259,0.6133333333333333,0.6843434343434344,0.5578231292517006,0.7253086419753086,0.5797872340425532,0.734982332155477,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+TNR,0.7367521367521368,0.8014705882352942,0.7171492204899778,0.8089887640449438,0.6761006289308176,0.7988165680473372,0.6518218623481782,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+PPV,0.673036093418259,0.6301369863013698,0.6809045226130653,0.6165413533834586,0.6952662721893491,0.615819209039548,0.7074829931972789,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+FNR,0.32696390658174096,0.38666666666666666,0.31565656565656564,0.4421768707482993,0.27469135802469136,0.42021276595744683,0.26501766784452296,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+FPR,0.26324786324786326,0.19852941176470587,0.2828507795100223,0.19101123595505617,0.3238993710691824,0.20118343195266272,0.3481781376518219,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+Accuracy,0.7083333333333334,0.7345971563981043,0.7017751479289941,0.7198067632850241,0.7009345794392523,0.720532319391635,0.6962264150943396,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+F1,0.673036093418259,0.6216216216216216,0.6826196473551638,0.5857142857142857,0.7099697885196374,0.5972602739726027,0.7209705372616985,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+Selection-Rate,0.4460227272727273,0.3459715639810427,0.4710059171597633,0.321256038647343,0.5264797507788161,0.3365019011406844,0.5547169811320755,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv
new file mode 100644
index 00000000..1cd21a91
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.6666666666666666,0.56,0.6868686868686869,0.5510204081632653,0.7191358024691358,0.574468085106383,0.7279151943462897,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+TNR,0.7282051282051282,0.7720588235294118,0.7149220489977728,0.7752808988764045,0.6886792452830188,0.7751479289940828,0.6639676113360324,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+PPV,0.6638477801268499,0.5753424657534246,0.68,0.574468085106383,0.7018072289156626,0.5869565217391305,0.71280276816609,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+FNR,0.3333333333333333,0.44,0.31313131313131315,0.4489795918367347,0.2808641975308642,0.425531914893617,0.27208480565371024,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+FPR,0.2717948717948718,0.22794117647058823,0.28507795100222716,0.2247191011235955,0.3113207547169811,0.22485207100591717,0.3360323886639676,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+Accuracy,0.7007575757575758,0.6966824644549763,0.7017751479289941,0.6956521739130435,0.7040498442367601,0.7034220532319392,0.6981132075471698,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+F1,0.6652542372881356,0.5675675675675675,0.6834170854271356,0.5625,0.7103658536585366,0.5806451612903226,0.7202797202797203,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+Selection-Rate,0.4479166666666667,0.3459715639810427,0.47337278106508873,0.34057971014492755,0.5171339563862928,0.34980988593155893,0.5452830188679245,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv
new file mode 100644
index 00000000..dbeb7662
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.6157112526539278,0.49333333333333335,0.6388888888888888,0.5170068027210885,0.6604938271604939,0.5319148936170213,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+TNR,0.7230769230769231,0.8308823529411765,0.6904231625835189,0.7865168539325843,0.6698113207547169,0.7958579881656804,0.6234817813765182,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+PPV,0.6415929203539823,0.6166666666666667,0.6454081632653061,0.5714285714285714,0.670846394984326,0.591715976331361,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+FNR,0.3842887473460722,0.5066666666666667,0.3611111111111111,0.48299319727891155,0.3395061728395062,0.46808510638297873,0.3286219081272085,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+FPR,0.27692307692307694,0.16911764705882354,0.30957683741648107,0.21348314606741572,0.330188679245283,0.20414201183431951,0.3765182186234818,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+Accuracy,0.6751893939393939,0.7109004739336493,0.6662721893491125,0.6908212560386473,0.6651090342679128,0.7015209125475285,0.6490566037735849,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+F1,0.628385698808234,0.5481481481481482,0.6421319796954315,0.5428571428571428,0.6656298600311042,0.5602240896358543,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+Selection-Rate,0.42803030303030304,0.2843601895734597,0.463905325443787,0.321256038647343,0.4968847352024922,0.32129277566539927,0.5339622641509434,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv
new file mode 100644
index 00000000..15d95060
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.6242038216560509,0.4666666666666667,0.6540404040404041,0.4421768707482993,0.7067901234567902,0.4787234042553192,0.7208480565371025,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+TNR,0.7316239316239316,0.8088235294117647,0.7082405345211581,0.8164794007490637,0.660377358490566,0.8106508875739645,0.6234817813765182,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+PPV,0.6518847006651884,0.5737704918032787,0.6641025641025641,0.5701754385964912,0.6795252225519288,0.5844155844155844,0.6868686868686869,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+FNR,0.37579617834394907,0.5333333333333333,0.34595959595959597,0.5578231292517006,0.2932098765432099,0.5212765957446809,0.2791519434628975,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+FPR,0.26837606837606837,0.19117647058823528,0.29175946547884185,0.18352059925093633,0.33962264150943394,0.1893491124260355,0.3765182186234818,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+Accuracy,0.6837121212121212,0.6872037914691943,0.6828402366863905,0.6835748792270532,0.6838006230529595,0.6920152091254753,0.6754716981132075,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+F1,0.6377440347071583,0.5147058823529411,0.6590330788804071,0.49808429118773945,0.6928895612708018,0.5263157894736842,0.7034482758620689,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+Selection-Rate,0.4270833333333333,0.2890995260663507,0.46153846153846156,0.2753623188405797,0.5249221183800623,0.29277566539923955,0.560377358490566,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv
new file mode 100644
index 00000000..a95ee68d
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.673036093418259,0.6133333333333333,0.6843434343434344,0.5578231292517006,0.7253086419753086,0.5797872340425532,0.734982332155477,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+TNR,0.7367521367521368,0.8014705882352942,0.7171492204899778,0.8089887640449438,0.6761006289308176,0.7988165680473372,0.6518218623481782,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+PPV,0.673036093418259,0.6301369863013698,0.6809045226130653,0.6165413533834586,0.6952662721893491,0.615819209039548,0.7074829931972789,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+FNR,0.32696390658174096,0.38666666666666666,0.31565656565656564,0.4421768707482993,0.27469135802469136,0.42021276595744683,0.26501766784452296,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+FPR,0.26324786324786326,0.19852941176470587,0.2828507795100223,0.19101123595505617,0.3238993710691824,0.20118343195266272,0.3481781376518219,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+Accuracy,0.7083333333333334,0.7345971563981043,0.7017751479289941,0.7198067632850241,0.7009345794392523,0.720532319391635,0.6962264150943396,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+F1,0.673036093418259,0.6216216216216216,0.6826196473551638,0.5857142857142857,0.7099697885196374,0.5972602739726027,0.7209705372616985,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+Selection-Rate,0.4460227272727273,0.3459715639810427,0.4710059171597633,0.321256038647343,0.5264797507788161,0.3365019011406844,0.5547169811320755,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666
diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv
new file mode 100644
index 00000000..7904f20b
--- /dev/null
+++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv
@@ -0,0 +1,10 @@
+Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins
+TPR,0.6666666666666666,0.56,0.6868686868686869,0.5510204081632653,0.7191358024691358,0.574468085106383,0.7279151943462897,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+TNR,0.7282051282051282,0.7720588235294118,0.7149220489977728,0.7752808988764045,0.6886792452830188,0.7751479289940828,0.6639676113360324,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+PPV,0.6638477801268499,0.5753424657534246,0.68,0.574468085106383,0.7018072289156626,0.5869565217391305,0.71280276816609,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+FNR,0.3333333333333333,0.44,0.31313131313131315,0.4489795918367347,0.2808641975308642,0.425531914893617,0.27208480565371024,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+FPR,0.2717948717948718,0.22794117647058823,0.28507795100222716,0.2247191011235955,0.3113207547169811,0.22485207100591717,0.3360323886639676,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+Accuracy,0.7007575757575758,0.6966824644549763,0.7017751479289941,0.6956521739130435,0.7040498442367601,0.7034220532319392,0.6981132075471698,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+F1,0.6652542372881356,0.5675675675675675,0.6834170854271356,0.5625,0.7103658536585366,0.5806451612903226,0.7202797202797203,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+Selection-Rate,0.4479166666666667,0.3459715639810427,0.47337278106508873,0.34057971014492755,0.5171339563862928,0.34980988593155893,0.5452830188679245,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
+Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667
diff --git a/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv
new file mode 100644
index 00000000..843621ef
--- /dev/null
+++ b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv
@@ -0,0 +1,5 @@
+Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
+COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.6555,0.6575,"{'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_split': 0.1}"
+COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.6484,0.6521,"{'C': 1, 'max_iter': 250, 'penalty': 'l2', 'solver': 'newton-cg'}"
+COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.6583,0.6601,"{'max_depth': 10, 'max_features': 0.6, 'min_samples_leaf': 1, 'n_estimators': 100}"
+COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.6649,0.6670,"{'lambda': 100, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}"
diff --git a/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv
new file mode 100644
index 00000000..843621ef
--- /dev/null
+++ b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv
@@ -0,0 +1,5 @@
+Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
+COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.6555,0.6575,"{'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_split': 0.1}"
+COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.6484,0.6521,"{'C': 1, 'max_iter': 250, 'penalty': 'l2', 'solver': 'newton-cg'}"
+COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.6583,0.6601,"{'max_depth': 10, 'max_features': 0.6, 'min_samples_leaf': 1, 'n_estimators': 100}"
+COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.6649,0.6670,"{'lambda': 100, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}"
diff --git a/setup.py b/setup.py
index a4200a77..f688a888 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,5 @@
import os
import pathlib
-import pkg_resources
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
@@ -30,9 +29,9 @@
with pathlib.Path('requirements.txt').open() as requirements:
base_packages = [
- str(requirement)
- for requirement
- in pkg_resources.parse_requirements(requirements)
+ line.strip()
+ for line in requirements
+ if line.strip() and not line.strip().startswith('#')
]
# This call to setup() does all the work
diff --git a/virny/analyzers/abstract_overall_variance_analyzer.py b/virny/analyzers/abstract_overall_variance_analyzer.py
index 9454b6a2..6b6f18ed 100644
--- a/virny/analyzers/abstract_overall_variance_analyzer.py
+++ b/virny/analyzers/abstract_overall_variance_analyzer.py
@@ -51,8 +51,8 @@ class AbstractOverallVarianceAnalyzer(metaclass=ABCMeta):
def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float,
X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame, y_test: pd.DataFrame,
- dataset_name: str, n_estimators: int, random_state: int = None, with_predict_proba: bool = True,
- notebook_logs_stdout: bool = False, verbose: int = 0):
+ dataset_name: str, n_estimators: int, random_state: int = None, computation_mode: str = None,
+ with_predict_proba: bool = True, notebook_logs_stdout: bool = False, verbose: int = 0):
self.base_model = base_model
self.base_model_name = base_model_name
self.bootstrap_fraction = bootstrap_fraction
@@ -60,6 +60,7 @@ def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float,
self.n_estimators = n_estimators
self.models_lst = [deepcopy(base_model) for _ in range(n_estimators)]
self.random_state = random_state
+ self.computation_mode = computation_mode
self.with_predict_proba = with_predict_proba
self.models_predictions = None
self.prediction_metrics = None
@@ -102,7 +103,10 @@ def compute_metrics(self, save_results: bool = True, with_fit: bool = True):
self.models_predictions = self.UQ_by_boostrap(boostrap_size, with_replacement=True, with_fit=with_fit)
# Count metrics based on prediction proba results
- y_preds, self.prediction_metrics = count_prediction_metrics(self.y_test.values, self.models_predictions, self.with_predict_proba)
+ y_preds, self.prediction_metrics = count_prediction_metrics(y_true=self.y_test.values,
+ uq_results=self.models_predictions,
+ computation_mode=self.computation_mode,
+ with_predict_proba=self.with_predict_proba)
self._logger.info(f'Successfully computed predict proba metrics')
if save_results:
@@ -138,7 +142,7 @@ def UQ_by_boostrap(self, boostrap_size: int, with_replacement: bool, with_fit: b
else:
from tqdm import tqdm
- cycle_range = range(self.n_estimators) if with_fit is False else \
+ cycle_range = range(self.n_estimators) if self._notebook_logs_stdout is None or with_fit is False or self.n_estimators == 1 else \
tqdm(range(self.n_estimators),
desc="Classifiers testing by bootstrap",
colour="blue",
diff --git a/virny/analyzers/batch_overall_variance_analyzer.py b/virny/analyzers/batch_overall_variance_analyzer.py
index ed01ade2..64d22b80 100644
--- a/virny/analyzers/batch_overall_variance_analyzer.py
+++ b/virny/analyzers/batch_overall_variance_analyzer.py
@@ -32,6 +32,8 @@ class BatchOverallVarianceAnalyzer(AbstractOverallVarianceAnalyzer):
Number of estimators in ensemble to measure base_model stability
random_state
[Optional] Controls the randomness of the bootstrap approach for model arbitrariness evaluation
+ computation_mode
+ [Optional] A non-default mode for metrics computation. Should be included in the ComputationMode enum.
with_predict_proba
[Optional] A flag if model can return probabilities for its predictions.
If no, only metrics based on labels (not labels and probabilities) will be computed.
@@ -46,7 +48,8 @@ class BatchOverallVarianceAnalyzer(AbstractOverallVarianceAnalyzer):
def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float,
X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame, y_test: pd.DataFrame,
target_column: str, dataset_name: str, n_estimators: int, random_state: int = None,
- with_predict_proba: bool = True, notebook_logs_stdout: bool = False, verbose: int = 0):
+ computation_mode: str = None, with_predict_proba: bool = True,
+ notebook_logs_stdout: bool = False, verbose: int = 0):
super().__init__(base_model=base_model,
base_model_name=base_model_name,
bootstrap_fraction=bootstrap_fraction,
@@ -57,6 +60,7 @@ def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float,
dataset_name=dataset_name,
n_estimators=n_estimators,
random_state=random_state,
+ computation_mode=computation_mode,
with_predict_proba=with_predict_proba,
notebook_logs_stdout=notebook_logs_stdout,
verbose=verbose)
diff --git a/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py b/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py
index 56ea0bed..c25776b9 100644
--- a/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py
+++ b/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py
@@ -43,6 +43,8 @@ class BatchOverallVarianceAnalyzerPostProcessing(BatchOverallVarianceAnalyzer):
Number of estimators in ensemble to measure base_model stability
random_state
[Optional] Controls the randomness of the bootstrap approach for model arbitrariness evaluation
+ computation_mode
+ [Optional] A non-default mode for metrics computation. Should be included in the ComputationMode enum.
with_predict_proba
[Optional] A flag if model can return probabilities for its predictions.
If no, only metrics based on labels (not labels and probabilities) will be computed.
@@ -58,7 +60,8 @@ def __init__(self, postprocessor, sensitive_attribute: str,
base_model, base_model_name: str, bootstrap_fraction: float,
X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame, y_test: pd.DataFrame,
target_column: str, dataset_name: str, n_estimators: int, random_state: int = None,
- with_predict_proba: bool = True, notebook_logs_stdout: bool = False, verbose: int = 0):
+ computation_mode: str = None, with_predict_proba: bool = True,
+ notebook_logs_stdout: bool = False, verbose: int = 0):
if sensitive_attribute is None:
raise ValueError('Sensitive attribute for postprocessing is not defined. '
'Please, set postprocessing_sensitive_attribute argument in the metric computation config.')
@@ -74,6 +77,7 @@ def __init__(self, postprocessor, sensitive_attribute: str,
dataset_name=dataset_name,
n_estimators=n_estimators,
random_state=random_state,
+ computation_mode=computation_mode,
with_predict_proba=with_predict_proba,
notebook_logs_stdout=notebook_logs_stdout,
verbose=verbose)
@@ -112,7 +116,7 @@ def UQ_by_boostrap(self, boostrap_size: int, with_replacement: bool, with_fit: b
else:
from tqdm import tqdm
- cycle_range = range(self.n_estimators) if with_fit is False else \
+ cycle_range = range(self.n_estimators) if self._notebook_logs_stdout is None or with_fit is False else \
tqdm(range(self.n_estimators),
desc="Classifiers testing by bootstrap",
colour="blue",
diff --git a/virny/analyzers/subgroup_variance_analyzer.py b/virny/analyzers/subgroup_variance_analyzer.py
index dae8b81d..ccba8447 100644
--- a/virny/analyzers/subgroup_variance_analyzer.py
+++ b/virny/analyzers/subgroup_variance_analyzer.py
@@ -1,6 +1,6 @@
import pandas as pd
-from virny.configs.constants import ModelSetting
+from virny.configs.constants import ModelSetting, ComputationMode
from virny.custom_classes.base_dataset import BaseFlowDataset
from virny.analyzers.subgroup_variance_calculator import SubgroupVarianceCalculator
from virny.analyzers.batch_overall_variance_analyzer import BatchOverallVarianceAnalyzer
@@ -76,6 +76,7 @@ def __init__(self, model_setting: ModelSetting, n_estimators: int, base_model, b
target_column=dataset.target,
n_estimators=n_estimators,
random_state=random_state,
+ computation_mode=computation_mode,
with_predict_proba=with_predict_proba,
notebook_logs_stdout=notebook_logs_stdout,
verbose=verbose)
@@ -91,12 +92,14 @@ def __init__(self, model_setting: ModelSetting, n_estimators: int, base_model, b
target_column=dataset.target,
n_estimators=n_estimators,
random_state=random_state,
+ computation_mode=computation_mode,
with_predict_proba=with_predict_proba,
notebook_logs_stdout=notebook_logs_stdout,
verbose=verbose)
else:
raise ValueError('model_setting is incorrect or not supported')
+ self.computation_mode = computation_mode
self.dataset_name = overall_variance_analyzer.dataset_name
self.n_estimators = overall_variance_analyzer.n_estimators
self.base_model_name = overall_variance_analyzer.base_model_name
@@ -161,11 +164,12 @@ def compute_metrics(self, save_results: bool, result_filename: str = None,
model_dct['postprocessor'] = self.__overall_variance_analyzer.postprocessors_lst[model_idx]
fitted_bootstrap.append(model_dct)
- # Count and display fairness metrics
+ # Count variance metrics for subgroups
self.__subgroup_variance_calculator.set_overall_variance_metrics(self.overall_variance_metrics_dct)
- self.subgroup_variance_metrics_dct = self.__subgroup_variance_calculator.compute_subgroup_metrics(
- y_preds, self.__overall_variance_analyzer.models_predictions,
- save_results, result_filename, save_dir_path
- )
+ self.subgroup_variance_metrics_dct = dict() if self.computation_mode == ComputationMode.NO_BOOTSTRAP.value else \
+ self.__subgroup_variance_calculator.compute_subgroup_metrics(
+ y_preds, self.__overall_variance_analyzer.models_predictions,
+ save_results, result_filename, save_dir_path
+ )
return y_preds, pd.DataFrame(self.subgroup_variance_metrics_dct), fitted_bootstrap
diff --git a/virny/analyzers/subgroup_variance_calculator.py b/virny/analyzers/subgroup_variance_calculator.py
index aa2c8af0..f91884e2 100644
--- a/virny/analyzers/subgroup_variance_calculator.py
+++ b/virny/analyzers/subgroup_variance_calculator.py
@@ -103,7 +103,9 @@ def _partition_and_compute_metrics_for_error_analysis(self, y_preds, models_pred
return results
def _compute_metrics(self, y_test: pd.DataFrame, group_models_predictions):
- _, prediction_metrics = count_prediction_metrics(y_test, group_models_predictions,
+ _, prediction_metrics = count_prediction_metrics(y_true=y_test,
+ uq_results=group_models_predictions,
+ computation_mode=self.computation_mode,
with_predict_proba=self.with_predict_proba)
return prediction_metrics
diff --git a/virny/configs/constants.py b/virny/configs/constants.py
index f237771a..f5735920 100644
--- a/virny/configs/constants.py
+++ b/virny/configs/constants.py
@@ -7,6 +7,7 @@ class ModelSetting(Enum):
class ComputationMode(Enum):
ERROR_ANALYSIS = "error_analysis"
+ NO_BOOTSTRAP = "no_bootstrap"
INTERSECTION_SIGN = '&'
diff --git a/virny/custom_classes/metrics_interactive_visualizer.py b/virny/custom_classes/metrics_interactive_visualizer.py
index 90124a1b..c1b6b1e1 100644
--- a/virny/custom_classes/metrics_interactive_visualizer.py
+++ b/virny/custom_classes/metrics_interactive_visualizer.py
@@ -180,7 +180,7 @@ def create_web_app(self, start_app=True):
overall_metric_max_val1 = gr.Text(value="1.0", label="Max value", scale=1)
with gr.Row():
disparity_metric1 = gr.Dropdown(
- sorted(self.all_disparity_metrics),
+ ['None'] + sorted(self.all_disparity_metrics),
value='Equalized_Odds_FPR', multiselect=False, label="Disparity Constraint (C2)",
scale=2
)
@@ -188,7 +188,7 @@ def create_web_app(self, start_app=True):
disparity_metric_max_val1 = gr.Text(value="1.0", label="Max value", scale=1)
with gr.Row():
overall_metric2 = gr.Dropdown(
- sorted(self.all_overall_metrics),
+ ['None'] + sorted(self.all_overall_metrics),
value='Label_Stability', multiselect=False, label="Overall Constraint (C3)",
scale=2
)
@@ -196,7 +196,7 @@ def create_web_app(self, start_app=True):
overall_metric_max_val2 = gr.Text(value="1.0", label="Max value", scale=1)
with gr.Row():
disparity_metric2 = gr.Dropdown(
- sorted(self.all_disparity_metrics),
+ ['None'] + sorted(self.all_disparity_metrics),
value='Label_Stability_Ratio', multiselect=False, label="Disparity Constraint (C4)",
scale=2
)
@@ -206,7 +206,7 @@ def create_web_app(self, start_app=True):
btn_view1 = gr.Button("Submit")
with gr.Column(scale=3):
bar_plot_for_model_selection = gr.Plot(label="Bar Chart")
- df_with_models_satisfied_all_constraints = gr.DataFrame(label='Models that satisfy all 4 constraints')
+ df_with_models_satisfied_all_constraints = gr.DataFrame(label='Models that satisfy all constraints')
btn_view1.click(self._create_bar_plot_for_model_selection,
inputs=[group_name,
@@ -575,13 +575,20 @@ def _create_bar_plot_for_model_selection(self, group_name, overall_metric1, over
# Create individual constraints
metrics_value_range_dct = dict()
for constraint in [overall_constraint1, disparity_constraint1, overall_constraint2, disparity_constraint2]:
- metrics_value_range_dct[constraint[0]] = [constraint[1], constraint[2]]
+ if constraint[0] != 'None':
+ metrics_value_range_dct[constraint[0]] = [constraint[1], constraint[2]]
+
# Create intersectional constraints
- metrics_value_range_dct[f'{overall_constraint1[0]}&{disparity_constraint1[0]}'] = None
- metrics_value_range_dct[f'{overall_constraint1[0]}&{overall_constraint2[0]}'] = None
- metrics_value_range_dct[f'{overall_constraint1[0]}&{disparity_constraint2[0]}'] = None
- metrics_value_range_dct[(f'{overall_constraint1[0]}&{disparity_constraint1[0]}'
- f'&{overall_constraint2[0]}&{disparity_constraint2[0]}')] = None
+ for constrain_pair in [(overall_constraint1[0], disparity_constraint1[0]),
+ (overall_constraint1[0], overall_constraint2[0]),
+ (overall_constraint1[0], disparity_constraint2[0])]:
+ if constrain_pair[0] != 'None' and constrain_pair[1] != 'None':
+ metrics_value_range_dct[f'{constrain_pair[0]}&{constrain_pair[1]}'] = None
+
+ all_constrains_str = '&'.join(
+ [c for c in [overall_constraint1[0], disparity_constraint1[0], overall_constraint2[0], disparity_constraint2[0]] if c != 'None']
+ )
+ metrics_value_range_dct[all_constrains_str] = None
melted_all_subgroup_metrics_per_model_dct = dict()
for model_name in self.melted_model_metrics_df['Model_Name'].unique():
@@ -597,6 +604,7 @@ def _create_bar_plot_for_model_selection(self, group_name, overall_metric1, over
melted_all_group_metrics_per_model_dct,
metrics_value_range_dct,
group=group_name,
+ num_constrains=all_constrains_str.count('&') + 1,
metric_name_to_alias_dct=metric_name_to_alias_dct)
def _create_subgroup_model_rank_heatmap(self, model_names: list, subgroup_accuracy_metrics_lst: list,
@@ -839,7 +847,7 @@ def _create_metrics_bar_chart_per_one_model(self, model_name: str, metrics_names
alt.layer(
models_metrics_chart, text, data=filtered_metrics_df
).properties(
- width=500,
+ width=280 if metrics_type == 'group' else 300,
height=100
).facet(
row=alt.Row('Metric:N', title=metrics_title, sort=metrics_names)
diff --git a/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py b/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py
index 9cb8f712..206c51c7 100644
--- a/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py
+++ b/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py
@@ -33,4 +33,4 @@ def predict_proba(self, X, seed: int):
return self.estimator.predict(X, tta_seed=seed).values
def predict(self, X, seed: int):
- return self.estimator.predict(X, tta_seed=seed)
+ return self.estimator.predict(X, tta_seed=seed).values[:, -1]
diff --git a/virny/datasets/folktables.py b/virny/datasets/folktables.py
index e9d45b7c..9dda082f 100644
--- a/virny/datasets/folktables.py
+++ b/virny/datasets/folktables.py
@@ -55,6 +55,7 @@ def __init__(self, state, year, root_dir=None, with_nulls=False, with_filter=Tru
acs_data = data_source.get_data(states=state, download=True)
if with_filter:
acs_data = adult_filter(acs_data)
+ acs_data = acs_data.reset_index(drop=True)
if subsample_size:
acs_data = acs_data.sample(subsample_size, random_state=subsample_seed) if subsample_seed is not None \
else acs_data.sample(subsample_size)
@@ -137,6 +138,7 @@ def __init__(self, state, year, root_dir=None, with_nulls=False, with_filter=Tru
acs_data = data_source.get_data(states=state, download=True)
if with_filter:
acs_data = employment_filter(acs_data)
+ acs_data = acs_data.reset_index(drop=True)
if subsample_size:
acs_data = acs_data.sample(subsample_size, random_state=subsample_seed) if subsample_seed is not None \
else acs_data.sample(subsample_size)
@@ -285,6 +287,7 @@ def __init__(self, state, year, root_dir=None, with_nulls=False, with_filter=Tru
acs_data = data_source.get_data(states=state, download=True)
if with_filter:
acs_data = public_coverage_filter(acs_data)
+ acs_data = acs_data.reset_index(drop=True)
if subsample_size:
acs_data = acs_data.sample(subsample_size, random_state=subsample_seed) if subsample_seed is not None \
else acs_data.sample(subsample_size)
diff --git a/virny/datasets/healthcare.py b/virny/datasets/healthcare.py
index 043d68b6..ed354bdf 100644
--- a/virny/datasets/healthcare.py
+++ b/virny/datasets/healthcare.py
@@ -77,13 +77,15 @@ def __init__(self, subsample_size: int = None, subsample_seed: int = None, with_
# Preprocess features
df = df.rename(columns={'Pregancies': 'Pregnancies'})
- df['RegularMedicine'].replace('o', 'no', inplace=True)
+ df['RegularMedicine'] = df['RegularMedicine'].replace('o', 'no')
df['BPLevel'] = df['BPLevel'].str.strip().str.lower()
+ df['Pdiabetes']= df['Pdiabetes'].replace('0','no')
+ df['Pdiabetes']= df['Pdiabetes'].str.lower().str.strip()
# Preprocess a target
df['Diabetic'] = df['Diabetic'].str.strip()
- df['Diabetic'].replace('no', 0, inplace=True)
- df['Diabetic'].replace('yes', 1, inplace=True)
+ df['Diabetic'] = df['Diabetic'].replace('no', 0)
+ df['Diabetic'] = df['Diabetic'].replace('yes', 1)
target = 'Diabetic'
numerical_columns = ['BMI', 'Sleep', 'SoundSleep', 'Pregnancies']
diff --git a/virny/user_interfaces/inference_api.py b/virny/user_interfaces/inference_api.py
index d2a50f69..b3344292 100644
--- a/virny/user_interfaces/inference_api.py
+++ b/virny/user_interfaces/inference_api.py
@@ -1,6 +1,6 @@
import pandas as pd
-from virny.configs.constants import ModelSetting
+from virny.configs.constants import ModelSetting, ComputationMode
from virny.custom_classes.base_dataset import BaseFlowDataset
from virny.analyzers.subgroup_error_analyzer import SubgroupErrorAnalyzer
from virny.analyzers.subgroup_variance_analyzer import SubgroupVarianceAnalyzer
@@ -10,6 +10,9 @@
def compute_metrics_with_fitted_bootstrap(fitted_bootstrap: list, test_base_flow_dataset: BaseFlowDataset,
config, with_predict_proba: bool = True, verbose: int = 0):
model_setting = ModelSetting.BATCH
+ if config.computation_mode == ComputationMode.NO_BOOTSTRAP.value:
+ with_predict_proba = False
+
X_test, y_test = test_base_flow_dataset.X_test, test_base_flow_dataset.y_test
test_protected_groups = create_test_protected_groups(X_test,
test_base_flow_dataset.init_sensitive_attrs_df,
diff --git a/virny/user_interfaces/multiple_models_api.py b/virny/user_interfaces/multiple_models_api.py
index 8e017859..a9b54b39 100644
--- a/virny/user_interfaces/multiple_models_api.py
+++ b/virny/user_interfaces/multiple_models_api.py
@@ -4,7 +4,7 @@
import pandas as pd
from datetime import datetime, timezone
-from virny.configs.constants import ModelSetting
+from virny.configs.constants import ModelSetting, ComputationMode
from virny.custom_classes.base_dataset import BaseFlowDataset
from virny.preprocessing.basic_preprocessing import preprocess_base_model
from virny.analyzers.subgroup_variance_analyzer import SubgroupVarianceAnalyzer
@@ -14,12 +14,12 @@
def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config: dict,
- save_results_dir_path: str, postprocessor=None, with_predict_proba: bool = True,
+ save_results_dir_path: str = None, postprocessor=None, with_predict_proba: bool = True,
notebook_logs_stdout: bool = False, return_fitted_bootstrap: bool = False,
verbose: int = 0):
"""
Compute stability and accuracy metrics for each model in models_config. Arguments are defined as an input config object.
- Save results in `save_results_dir_path` folder.
+ Save results in the `save_results_dir_path` folder if this path is provided.
Return a dictionary where keys are model names, and values are metrics for sensitive attributes defined in config.
@@ -32,7 +32,7 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config:
models_config
Dictionary where keys are model names, and values are initialized models
save_results_dir_path
- Location where to save result files with metrics
+ [Optional] Location where to save result files with metrics
postprocessor
[Optional] Postprocessor object to apply to model predictions before metrics computation
with_predict_proba
@@ -55,7 +55,8 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config:
verbose = 0
start_datetime = datetime.now(timezone.utc)
- os.makedirs(save_results_dir_path, exist_ok=True)
+ if save_results_dir_path:
+ os.makedirs(save_results_dir_path, exist_ok=True)
model_metrics_dct = dict()
models_metrics_dct, models_fitted_bootstraps_dct = run_metrics_computation(dataset=dataset,
@@ -79,8 +80,9 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config:
model_metrics_df = models_metrics_dct[model_name]
model_metrics_dct[model_name] = model_metrics_df
- result_filename = f'Metrics_{config.dataset_name}_{model_name}_{config.n_estimators}_Estimators_{start_datetime.strftime("%Y%m%d__%H%M%S")}.csv'
- model_metrics_dct[model_name].to_csv(f'{save_results_dir_path}/{result_filename}', index=False, mode='w')
+ if save_results_dir_path:
+ result_filename = f'Metrics_{config.dataset_name}_{model_name}_{config.n_estimators}_Estimators_{start_datetime.strftime("%Y%m%d__%H%M%S")}.csv'
+ model_metrics_dct[model_name].to_csv(f'{save_results_dir_path}/{result_filename}', index=False, mode='w')
if return_fitted_bootstrap:
return model_metrics_dct, models_fitted_bootstraps_dct
@@ -136,8 +138,9 @@ def run_metrics_computation(dataset: BaseFlowDataset, bootstrap_fraction: float,
False, otherwise. Note that if it is set to False, only metrics based on labels (not labels and probabilities) will be computed.
Ignored when a postprocessor is not None, and set to False in this case.
notebook_logs_stdout
- [Optional] True, if this interface was execute in a Jupyter notebook,
- False, otherwise.
+ [Optional] True to display a progress bar in a Jupyter notebook,
+ False to display a progress bar in a Python module,
+ None to disable the progress bar.
verbose
[Optional] Level of logs printing. The greater level provides more logs.
As for now, 0, 1, 2 levels are supported.
@@ -149,14 +152,17 @@ def run_metrics_computation(dataset: BaseFlowDataset, bootstrap_fraction: float,
else:
from tqdm import tqdm
+ num_models = len(models_config)
+ cycle_range = enumerate(models_config.keys()) if notebook_logs_stdout is None else \
+ tqdm(enumerate(models_config.keys()),
+ total=num_models,
+ desc="Analyze multiple models",
+ colour="red",
+ file=sys.stdout)
+
models_metrics_dct = dict()
models_fitted_bootstraps_dct = dict()
- num_models = len(models_config)
- for model_idx, model_name in tqdm(enumerate(models_config.keys()),
- total=num_models,
- desc="Analyze multiple models",
- colour="red",
- file=sys.stdout):
+ for model_idx, model_name in cycle_range:
if verbose >= 1:
print('\n\n', flush=True)
print('#' * 30, f' [Model {model_idx + 1} / {num_models}] Analyze {model_name} ', '#' * 30)
@@ -253,6 +259,8 @@ def compute_one_model_metrics(base_model, n_estimators: int, dataset: BaseFlowDa
As for now, 0, 1, 2 levels are supported.
"""
+ if computation_mode == ComputationMode.NO_BOOTSTRAP.value:
+ with_predict_proba = False
model_setting = ModelSetting.BATCH if model_setting is None else ModelSetting[model_setting.upper()]
test_protected_groups = create_test_protected_groups(dataset.X_test, dataset.init_sensitive_attrs_df, sensitive_attributes_dct)
@@ -281,7 +289,6 @@ def compute_one_model_metrics(base_model, n_estimators: int, dataset: BaseFlowDa
y_preds, variance_metrics_df, fitted_bootstrap = subgroup_variance_analyzer.compute_metrics(save_results=False,
result_filename=None,
save_dir_path=None)
-
# Compute error metrics for subgroups
error_analyzer = SubgroupErrorAnalyzer(X_test=dataset.X_test,
y_test=dataset.y_test,
diff --git a/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py b/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py
index cda7becb..770d109c 100644
--- a/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py
+++ b/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py
@@ -3,7 +3,7 @@
import pandas as pd
from datetime import datetime, timezone
-from virny.configs.constants import ModelSetting
+from virny.configs.constants import ModelSetting, ComputationMode
from virny.utils.protected_groups_partitioning import create_test_protected_groups
from virny.preprocessing.basic_preprocessing import preprocess_base_model
from virny.custom_classes.base_dataset import BaseFlowDataset
@@ -144,8 +144,9 @@ def run_metrics_computation_with_multiple_test_sets(dataset: BaseFlowDataset, bo
False, otherwise. Note that if it is set to False, only metrics based on labels (not labels and probabilities) will be computed.
Ignored when a postprocessor is not None, and set to False in this case.
notebook_logs_stdout
- [Optional] True, if this interface was execute in a Jupyter notebook,
- False, otherwise.
+ [Optional] True to display a progress bar in a Jupyter notebook,
+ False to display a progress bar in a Python module,
+ None to disable the progress bar.
verbose
[Optional] Level of logs printing. The greater level provides more logs.
As for now, 0, 1, 2 levels are supported.
@@ -157,14 +158,17 @@ def run_metrics_computation_with_multiple_test_sets(dataset: BaseFlowDataset, bo
else:
from tqdm import tqdm
+ num_models = len(models_config)
+ cycle_range = enumerate(models_config.keys()) if notebook_logs_stdout is None else \
+ tqdm(enumerate(models_config.keys()),
+ total=num_models,
+ desc="Analyze multiple models",
+ colour="red",
+ file=sys.stdout)
+
models_metrics_dct = dict()
models_fitted_bootstraps_dct = dict()
- num_models = len(models_config)
- for model_idx, model_name in tqdm(enumerate(models_config.keys()),
- total=num_models,
- desc="Analyze multiple models",
- colour="red",
- file=sys.stdout):
+ for model_idx, model_name in cycle_range:
if verbose >= 1:
print('#' * 30, f' [Model {model_idx + 1} / {num_models}] Analyze {model_name} ', '#' * 30)
try:
@@ -254,6 +258,8 @@ def compute_one_model_metrics_with_multiple_test_sets(base_model, n_estimators:
As for now, 0, 1, 2 levels are supported.
"""
+ if computation_mode == ComputationMode.NO_BOOTSTRAP.value:
+ with_predict_proba = False
model_setting = ModelSetting.BATCH if model_setting is None else ModelSetting[model_setting.upper()]
subgroup_variance_analyzer = SubgroupVarianceAnalyzer(model_setting=model_setting,
n_estimators=n_estimators,
diff --git a/virny/utils/common_helpers.py b/virny/utils/common_helpers.py
index 6a3a7c2b..f1e69fe4 100644
--- a/virny/utils/common_helpers.py
+++ b/virny/utils/common_helpers.py
@@ -23,19 +23,42 @@ def validate_config(config_obj):
Object with parameters defined in a yaml file
"""
+ # ============================================================================================================
+ # Optional parameters
+ # ============================================================================================================
+ if config_obj.model_setting is not None \
+ and not isinstance(config_obj.model_setting, str) \
+ and config_obj.model_setting not in ModelSetting:
+ raise ValueError('model_setting must be a string that is included in the ModelSetting enum. '
+ 'Refer to this function documentation for more details!')
+
+ if config_obj.computation_mode is not None \
+ and not isinstance(config_obj.computation_mode, str) \
+ and config_obj.computation_mode not in ComputationMode:
+ raise ValueError('computation_mode must be a string that is included in the ComputationMode enum. '
+ 'Refer to this function documentation for more details!')
+
+ # ============================================================================================================
+ # Arguments pre-setting depending on the configs
+ # ============================================================================================================
+ if config_obj.computation_mode == ComputationMode.NO_BOOTSTRAP.value:
+ config_obj.bootstrap_fraction = 1.0
+ config_obj.n_estimators = 1
+
# ============================================================================================================
# Required parameters
# ============================================================================================================
if not isinstance(config_obj.dataset_name, str):
raise ValueError('dataset_name must be string')
- if not isinstance(config_obj.bootstrap_fraction, float) \
- or config_obj.bootstrap_fraction < 0.0 \
- or config_obj.bootstrap_fraction > 1.0:
- raise ValueError('bootstrap_fraction must be float in [0.0, 1.0] range')
+ if config_obj.computation_mode != ComputationMode.NO_BOOTSTRAP.value:
+ if not isinstance(config_obj.bootstrap_fraction, float) \
+ or config_obj.bootstrap_fraction < 0.0 \
+ or config_obj.bootstrap_fraction > 1.0:
+ raise ValueError('bootstrap_fraction must be float in [0.0, 1.0] range')
- if not isinstance(config_obj.n_estimators, int) or config_obj.n_estimators <= 1:
- raise ValueError('n_estimators must be integer greater than 1')
+ if not isinstance(config_obj.n_estimators, int) or config_obj.n_estimators <= 1:
+ raise ValueError('n_estimators must be integer greater than 1')
if not isinstance(config_obj.sensitive_attributes_dct, dict):
raise ValueError('sensitive_attributes_dct must be python dictionary')
@@ -56,21 +79,6 @@ def validate_config(config_obj):
raise ValueError('Intersectional attributes in sensitive_attributes_dct must contain '
'single sensitive attributes that also exist in sensitive_attributes_dct')
- # ============================================================================================================
- # Optional parameters
- # ============================================================================================================
- if config_obj.model_setting is not None \
- and not isinstance(config_obj.model_setting, str) \
- and config_obj.model_setting not in ModelSetting:
- raise ValueError('model_setting must be a string that is included in the ModelSetting enum. '
- 'Refer to this function documentation for more details!')
-
- if config_obj.computation_mode is not None \
- and not isinstance(config_obj.computation_mode, str) \
- and config_obj.computation_mode not in ComputationMode:
- raise ValueError('computation_mode must be a string that is included in the ComputationMode enum. '
- 'Refer to this function documentation for more details!')
-
# ============================================================================================================
# Default parameters
# ============================================================================================================
diff --git a/virny/utils/data_viz_utils.py b/virny/utils/data_viz_utils.py
index 8a762d89..c6e2d990 100644
--- a/virny/utils/data_viz_utils.py
+++ b/virny/utils/data_viz_utils.py
@@ -360,11 +360,12 @@ def create_model_performance_summary_visualization(main_matrix, matrix_for_color
def create_flexible_bar_plot_for_model_selection(all_subgroup_metrics_per_model_dct: dict, all_group_metrics_per_model_dct: dict,
- metrics_value_range_dct: dict, group: str, metric_name_to_alias_dct: dict):
+ metrics_value_range_dct: dict, group: str, metric_name_to_alias_dct: dict,
+ num_constrains: int):
# Compute the number of models that satisfy the conditions
models_in_range_df, df_with_models_satisfied_all_constraints = (
create_models_in_range_dct(all_subgroup_metrics_per_model_dct, all_group_metrics_per_model_dct,
- metrics_value_range_dct, group))
+ metrics_value_range_dct, group, num_constrains=num_constrains))
def get_column_alias(metric_group):
if '&' not in metric_group:
@@ -492,7 +493,7 @@ def get_column_alias(metric_group):
def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_group_metrics_per_model_dct: dict,
- metrics_value_range_dct: dict, group: str):
+ metrics_value_range_dct: dict, group: str, num_constrains: int = 4):
# Merge subgroup and group metrics for each model and align their columns
all_metrics_for_all_models_df = pd.DataFrame()
for model_name in all_subgroup_metrics_per_model_dct.keys():
@@ -502,7 +503,6 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro
subgroup_metrics_per_model_df = all_subgroup_metrics_per_model_dct[model_name][
(all_subgroup_metrics_per_model_dct[model_name]['Subgroup'] == 'overall')
]
- subgroup_metrics_per_model_df['Subgroup'] = subgroup_metrics_per_model_df['Subgroup']
aligned_subgroup_metrics_per_model_df = subgroup_metrics_per_model_df[group_metrics_per_model_df.columns]
combined_metrics_per_model_df = pd.concat([group_metrics_per_model_df, aligned_subgroup_metrics_per_model_df]).reset_index(drop=True)
@@ -543,9 +543,14 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro
else:
pd_condition &= (pivoted_model_metrics_df[metric] >= min_range_val) & (pivoted_model_metrics_df[metric] <= max_range_val)
+ # value_counts().reset_index() names its columns differently across pandas versions, so the if-statement below handles both layouts
num_satisfied_models_df = pivoted_model_metrics_df[pd_condition]['Model_Type'].value_counts().reset_index()
- num_satisfied_models_df.rename(columns = {'Model_Type': 'Number_of_Models'}, inplace = True)
- num_satisfied_models_df.rename(columns = {'index': 'Model_Type'}, inplace = True)
+ if 'count' in num_satisfied_models_df.columns:
+ num_satisfied_models_df.rename(columns = {'count': 'Number_of_Models'}, inplace = True)
+ else:
+ num_satisfied_models_df.rename(columns = {'Model_Type': 'Number_of_Models'}, inplace = True)
+ num_satisfied_models_df.rename(columns = {'index': 'Model_Type'}, inplace = True)
+
# If a constraint for a metric group is not satisfied, add zeros for all model names
if num_satisfied_models_df.shape[0] == 0:
num_satisfied_models_df = pd.DataFrame({'Model_Type': model_types,
@@ -558,7 +563,7 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro
# Concatenate based on rows
models_in_range_df = pd.concat([models_in_range_df, num_satisfied_models_df], ignore_index=True, sort=False)
- if metric_group.count('&') == 3:
+ if metric_group.count('&') == num_constrains - 1:
df_with_models_satisfied_all_constraints = pivoted_model_metrics_df[pd_condition][['Model_Type', 'Model_Name']]
return models_in_range_df, df_with_models_satisfied_all_constraints
diff --git a/virny/utils/stability_utils.py b/virny/utils/stability_utils.py
index 9ea34078..a3a6c105 100644
--- a/virny/utils/stability_utils.py
+++ b/virny/utils/stability_utils.py
@@ -2,7 +2,7 @@
import numpy as np
import pandas as pd
-from virny.configs.constants import ALEATORIC_UNCERTAINTY, EPISTEMIC_UNCERTAINTY, OVERALL_UNCERTAINTY
+from virny.configs.constants import ALEATORIC_UNCERTAINTY, EPISTEMIC_UNCERTAINTY, OVERALL_UNCERTAINTY, ComputationMode
from virny.metrics import METRIC_TO_FUNCTION, METRICS_FOR_PREDICT_PROBA, METRICS_FOR_LABELS
@@ -31,7 +31,7 @@ def combine_bootstrap_predictions(bootstrap_predictions: dict, y_test_indexes: n
return pd.Series(y_preds, index=y_test_indexes)
-def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True):
+def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True, computation_mode = None):
"""
Compute means, stds, iqr, entropy, jitter, label stability, and transform predictions to pd.Dataframe.
@@ -46,6 +46,8 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True
with_predict_proba
[Optional] A flag if model can return probabilities for its predictions.
If no, only metrics based on labels (not labels and probabilities) will be computed.
+ computation_mode
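+ [Optional] Name of the computation mode. If it equals ComputationMode.NO_BOOTSTRAP.value, stability and uncertainty metrics are not computed.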
"""
if isinstance(uq_results, np.ndarray):
@@ -54,27 +56,28 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True
results = pd.DataFrame(uq_results).transpose()
metrics_dct = dict()
- # Compute metrics for prediction probabilities
- if not with_predict_proba:
- uq_labels = results
- else:
- uq_predict_probas = results
- for metric in METRICS_FOR_PREDICT_PROBA:
- if metric == EPISTEMIC_UNCERTAINTY: # skip computation for a metric that is based on two other metrics
- continue
+ if computation_mode != ComputationMode.NO_BOOTSTRAP.value: # Do not compute stability and uncertainty metrics for NO_BOOTSTRAP
+ # Compute metrics for prediction probabilities
+ if not with_predict_proba:
+ uq_labels = results
+ else:
+ uq_predict_probas = results
+ for metric in METRICS_FOR_PREDICT_PROBA:
+ if metric == EPISTEMIC_UNCERTAINTY: # skip computation for a metric that is based on two other metrics
+ continue
- metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_predict_probas)
+ metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_predict_probas)
- metrics_dct[EPISTEMIC_UNCERTAINTY] = metrics_dct[OVERALL_UNCERTAINTY] - metrics_dct[ALEATORIC_UNCERTAINTY]
+ metrics_dct[EPISTEMIC_UNCERTAINTY] = metrics_dct[OVERALL_UNCERTAINTY] - metrics_dct[ALEATORIC_UNCERTAINTY]
- # Convert predict proba results of each model to correspondent labels.
- # Here we use int(x<0.5) since we use predict_prob()[:, 0] to make predictions.
- # Hence, if a value is, for example, 0.3 --> label == 1, 0.6 -- > label == 0
- uq_labels = (results < 0.5).astype(int)
+ # Convert predict proba results of each model to corresponding labels.
+ # Here we use int(x<0.5) since we use predict_prob()[:, 0] to make predictions.
+ # Hence, if a value is, for example, 0.3 --> label == 1, 0.6 --> label == 0
+ uq_labels = (results < 0.5).astype(int)
- # Compute metrics for prediction labels
- for metric in METRICS_FOR_LABELS:
- metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_labels)
+ # Compute metrics for prediction labels
+ for metric in METRICS_FOR_LABELS:
+ metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_labels)
if with_predict_proba:
y_preds = np.array([int(x<0.5) for x in results.mean().values])
@@ -85,6 +88,9 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True
def generate_bootstrap(features, labels, boostrap_size, with_replacement=True, random_state=None):
+ if boostrap_size == features.shape[0]:
+ return pd.DataFrame(features), pd.DataFrame(labels)
+
# Create a local random state.
# Note that to keep reverse compatibility we need to use different generators for different python versions
# since random number generation was changed in Python 3.12