From b8c7144e946df63b6369b555f0a89dd78b430dd7 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Sat, 21 Sep 2024 00:54:06 +0300 Subject: [PATCH 1/9] Fixed indexing in folktable datasets --- virny/datasets/folktables.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virny/datasets/folktables.py b/virny/datasets/folktables.py index e9d45b7c..9dda082f 100644 --- a/virny/datasets/folktables.py +++ b/virny/datasets/folktables.py @@ -55,6 +55,7 @@ def __init__(self, state, year, root_dir=None, with_nulls=False, with_filter=Tru acs_data = data_source.get_data(states=state, download=True) if with_filter: acs_data = adult_filter(acs_data) + acs_data = acs_data.reset_index(drop=True) if subsample_size: acs_data = acs_data.sample(subsample_size, random_state=subsample_seed) if subsample_seed is not None \ else acs_data.sample(subsample_size) @@ -137,6 +138,7 @@ def __init__(self, state, year, root_dir=None, with_nulls=False, with_filter=Tru acs_data = data_source.get_data(states=state, download=True) if with_filter: acs_data = employment_filter(acs_data) + acs_data = acs_data.reset_index(drop=True) if subsample_size: acs_data = acs_data.sample(subsample_size, random_state=subsample_seed) if subsample_seed is not None \ else acs_data.sample(subsample_size) @@ -285,6 +287,7 @@ def __init__(self, state, year, root_dir=None, with_nulls=False, with_filter=Tru acs_data = data_source.get_data(states=state, download=True) if with_filter: acs_data = public_coverage_filter(acs_data) + acs_data = acs_data.reset_index(drop=True) if subsample_size: acs_data = acs_data.sample(subsample_size, random_state=subsample_seed) if subsample_seed is not None \ else acs_data.sample(subsample_size) From 5c20c3c7d13f715963b8d90e673065dbb5f5d95c Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Wed, 20 Nov 2024 23:32:55 +0200 Subject: [PATCH 2/9] Added features for virny_flow --- virny/user_interfaces/multiple_models_api.py | 14 ++++++++------ virny/utils/data_viz_utils.py | 3 +-- 2 files 
changed, 9 insertions(+), 8 deletions(-) diff --git a/virny/user_interfaces/multiple_models_api.py b/virny/user_interfaces/multiple_models_api.py index 8e017859..df327098 100644 --- a/virny/user_interfaces/multiple_models_api.py +++ b/virny/user_interfaces/multiple_models_api.py @@ -14,12 +14,12 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config: dict, - save_results_dir_path: str, postprocessor=None, with_predict_proba: bool = True, + save_results_dir_path: str = None, postprocessor=None, with_predict_proba: bool = True, notebook_logs_stdout: bool = False, return_fitted_bootstrap: bool = False, verbose: int = 0): """ Compute stability and accuracy metrics for each model in models_config. Arguments are defined as an input config object. - Save results in `save_results_dir_path` folder. + Save results in `save_results_dir_path` folder if needed. Return a dictionary where keys are model names, and values are metrics for sensitive attributes defined in config. 
@@ -32,7 +32,7 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config: models_config Dictionary where keys are model names, and values are initialized models save_results_dir_path - Location where to save result files with metrics + [Optional] Location where to save result files with metrics postprocessor [Optional] Postprocessor object to apply to model predictions before metrics computation with_predict_proba @@ -55,7 +55,8 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config: verbose = 0 start_datetime = datetime.now(timezone.utc) - os.makedirs(save_results_dir_path, exist_ok=True) + if save_results_dir_path: + os.makedirs(save_results_dir_path, exist_ok=True) model_metrics_dct = dict() models_metrics_dct, models_fitted_bootstraps_dct = run_metrics_computation(dataset=dataset, @@ -79,8 +80,9 @@ def compute_metrics_with_config(dataset: BaseFlowDataset, config, models_config: model_metrics_df = models_metrics_dct[model_name] model_metrics_dct[model_name] = model_metrics_df - result_filename = f'Metrics_{config.dataset_name}_{model_name}_{config.n_estimators}_Estimators_{start_datetime.strftime("%Y%m%d__%H%M%S")}.csv' - model_metrics_dct[model_name].to_csv(f'{save_results_dir_path}/{result_filename}', index=False, mode='w') + if save_results_dir_path: + result_filename = f'Metrics_{config.dataset_name}_{model_name}_{config.n_estimators}_Estimators_{start_datetime.strftime("%Y%m%d__%H%M%S")}.csv' + model_metrics_dct[model_name].to_csv(f'{save_results_dir_path}/{result_filename}', index=False, mode='w') if return_fitted_bootstrap: return model_metrics_dct, models_fitted_bootstraps_dct diff --git a/virny/utils/data_viz_utils.py b/virny/utils/data_viz_utils.py index 8a762d89..a5724312 100644 --- a/virny/utils/data_viz_utils.py +++ b/virny/utils/data_viz_utils.py @@ -544,8 +544,7 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro pd_condition &= (pivoted_model_metrics_df[metric] 
>= min_range_val) & (pivoted_model_metrics_df[metric] <= max_range_val) num_satisfied_models_df = pivoted_model_metrics_df[pd_condition]['Model_Type'].value_counts().reset_index() - num_satisfied_models_df.rename(columns = {'Model_Type': 'Number_of_Models'}, inplace = True) - num_satisfied_models_df.rename(columns = {'index': 'Model_Type'}, inplace = True) + num_satisfied_models_df.rename(columns = {'count': 'Number_of_Models'}, inplace = True) # If a constraint for a metric group is not satisfied, add zeros for all model names if num_satisfied_models_df.shape[0] == 0: num_satisfied_models_df = pd.DataFrame({'Model_Type': model_types, From edb97b52bd4381076275ab6846a87de3c8a79440 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Wed, 4 Dec 2024 16:18:08 +0200 Subject: [PATCH 3/9] Adapted VirnyView for different pandas versions --- virny/custom_classes/metrics_interactive_visualizer.py | 2 +- virny/utils/data_viz_utils.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/virny/custom_classes/metrics_interactive_visualizer.py b/virny/custom_classes/metrics_interactive_visualizer.py index 90124a1b..f8791907 100644 --- a/virny/custom_classes/metrics_interactive_visualizer.py +++ b/virny/custom_classes/metrics_interactive_visualizer.py @@ -839,7 +839,7 @@ def _create_metrics_bar_chart_per_one_model(self, model_name: str, metrics_names alt.layer( models_metrics_chart, text, data=filtered_metrics_df ).properties( - width=500, + width=280 if metrics_type == 'group' else 300, height=100 ).facet( row=alt.Row('Metric:N', title=metrics_title, sort=metrics_names) diff --git a/virny/utils/data_viz_utils.py b/virny/utils/data_viz_utils.py index a5724312..5dbae755 100644 --- a/virny/utils/data_viz_utils.py +++ b/virny/utils/data_viz_utils.py @@ -502,7 +502,6 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro subgroup_metrics_per_model_df = all_subgroup_metrics_per_model_dct[model_name][ 
(all_subgroup_metrics_per_model_dct[model_name]['Subgroup'] == 'overall') ] - subgroup_metrics_per_model_df['Subgroup'] = subgroup_metrics_per_model_df['Subgroup'] aligned_subgroup_metrics_per_model_df = subgroup_metrics_per_model_df[group_metrics_per_model_df.columns] combined_metrics_per_model_df = pd.concat([group_metrics_per_model_df, aligned_subgroup_metrics_per_model_df]).reset_index(drop=True) @@ -543,8 +542,14 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro else: pd_condition &= (pivoted_model_metrics_df[metric] >= min_range_val) & (pivoted_model_metrics_df[metric] <= max_range_val) + # If-statement for different pandas versions num_satisfied_models_df = pivoted_model_metrics_df[pd_condition]['Model_Type'].value_counts().reset_index() - num_satisfied_models_df.rename(columns = {'count': 'Number_of_Models'}, inplace = True) + if 'count' in num_satisfied_models_df.columns: + num_satisfied_models_df.rename(columns = {'count': 'Number_of_Models'}, inplace = True) + else: + num_satisfied_models_df.rename(columns = {'Model_Type': 'Number_of_Models'}, inplace = True) + num_satisfied_models_df.rename(columns = {'index': 'Model_Type'}, inplace = True) + # If a constraint for a metric group is not satisfied, add zeros for all model names if num_satisfied_models_df.shape[0] == 0: num_satisfied_models_df = pd.DataFrame({'Model_Type': model_types, From c062fcdf0376fbf39e31ec40145bc75ed7bfeb87 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Sat, 14 Dec 2024 15:05:21 +0200 Subject: [PATCH 4/9] Added no bootstrap computation mode --- ...Models_Interface_With_Error_Analysis.ipynb | 2 +- ...e_Models_Interface_With_No_Bootstrap.ipynb | 1313 +++++++++++++++++ docs/examples/experiment_config.yaml | 7 +- ...assifier_1_Estimators_20241214__130145.csv | 10 + ...gression_1_Estimators_20241214__130145.csv | 10 + ...assifier_1_Estimators_20241214__130145.csv | 10 + ...assifier_1_Estimators_20241214__130145.csv | 10 + 
...assifier_1_Estimators_20241214__130307.csv | 10 + ...gression_1_Estimators_20241214__130307.csv | 10 + ...assifier_1_Estimators_20241214__130307.csv | 10 + ...assifier_1_Estimators_20241214__130307.csv | 10 + ..._Sensitive_Attributes_20241214__130145.csv | 5 + ..._Sensitive_Attributes_20241214__130307.csv | 5 + .../abstract_overall_variance_analyzer.py | 12 +- .../batch_overall_variance_analyzer.py | 6 +- ...verall_variance_analyzer_postprocessing.py | 6 +- virny/analyzers/subgroup_variance_analyzer.py | 16 +- .../analyzers/subgroup_variance_calculator.py | 4 +- virny/configs/constants.py | 1 + virny/user_interfaces/multiple_models_api.py | 5 +- ...iple_models_with_multiple_test_sets_api.py | 4 +- virny/utils/common_helpers.py | 50 +- virny/utils/stability_utils.py | 44 +- 23 files changed, 1499 insertions(+), 61 deletions(-) create mode 100644 docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv create mode 100644 
docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv create mode 100644 docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv create mode 100644 docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv create mode 100644 docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv diff --git a/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb b/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb index 13659c88..5b293086 100644 --- a/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb +++ b/docs/examples/Multiple_Models_Interface_With_Error_Analysis.ipynb @@ -208,7 +208,7 @@ "\n", "* **n_estimators**: int, the number of estimators for bootstrap to compute subgroup stability metrics.\n", "\n", - "* **computation_mode**: str, 'default' or 'error_analysis'. Name of the computation mode. When a default computation mode measures metrics for sex_priv and sex_dis, an `error_analysis` mode measures metrics for (sex_priv, sex_priv_correct, sex_priv_incorrect) and (sex_dis, sex_dis_correct, sex_dis_incorrect). Therefore, a user can analyze how a model is certain about its incorrect predictions.\n", + "* **computation_mode**: str, 'default', 'error_analysis', or 'no_bootstrap'. Name of the computation mode. 
When a default computation mode measures metrics for sex_priv and sex_dis, an `error_analysis` mode measures metrics for (sex_priv, sex_priv_correct, sex_priv_incorrect) and (sex_dis, sex_dis_correct, sex_dis_incorrect). Therefore, a user can analyze how a model is certain about its incorrect predictions.\n", "\n", "* **sensitive_attributes_dct**: dict, a dictionary where keys are sensitive attribute names (including intersectional attributes), and values are disadvantaged values for these attributes. Intersectional attributes must include '&' between sensitive attributes. You do not need to specify disadvantaged values for intersectional groups since they will be derived from disadvantaged values in sensitive_attributes_dct for each separate sensitive attribute in this intersectional pair.\n", "\n", diff --git a/docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb b/docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb new file mode 100644 index 00000000..cb38cd86 --- /dev/null +++ b/docs/examples/Multiple_Models_Interface_With_No_Bootstrap.ipynb @@ -0,0 +1,1313 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "248cbed8", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:01.510778Z", + "start_time": "2024-12-14T13:03:01.021557Z" + } + }, + "source": [ + "%matplotlib inline\n", + "%load_ext autoreload\n", + "%autoreload 2" + ], + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "code", + "id": "7ec6cd08", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:01.527295Z", + "start_time": "2024-12-14T13:03:01.518339Z" + } + }, + "source": [ + "import os\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "os.environ[\"PYTHONWARNINGS\"] = \"ignore\"" + ], + "outputs": [], + "execution_count": 2 + }, + { + "cell_type": "code", + "id": "b8cb69f2", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:01.541905Z", + "start_time": "2024-12-14T13:03:01.532512Z" + } + }, + 
"source": [ + "cur_folder_name = os.getcwd().split('/')[-1]\n", + "if cur_folder_name != \"Virny\":\n", + " os.chdir(\"../..\")\n", + "\n", + "print('Current location: ', os.getcwd())" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current location: /Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny\n" + ] + } + ], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "id": "a578f2ab", + "metadata": {}, + "source": "# Multiple Models Interface With No Bootstrap" + }, + { + "cell_type": "markdown", + "id": "2251a923", + "metadata": {}, + "source": [ + "In this example, we are going to conduct a deep performance profiling for 4 models. The only difference with the multiple models interface tutorial is the use of the `no_bootstrap` computation mode. This mode is useful in the case we do not need to measure stability and uncertainty metrics, but want to measure only accuracy and classic fairness metrics. The computation mode disables the bootstrap approach and greatly speeds up metric computation.\n", + "\n", + "For that, we will use `compute_metrics_with_config` interface that can compute metrics for multiple models. 
Thus, we will need to do the next steps:\n", + "\n", + "* Initialize input variables\n", + "\n", + "* Compute subgroup metrics\n", + "\n", + "* Perform disparity metrics composition using the Metric Composer\n", + "\n", + "* Create static visualizations using the Metric Visualizer" + ] + }, + { + "cell_type": "markdown", + "id": "606df34d", + "metadata": {}, + "source": [ + "## Import dependencies" + ] + }, + { + "cell_type": "code", + "id": "7a9241de", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.498252Z", + "start_time": "2024-12-14T13:03:01.672151Z" + } + }, + "source": [ + "import os\n", + "from pprint import pprint\n", + "from datetime import datetime, timezone\n", + "\n", + "from xgboost import XGBClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "from virny.utils.custom_initializers import create_config_obj, read_model_metric_dfs, create_models_config_from_tuned_params_df\n", + "from virny.user_interfaces.multiple_models_api import compute_metrics_with_config\n", + "from virny.preprocessing.basic_preprocessing import preprocess_dataset\n", + "from virny.custom_classes.metrics_composer import MetricsComposer\n", + "from virny.utils.model_tuning_utils import tune_ML_models\n", + "from virny.datasets import CompasWithoutSensitiveAttrsDataset" + ], + "outputs": [], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "id": "75699f5f", + "metadata": {}, + "source": [ + "## Initialize Input Variables" + ] + }, + { + "cell_type": "markdown", + "id": "e86f6556", + "metadata": {}, + "source": [ + "Based on the library flow, we need to create 3 input objects for a user interface:\n", + "\n", + "* A **config yaml** that is 
a file with configuration parameters for different user interfaces for metric computation.\n", + "\n", + "* A **dataset class** that is a wrapper above the user’s raw dataset that includes its descriptive attributes like a target column, numerical columns, categorical columns, etc. This class must be inherited from the BaseDataset class, which was created for user convenience.\n", + "\n", + "* Finally, a **models config** that is a Python dictionary, where keys are model names and values are initialized models for analysis. This dictionary helps conduct audits for different analysis modes and analyze different types of models." + ] + }, + { + "cell_type": "code", + "source": [ + "DATASET_SPLIT_SEED = 42\n", + "MODELS_TUNING_SEED = 42\n", + "TEST_SET_FRACTION = 0.2" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.522770Z", + "start_time": "2024-12-14T13:03:04.502996Z" + } + }, + "id": "ce359a052925eb3a", + "outputs": [], + "execution_count": 5 + }, + { + "cell_type": "code", + "source": [ + "models_params_for_tuning = {\n", + " 'DecisionTreeClassifier': {\n", + " 'model': DecisionTreeClassifier(random_state=MODELS_TUNING_SEED),\n", + " 'params': {\n", + " \"max_depth\": [20, 30],\n", + " \"min_samples_split\" : [0.1],\n", + " \"max_features\": ['sqrt'],\n", + " \"criterion\": [\"gini\", \"entropy\"]\n", + " }\n", + " },\n", + " 'LogisticRegression': {\n", + " 'model': LogisticRegression(random_state=MODELS_TUNING_SEED),\n", + " 'params': {\n", + " 'penalty': ['l2'],\n", + " 'C' : [0.0001, 0.1, 1, 100],\n", + " 'solver': ['newton-cg', 'lbfgs'],\n", + " 'max_iter': [250],\n", + " }\n", + " },\n", + " 'RandomForestClassifier': {\n", + " 'model': RandomForestClassifier(random_state=MODELS_TUNING_SEED),\n", + " 'params': {\n", + " \"max_depth\": [6, 10],\n", + " \"min_samples_leaf\": [1],\n", + " \"n_estimators\": [50, 100],\n", + " \"max_features\": [0.6]\n", + " }\n", + " },\n", + " 'XGBClassifier': {\n", + " 'model': 
XGBClassifier(random_state=MODELS_TUNING_SEED, verbosity=0),\n", + " 'params': {\n", + " 'learning_rate': [0.1],\n", + " 'n_estimators': [200],\n", + " 'max_depth': [5, 7],\n", + " 'lambda': [10, 100]\n", + " }\n", + " }\n", + "}" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.545549Z", + "start_time": "2024-12-14T13:03:04.527336Z" + } + }, + "id": "2ece07ab7e3a9acc", + "outputs": [], + "execution_count": 6 + }, + { + "cell_type": "markdown", + "source": [ + "### Create a config object" + ], + "metadata": { + "collapsed": false + }, + "id": "1090a686532d96f5" + }, + { + "cell_type": "markdown", + "source": [ + "`compute_metrics_with_config` interface requires that your **yaml file** includes the following parameters:\n", + "\n", + "* **dataset_name**: str, a name of your dataset; it will be used to name files with metrics.\n", + "\n", + "* **computation_mode**: str, 'default', 'error_analysis', or 'no_bootstrap'. Name of the computation mode.\n", + "\n", + "* **random_state**: int, a seed to control the randomness of the whole model evaluation pipeline.\n", + "\n", + "* **sensitive_attributes_dct**: dict, a dictionary where keys are sensitive attribute names (including intersectional attributes), and values are disadvantaged values for these attributes. Intersectional attributes must include '&' between sensitive attributes. You do not need to specify disadvantaged values for intersectional groups since they will be derived from disadvantaged values in sensitive_attributes_dct for each separate sensitive attribute in this intersectional pair.\n", + "\n", + "Note that disadvantaged value in a sensitive attribute dictionary must be **the same as in the original dataset**. 
For example, when distinct values of the _sex_ column in the original dataset are 'F' and 'M', and after pre-processing they became 0 and 1 respectively, you still need to set a disadvantaged value as 'F' or 'M' in the sensitive attribute dictionary." + ], + "metadata": { + "collapsed": false + }, + "id": "d0a03b8f5c5d0ea7" + }, + { + "cell_type": "code", + "source": [ + "ROOT_DIR = os.path.join('docs', 'examples')\n", + "config_yaml_path = os.path.join(ROOT_DIR, 'experiment_config.yaml')\n", + "config_yaml_content = \"\"\"\n", + "dataset_name: COMPAS_Without_Sensitive_Attributes\n", + "computation_mode: no_bootstrap\n", + "random_state: 42\n", + "sensitive_attributes_dct: {'sex': 1, 'race': 'African-American', 'sex&race': None}\n", + "\"\"\"\n", + "\n", + "with open(config_yaml_path, 'w', encoding='utf-8') as f:\n", + " f.write(config_yaml_content)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.571877Z", + "start_time": "2024-12-14T13:03:04.552978Z" + } + }, + "id": "af22ee06f1e3eb1a", + "outputs": [], + "execution_count": 7 + }, + { + "cell_type": "code", + "source": [ + "config = create_config_obj(config_yaml_path=config_yaml_path)\n", + "SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', f'{config.dataset_name}_Metrics_{datetime.now(timezone.utc).strftime(\"%Y%m%d__%H%M%S\")}')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.601486Z", + "start_time": "2024-12-14T13:03:04.580723Z" + } + }, + "id": "65181f72484bb92b", + "outputs": [], + "execution_count": 8 + }, + { + "cell_type": "markdown", + "id": "74f57422", + "metadata": {}, + "source": [ + "### Preprocess the dataset and create a BaseFlowDataset class" + ] + }, + { + "cell_type": "markdown", + "id": "eed149cd", + "metadata": {}, + "source": [ + "Based on the BaseDataset class, your **dataset class** should include the following attributes:\n", + "\n", + "* **Obligatory attributes**: dataset, target, 
features, numerical_columns, categorical_columns\n", + "\n", + "* **Optional attributes**: X_data, y_data, columns_with_nulls\n", + "\n", + "For more details, please refer to the library documentation." + ] + }, + { + "cell_type": "code", + "id": "6c55c6a0", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.642822Z", + "start_time": "2024-12-14T13:03:04.606712Z" + } + }, + "source": [ + "data_loader = CompasWithoutSensitiveAttrsDataset()\n", + "data_loader.X_data[data_loader.X_data.columns[:5]].head()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " juv_fel_count juv_misd_count juv_other_count priors_count \\\n", + "0 0.0 -2.340451 1.0 -15.010999 \n", + "1 0.0 0.000000 0.0 0.000000 \n", + "2 0.0 0.000000 0.0 0.000000 \n", + "3 0.0 0.000000 0.0 6.000000 \n", + "4 0.0 0.000000 0.0 7.513697 \n", + "\n", + " age_cat_25 - 45 \n", + "0 1 \n", + "1 1 \n", + "2 0 \n", + "3 1 \n", + "4 1 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
juv_fel_countjuv_misd_countjuv_other_countpriors_countage_cat_25 - 45
00.0-2.3404511.0-15.0109991
10.00.0000000.00.0000001
20.00.0000000.00.0000000
30.00.0000000.06.0000001
40.00.0000000.07.5136971
\n", + "
" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9 + }, + { + "cell_type": "code", + "source": [ + "column_transformer = ColumnTransformer(transformers=[\n", + " ('categorical_features', OneHotEncoder(handle_unknown='ignore', sparse_output=False), data_loader.categorical_columns),\n", + " ('numerical_features', StandardScaler(), data_loader.numerical_columns),\n", + "])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.667830Z", + "start_time": "2024-12-14T13:03:04.649199Z" + } + }, + "id": "ebbef5eaf9dc0943", + "outputs": [], + "execution_count": 10 + }, + { + "cell_type": "code", + "source": [ + "base_flow_dataset = preprocess_dataset(data_loader=data_loader, \n", + " column_transformer=column_transformer,\n", + " sensitive_attributes_dct=config.sensitive_attributes_dct,\n", + " test_set_fraction=TEST_SET_FRACTION,\n", + " dataset_split_seed=DATASET_SPLIT_SEED)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:04.712448Z", + "start_time": "2024-12-14T13:03:04.683336Z" + } + }, + "id": "97ed4609effbf53f", + "outputs": [], + "execution_count": 11 + }, + { + "cell_type": "markdown", + "source": [ + "### Tune models and create a models config for metrics computation" + ], + "metadata": { + "collapsed": false + }, + "id": "d538119a04cb3d80" + }, + { + "cell_type": "code", + "source": [ + "tuned_params_df, models_config = tune_ML_models(models_params_for_tuning, base_flow_dataset, config.dataset_name, n_folds=3)\n", + "tuned_params_df" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:07.249834Z", + "start_time": "2024-12-14T13:03:04.743779Z" + } + }, + "id": "782741c190a4690b", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024/12/14, 15:03:04: Tuning DecisionTreeClassifier...\n", + "2024/12/14, 15:03:06: Tuning for 
DecisionTreeClassifier is finished [F1 score = 0.6554846983071245, Accuracy = 0.6575048862828714]\n", + "\n", + "2024/12/14, 15:03:06: Tuning LogisticRegression...\n", + "2024/12/14, 15:03:06: Tuning for LogisticRegression is finished [F1 score = 0.6483823116804864, Accuracy = 0.6520611566087312]\n", + "\n", + "2024/12/14, 15:03:06: Tuning RandomForestClassifier...\n", + "2024/12/14, 15:03:06: Tuning for RandomForestClassifier is finished [F1 score = 0.6582739175359259, Accuracy = 0.6601120816372682]\n", + "\n", + "2024/12/14, 15:03:06: Tuning XGBClassifier...\n", + "2024/12/14, 15:03:07: Tuning for XGBClassifier is finished [F1 score = 0.6649018515640065, Accuracy = 0.6669791262841636]\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + " Dataset_Name Model_Name F1_Score \\\n", + "0 COMPAS_Without_Sensitive_Attributes DecisionTreeClassifier 0.655485 \n", + "1 COMPAS_Without_Sensitive_Attributes LogisticRegression 0.648382 \n", + "2 COMPAS_Without_Sensitive_Attributes RandomForestClassifier 0.658274 \n", + "3 COMPAS_Without_Sensitive_Attributes XGBClassifier 0.664902 \n", + "\n", + " Accuracy_Score Model_Best_Params \n", + "0 0.657505 {'criterion': 'gini', 'max_depth': 20, 'max_fe... \n", + "1 0.652061 {'C': 1, 'max_iter': 250, 'penalty': 'l2', 'so... \n", + "2 0.660112 {'max_depth': 10, 'max_features': 0.6, 'min_sa... \n", + "3 0.666979 {'lambda': 100, 'learning_rate': 0.1, 'max_dep... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Dataset_NameModel_NameF1_ScoreAccuracy_ScoreModel_Best_Params
0COMPAS_Without_Sensitive_AttributesDecisionTreeClassifier0.6554850.657505{'criterion': 'gini', 'max_depth': 20, 'max_fe...
1COMPAS_Without_Sensitive_AttributesLogisticRegression0.6483820.652061{'C': 1, 'max_iter': 250, 'penalty': 'l2', 'so...
2COMPAS_Without_Sensitive_AttributesRandomForestClassifier0.6582740.660112{'max_depth': 10, 'max_features': 0.6, 'min_sa...
3COMPAS_Without_Sensitive_AttributesXGBClassifier0.6649020.666979{'lambda': 100, 'learning_rate': 0.1, 'max_dep...
\n", + "
" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 12 + }, + { + "cell_type": "code", + "source": [ + "now = datetime.now(timezone.utc)\n", + "date_time_str = now.strftime(\"%Y%m%d__%H%M%S\")\n", + "tuned_df_path = os.path.join(ROOT_DIR, 'results', 'models_tuning', f'tuning_results_{config.dataset_name}_{date_time_str}.csv')\n", + "tuned_params_df.to_csv(tuned_df_path, sep=\",\", columns=tuned_params_df.columns, float_format=\"%.4f\", index=False)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:07.325010Z", + "start_time": "2024-12-14T13:03:07.299713Z" + } + }, + "id": "21ccc879c5c3e215", + "outputs": [], + "execution_count": 13 + }, + { + "cell_type": "markdown", + "source": [ + "Create models_config from the saved tuned_params_df for higher reliability" + ], + "metadata": { + "collapsed": false + }, + "id": "2da2057228e94ae5" + }, + { + "cell_type": "code", + "source": [ + "models_config = create_models_config_from_tuned_params_df(models_params_for_tuning, tuned_df_path)\n", + "pprint(models_config)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:07.406748Z", + "start_time": "2024-12-14T13:03:07.380171Z" + } + }, + "id": "3b15f202741fa2ae", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'DecisionTreeClassifier': DecisionTreeClassifier(max_depth=20, max_features='sqrt', min_samples_split=0.1,\n", + " random_state=42),\n", + " 'LogisticRegression': LogisticRegression(C=1, max_iter=250, random_state=42, solver='newton-cg'),\n", + " 'RandomForestClassifier': RandomForestClassifier(max_depth=10, max_features=0.6, random_state=42),\n", + " 'XGBClassifier': XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, 
eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, lambda=100, learning_rate=0.1,\n", + " max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=5, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=200, n_jobs=None,\n", + " num_parallel_tree=None, ...)}\n" + ] + } + ], + "execution_count": 14 + }, + { + "cell_type": "markdown", + "id": "f445b64a", + "metadata": {}, + "source": [ + "## Subgroup Metric Computation" + ] + }, + { + "cell_type": "markdown", + "id": "c3530f06", + "metadata": {}, + "source": [ + "After that we need to input the _BaseFlowDataset_ object, models config, and config yaml to a metric computation interface and execute it. The interface uses subgroup analyzers to compute different sets of metrics for each privileged and disadvantaged group. As for now, our library supports **Subgroup Variance Analyzer** and **Subgroup Error Analyzer**, but it is easily extensible to any other analyzers. When the variance and error analyzers complete metric computation, their metrics are combined, returned in a matrix format, and stored in a file if defined." 
+ ] + }, + { + "cell_type": "code", + "id": "197eadaa", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:08.416589Z", + "start_time": "2024-12-14T13:03:07.470832Z" + } + }, + "source": [ + "metrics_dct = compute_metrics_with_config(base_flow_dataset, config, models_config, SAVE_RESULTS_DIR_PATH,\n", + " notebook_logs_stdout=True,\n", + " with_predict_proba=False)" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "Analyze multiple models: 0%| | 0/4 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metricoverallsex_privsex_disrace_privrace_dis
0TPR0.6157110.4933330.6388890.5170070.660494
1TNR0.7230770.8308820.6904230.7865170.669811
2PPV0.6415930.6166670.6454080.5714290.670846
3FNR0.3842890.5066670.3611110.4829930.339506
4FPR0.2769230.1691180.3095770.2134830.330189
5Accuracy0.6751890.7109000.6662720.6908210.665109
6F10.6283860.5481480.6421320.5428570.665630
7Selection-Rate0.4280300.2843600.4639050.3212560.496885
8Sample_Size1056.000000211.000000845.000000414.000000642.000000
\n", + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 16 + }, + { + "cell_type": "markdown", + "id": "a7ff67e9", + "metadata": {}, + "source": [ + "## Disparity Metric Composition" + ] + }, + { + "cell_type": "markdown", + "id": "274c97e2", + "metadata": {}, + "source": [ + "To compose disparity metrics, the Metric Composer should be applied. **Metric Composer** is responsible for the second stage of the model audit. Currently, it computes our custom error disparity, stability disparity, and uncertainty disparity metrics, but extending it for new disparity metrics is very simple. We noticed that more and more disparity metrics have appeared during the last decade, but most of them are based on the same group specific metrics. Hence, such a separation of group specific and disparity metrics computation allows us to experiment with different combinations of group specific metrics and avoid group metrics recomputation for a new set of disparity metrics." 
+ ] + }, + { + "cell_type": "code", + "id": "f94a20dc", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:08.528522Z", + "start_time": "2024-12-14T13:03:08.504889Z" + } + }, + "source": [ + "models_metrics_dct = read_model_metric_dfs(SAVE_RESULTS_DIR_PATH, model_names=list(models_config.keys()))" + ], + "outputs": [], + "execution_count": 17 + }, + { + "cell_type": "code", + "id": "b04d06cf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:08.565733Z", + "start_time": "2024-12-14T13:03:08.545162Z" + } + }, + "source": [ + "metrics_composer = MetricsComposer(models_metrics_dct, config.sensitive_attributes_dct)" + ], + "outputs": [], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "id": "e1a23ece", + "metadata": {}, + "source": [ + "Compute composed metrics" + ] + }, + { + "cell_type": "code", + "id": "be6ace22", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:08.607374Z", + "start_time": "2024-12-14T13:03:08.575786Z" + } + }, + "source": [ + "models_composed_metrics_df = metrics_composer.compose_metrics()" + ], + "outputs": [], + "execution_count": 19 + }, + { + "cell_type": "code", + "source": [ + "models_composed_metrics_df" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-12-14T13:03:08.677252Z", + "start_time": "2024-12-14T13:03:08.653618Z" + } + }, + "id": "a286da0406c6401d", + "outputs": [ + { + "data": { + "text/plain": [ + " Metric sex race sex&race \\\n", + "0 Accuracy_Difference -0.044628 -0.025712 -0.052464 \n", + "1 Equalized_Odds_FNR -0.145556 -0.143487 -0.139463 \n", + "2 Equalized_Odds_FPR 0.140459 0.116706 0.172376 \n", + "3 Statistical_Parity_Difference 0.179545 0.175629 0.212669 \n", + "4 Disparate_Impact 1.631400 1.546694 1.661918 \n", + "5 Equalized_Odds_TNR -0.140459 -0.116706 -0.172376 \n", + "6 Equalized_Odds_TPR 0.145556 0.143487 0.139463 \n", + "7 Accuracy_Difference -0.004364 0.000226 -0.016544 \n", + "8 Equalized_Odds_FNR -0.187374 
-0.264613 -0.242125 \n", + "9 Equalized_Odds_FPR 0.100583 0.156102 0.187169 \n", + "10 Statistical_Parity_Difference 0.172439 0.249560 0.267602 \n", + "11 Disparate_Impact 1.596469 1.906296 1.914016 \n", + "12 Equalized_Odds_TNR -0.100583 -0.156102 -0.187169 \n", + "13 Equalized_Odds_TPR 0.187374 0.264613 0.242125 \n", + "14 Accuracy_Difference -0.032822 -0.018872 -0.024306 \n", + "15 Equalized_Odds_FNR -0.071010 -0.167486 -0.155195 \n", + "16 Equalized_Odds_FPR 0.084321 0.132888 0.146995 \n", + "17 Statistical_Parity_Difference 0.125034 0.205224 0.218215 \n", + "18 Disparate_Impact 1.361401 1.638817 1.648481 \n", + "19 Equalized_Odds_TNR -0.084321 -0.132888 -0.146995 \n", + "20 Equalized_Odds_TPR 0.071010 0.167486 0.155195 \n", + "21 Accuracy_Difference 0.005093 0.008398 -0.005309 \n", + "22 Equalized_Odds_FNR -0.126869 -0.168115 -0.153447 \n", + "23 Equalized_Odds_FPR 0.057137 0.086602 0.111180 \n", + "24 Statistical_Parity_Difference 0.127401 0.176554 0.195473 \n", + "25 Disparate_Impact 1.368242 1.518393 1.558798 \n", + "26 Equalized_Odds_TNR -0.057137 -0.086602 -0.111180 \n", + "27 Equalized_Odds_TPR 0.126869 0.168115 0.153447 \n", + "\n", + " Model_Name \n", + "0 DecisionTreeClassifier \n", + "1 DecisionTreeClassifier \n", + "2 DecisionTreeClassifier \n", + "3 DecisionTreeClassifier \n", + "4 DecisionTreeClassifier \n", + "5 DecisionTreeClassifier \n", + "6 DecisionTreeClassifier \n", + "7 LogisticRegression \n", + "8 LogisticRegression \n", + "9 LogisticRegression \n", + "10 LogisticRegression \n", + "11 LogisticRegression \n", + "12 LogisticRegression \n", + "13 LogisticRegression \n", + "14 RandomForestClassifier \n", + "15 RandomForestClassifier \n", + "16 RandomForestClassifier \n", + "17 RandomForestClassifier \n", + "18 RandomForestClassifier \n", + "19 RandomForestClassifier \n", + "20 RandomForestClassifier \n", + "21 XGBClassifier \n", + "22 XGBClassifier \n", + "23 XGBClassifier \n", + "24 XGBClassifier \n", + "25 XGBClassifier \n", + "26 
XGBClassifier \n", + "27 XGBClassifier " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metricsexracesex&raceModel_Name
0Accuracy_Difference-0.044628-0.025712-0.052464DecisionTreeClassifier
1Equalized_Odds_FNR-0.145556-0.143487-0.139463DecisionTreeClassifier
2Equalized_Odds_FPR0.1404590.1167060.172376DecisionTreeClassifier
3Statistical_Parity_Difference0.1795450.1756290.212669DecisionTreeClassifier
4Disparate_Impact1.6314001.5466941.661918DecisionTreeClassifier
5Equalized_Odds_TNR-0.140459-0.116706-0.172376DecisionTreeClassifier
6Equalized_Odds_TPR0.1455560.1434870.139463DecisionTreeClassifier
7Accuracy_Difference-0.0043640.000226-0.016544LogisticRegression
8Equalized_Odds_FNR-0.187374-0.264613-0.242125LogisticRegression
9Equalized_Odds_FPR0.1005830.1561020.187169LogisticRegression
10Statistical_Parity_Difference0.1724390.2495600.267602LogisticRegression
11Disparate_Impact1.5964691.9062961.914016LogisticRegression
12Equalized_Odds_TNR-0.100583-0.156102-0.187169LogisticRegression
13Equalized_Odds_TPR0.1873740.2646130.242125LogisticRegression
14Accuracy_Difference-0.032822-0.018872-0.024306RandomForestClassifier
15Equalized_Odds_FNR-0.071010-0.167486-0.155195RandomForestClassifier
16Equalized_Odds_FPR0.0843210.1328880.146995RandomForestClassifier
17Statistical_Parity_Difference0.1250340.2052240.218215RandomForestClassifier
18Disparate_Impact1.3614011.6388171.648481RandomForestClassifier
19Equalized_Odds_TNR-0.084321-0.132888-0.146995RandomForestClassifier
20Equalized_Odds_TPR0.0710100.1674860.155195RandomForestClassifier
21Accuracy_Difference0.0050930.008398-0.005309XGBClassifier
22Equalized_Odds_FNR-0.126869-0.168115-0.153447XGBClassifier
23Equalized_Odds_FPR0.0571370.0866020.111180XGBClassifier
24Statistical_Parity_Difference0.1274010.1765540.195473XGBClassifier
25Disparate_Impact1.3682421.5183931.558798XGBClassifier
26Equalized_Odds_TNR-0.057137-0.086602-0.111180XGBClassifier
27Equalized_Odds_TPR0.1268690.1681150.153447XGBClassifier
\n", + "
" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 20 + }, + { + "cell_type": "code", + "id": "2326c129", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-14T13:03:08.707240Z", + "start_time": "2024-12-14T13:03:08.705849Z" + } + }, + "source": [], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/experiment_config.yaml b/docs/examples/experiment_config.yaml index 6a134740..b0679c67 100644 --- a/docs/examples/experiment_config.yaml +++ b/docs/examples/experiment_config.yaml @@ -1,6 +1,5 @@ +dataset_name: COMPAS_Without_Sensitive_Attributes +computation_mode: no_bootstrap random_state: 42 -dataset_name: diabetes -bootstrap_fraction: 0.8 -n_estimators: 10 # Better to input the higher number of estimators than 100; this is only for this use case example -sensitive_attributes_dct: {'Gender': 'Female'} +sensitive_attributes_dct: {'sex': 1, 'race': 'African-American', 'sex&race': None} diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv new file mode 100644 index 00000000..1acd72f2 --- /dev/null +++ 
b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130145.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.6157112526539278,0.49333333333333335,0.6388888888888888,0.5170068027210885,0.6604938271604939,0.5319148936170213,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +TNR,0.7230769230769231,0.8308823529411765,0.6904231625835189,0.7865168539325843,0.6698113207547169,0.7958579881656804,0.6234817813765182,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +PPV,0.6415929203539823,0.6166666666666667,0.6454081632653061,0.5714285714285714,0.670846394984326,0.591715976331361,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +FNR,0.3842887473460722,0.5066666666666667,0.3611111111111111,0.48299319727891155,0.3395061728395062,0.46808510638297873,0.3286219081272085,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 
'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +FPR,0.27692307692307694,0.16911764705882354,0.30957683741648107,0.21348314606741572,0.330188679245283,0.20414201183431951,0.3765182186234818,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +Accuracy,0.6751893939393939,0.7109004739336493,0.6662721893491125,0.6908212560386473,0.6651090342679128,0.7015209125475285,0.6490566037735849,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +F1,0.628385698808234,0.5481481481481482,0.6421319796954315,0.5428571428571428,0.6656298600311042,0.5602240896358543,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +Selection-Rate,0.42803030303030304,0.2843601895734597,0.463905325443787,0.321256038647343,0.4968847352024922,0.32129277566539927,0.5339622641509434,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 
'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.001245 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv new file mode 100644 index 00000000..b34d607c --- /dev/null +++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130145.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.6242038216560509,0.4666666666666667,0.6540404040404041,0.4421768707482993,0.7067901234567902,0.4787234042553192,0.7208480565371025,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +TNR,0.7316239316239316,0.8088235294117647,0.7082405345211581,0.8164794007490637,0.660377358490566,0.8106508875739645,0.6234817813765182,LogisticRegression,42,"{'C': 1, 'class_weight': 
None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +PPV,0.6518847006651884,0.5737704918032787,0.6641025641025641,0.5701754385964912,0.6795252225519288,0.5844155844155844,0.6868686868686869,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +FNR,0.37579617834394907,0.5333333333333333,0.34595959595959597,0.5578231292517006,0.2932098765432099,0.5212765957446809,0.2791519434628975,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +FPR,0.26837606837606837,0.19117647058823528,0.29175946547884185,0.18352059925093633,0.33962264150943394,0.1893491124260355,0.3765182186234818,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +Accuracy,0.6837121212121212,0.6872037914691943,0.6828402366863905,0.6835748792270532,0.6838006230529595,0.6920152091254753,0.6754716981132075,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 
'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +F1,0.6377440347071583,0.5147058823529411,0.6590330788804071,0.49808429118773945,0.6928895612708018,0.5263157894736842,0.7034482758620689,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +Selection-Rate,0.4270833333333333,0.2890995260663507,0.46153846153846156,0.2753623188405797,0.5249221183800623,0.29277566539923955,0.560377358490566,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.00319945 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv new file mode 100644 index 00000000..b4a90a0b --- /dev/null +++ 
b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130145.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.673036093418259,0.6133333333333333,0.6843434343434344,0.5578231292517006,0.7253086419753086,0.5797872340425532,0.734982332155477,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +TNR,0.7367521367521368,0.8014705882352942,0.7171492204899778,0.8089887640449438,0.6761006289308176,0.7988165680473372,0.6518218623481782,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +PPV,0.673036093418259,0.6301369863013698,0.6809045226130653,0.6165413533834586,0.6952662721893491,0.615819209039548,0.7074829931972789,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 
'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +FNR,0.32696390658174096,0.38666666666666666,0.31565656565656564,0.4421768707482993,0.27469135802469136,0.42021276595744683,0.26501766784452296,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +FPR,0.26324786324786326,0.19852941176470587,0.2828507795100223,0.19101123595505617,0.3238993710691824,0.20118343195266272,0.3481781376518219,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +Accuracy,0.7083333333333334,0.7345971563981043,0.7017751479289941,0.7198067632850241,0.7009345794392523,0.720532319391635,0.6962264150943396,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 
+F1,0.673036093418259,0.6216216216216216,0.6826196473551638,0.5857142857142857,0.7099697885196374,0.5972602739726027,0.7209705372616985,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +Selection-Rate,0.4460227272727273,0.3459715639810427,0.4710059171597633,0.321256038647343,0.5264797507788161,0.3365019011406844,0.5547169811320755,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005280633333333334 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv 
b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv new file mode 100644 index 00000000..1cd21a91 --- /dev/null +++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130143/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130145.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.6666666666666666,0.56,0.6868686868686869,0.5510204081632653,0.7191358024691358,0.574468085106383,0.7279151943462897,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +TNR,0.7282051282051282,0.7720588235294118,0.7149220489977728,0.7752808988764045,0.6886792452830188,0.7751479289940828,0.6639676113360324,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 
'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +PPV,0.6638477801268499,0.5753424657534246,0.68,0.574468085106383,0.7018072289156626,0.5869565217391305,0.71280276816609,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +FNR,0.3333333333333333,0.44,0.31313131313131315,0.4489795918367347,0.2808641975308642,0.425531914893617,0.27208480565371024,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': 
None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +FPR,0.2717948717948718,0.22794117647058823,0.28507795100222716,0.2247191011235955,0.3113207547169811,0.22485207100591717,0.3360323886639676,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 
+Accuracy,0.7007575757575758,0.6966824644549763,0.7017751479289941,0.6956521739130435,0.7040498442367601,0.7034220532319392,0.6981132075471698,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +F1,0.6652542372881356,0.5675675675675675,0.6834170854271356,0.5625,0.7103658536585366,0.5806451612903226,0.7202797202797203,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 
'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +Selection-Rate,0.4479166666666667,0.3459715639810427,0.47337278106508873,0.34057971014492755,0.5171339563862928,0.34980988593155893,0.5452830188679245,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 
'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.007809566666666667 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv new file mode 100644 index 00000000..dbeb7662 --- /dev/null +++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_DecisionTreeClassifier_1_Estimators_20241214__130307.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.6157112526539278,0.49333333333333335,0.6388888888888888,0.5170068027210885,0.6604938271604939,0.5319148936170213,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 +TNR,0.7230769230769231,0.8308823529411765,0.6904231625835189,0.7865168539325843,0.6698113207547169,0.7958579881656804,0.6234817813765182,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 
42, 'splitter': 'best'}",0.0013096833333333332 +PPV,0.6415929203539823,0.6166666666666667,0.6454081632653061,0.5714285714285714,0.670846394984326,0.591715976331361,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 +FNR,0.3842887473460722,0.5066666666666667,0.3611111111111111,0.48299319727891155,0.3395061728395062,0.46808510638297873,0.3286219081272085,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 +FPR,0.27692307692307694,0.16911764705882354,0.30957683741648107,0.21348314606741572,0.330188679245283,0.20414201183431951,0.3765182186234818,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 +Accuracy,0.6751893939393939,0.7109004739336493,0.6662721893491125,0.6908212560386473,0.6651090342679128,0.7015209125475285,0.6490566037735849,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 
'best'}",0.0013096833333333332 +F1,0.628385698808234,0.5481481481481482,0.6421319796954315,0.5428571428571428,0.6656298600311042,0.5602240896358543,0.6713780918727915,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 +Selection-Rate,0.42803030303030304,0.2843601895734597,0.463905325443787,0.321256038647343,0.4968847352024922,0.32129277566539927,0.5339622641509434,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,DecisionTreeClassifier,42,"{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': 42, 'splitter': 'best'}",0.0013096833333333332 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv new file mode 100644 index 00000000..15d95060 --- /dev/null +++ 
b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_LogisticRegression_1_Estimators_20241214__130307.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.6242038216560509,0.4666666666666667,0.6540404040404041,0.4421768707482993,0.7067901234567902,0.4787234042553192,0.7208480565371025,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +TNR,0.7316239316239316,0.8088235294117647,0.7082405345211581,0.8164794007490637,0.660377358490566,0.8106508875739645,0.6234817813765182,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +PPV,0.6518847006651884,0.5737704918032787,0.6641025641025641,0.5701754385964912,0.6795252225519288,0.5844155844155844,0.6868686868686869,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +FNR,0.37579617834394907,0.5333333333333333,0.34595959595959597,0.5578231292517006,0.2932098765432099,0.5212765957446809,0.2791519434628975,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 
'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +FPR,0.26837606837606837,0.19117647058823528,0.29175946547884185,0.18352059925093633,0.33962264150943394,0.1893491124260355,0.3765182186234818,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +Accuracy,0.6837121212121212,0.6872037914691943,0.6828402366863905,0.6835748792270532,0.6838006230529595,0.6920152091254753,0.6754716981132075,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +F1,0.6377440347071583,0.5147058823529411,0.6590330788804071,0.49808429118773945,0.6928895612708018,0.5263157894736842,0.7034482758620689,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +Selection-Rate,0.4270833333333333,0.2890995260663507,0.46153846153846156,0.2753623188405797,0.5249221183800623,0.29277566539923955,0.560377358490566,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 
'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,LogisticRegression,42,"{'C': 1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 250, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}",0.0014195333333333333 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv new file mode 100644 index 00000000..a95ee68d --- /dev/null +++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_RandomForestClassifier_1_Estimators_20241214__130307.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.673036093418259,0.6133333333333333,0.6843434343434344,0.5578231292517006,0.7253086419753086,0.5797872340425532,0.734982332155477,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 
+TNR,0.7367521367521368,0.8014705882352942,0.7171492204899778,0.8089887640449438,0.6761006289308176,0.7988165680473372,0.6518218623481782,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +PPV,0.673036093418259,0.6301369863013698,0.6809045226130653,0.6165413533834586,0.6952662721893491,0.615819209039548,0.7074829931972789,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +FNR,0.32696390658174096,0.38666666666666666,0.31565656565656564,0.4421768707482993,0.27469135802469136,0.42021276595744683,0.26501766784452296,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +FPR,0.26324786324786326,0.19852941176470587,0.2828507795100223,0.19101123595505617,0.3238993710691824,0.20118343195266272,0.3481781376518219,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 
'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +Accuracy,0.7083333333333334,0.7345971563981043,0.7017751479289941,0.7198067632850241,0.7009345794392523,0.720532319391635,0.6962264150943396,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +F1,0.673036093418259,0.6216216216216216,0.6826196473551638,0.5857142857142857,0.7099697885196374,0.5972602739726027,0.7209705372616985,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +Selection-Rate,0.4460227272727273,0.3459715639810427,0.4710059171597633,0.321256038647343,0.5264797507788161,0.3365019011406844,0.5547169811320755,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 
2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,RandomForestClassifier,42,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 0.6, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}",0.005975766666666666 diff --git a/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv new file mode 100644 index 00000000..7904f20b --- /dev/null +++ b/docs/examples/results/COMPAS_Without_Sensitive_Attributes_Metrics_20241214__130304/Metrics_COMPAS_Without_Sensitive_Attributes_XGBClassifier_1_Estimators_20241214__130307.csv @@ -0,0 +1,10 @@ +Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Name,Virny_Random_State,Model_Params,Runtime_in_Mins +TPR,0.6666666666666666,0.56,0.6868686868686869,0.5510204081632653,0.7191358024691358,0.574468085106383,0.7279151943462897,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 
'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +TNR,0.7282051282051282,0.7720588235294118,0.7149220489977728,0.7752808988764045,0.6886792452830188,0.7751479289940828,0.6639676113360324,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +PPV,0.6638477801268499,0.5753424657534246,0.68,0.574468085106383,0.7018072289156626,0.5869565217391305,0.71280276816609,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 
'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +FNR,0.3333333333333333,0.44,0.31313131313131315,0.4489795918367347,0.2808641975308642,0.425531914893617,0.27208480565371024,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +FPR,0.2717948717948718,0.22794117647058823,0.28507795100222716,0.2247191011235955,0.3113207547169811,0.22485207100591717,0.3360323886639676,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 
'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +Accuracy,0.7007575757575758,0.6966824644549763,0.7017751479289941,0.6956521739130435,0.7040498442367601,0.7034220532319392,0.6981132075471698,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 
+F1,0.6652542372881356,0.5675675675675675,0.6834170854271356,0.5625,0.7103658536585366,0.5806451612903226,0.7202797202797203,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +Selection-Rate,0.4479166666666667,0.3459715639810427,0.47337278106508873,0.34057971014492755,0.5171339563862928,0.34980988593155893,0.5452830188679245,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 
'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 +Sample_Size,1056.0,211.0,845.0,414.0,642.0,526.0,530.0,XGBClassifier,42,"{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': 0.1, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': 5, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 200, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': 0, 'lambda': 100}",0.006558716666666667 diff --git a/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv new file mode 100644 index 00000000..843621ef --- /dev/null +++ b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130145.csv @@ -0,0 +1,5 @@ +Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params +COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.6555,0.6575,"{'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_split': 0.1}" +COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.6484,0.6521,"{'C': 1, 'max_iter': 250, 'penalty': 'l2', 'solver': 'newton-cg'}" 
+COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.6583,0.6601,"{'max_depth': 10, 'max_features': 0.6, 'min_samples_leaf': 1, 'n_estimators': 100}" +COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.6649,0.6670,"{'lambda': 100, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}" diff --git a/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv new file mode 100644 index 00000000..843621ef --- /dev/null +++ b/docs/examples/results/models_tuning/tuning_results_COMPAS_Without_Sensitive_Attributes_20241214__130307.csv @@ -0,0 +1,5 @@ +Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params +COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.6555,0.6575,"{'criterion': 'gini', 'max_depth': 20, 'max_features': 'sqrt', 'min_samples_split': 0.1}" +COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.6484,0.6521,"{'C': 1, 'max_iter': 250, 'penalty': 'l2', 'solver': 'newton-cg'}" +COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.6583,0.6601,"{'max_depth': 10, 'max_features': 0.6, 'min_samples_leaf': 1, 'n_estimators': 100}" +COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.6649,0.6670,"{'lambda': 100, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}" diff --git a/virny/analyzers/abstract_overall_variance_analyzer.py b/virny/analyzers/abstract_overall_variance_analyzer.py index 9454b6a2..feae0310 100644 --- a/virny/analyzers/abstract_overall_variance_analyzer.py +++ b/virny/analyzers/abstract_overall_variance_analyzer.py @@ -51,8 +51,8 @@ class AbstractOverallVarianceAnalyzer(metaclass=ABCMeta): def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float, X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame, y_test: pd.DataFrame, - dataset_name: str, n_estimators: int, random_state: int = None, with_predict_proba: 
bool = True, - notebook_logs_stdout: bool = False, verbose: int = 0): + dataset_name: str, n_estimators: int, random_state: int = None, computation_mode: str = None, + with_predict_proba: bool = True, notebook_logs_stdout: bool = False, verbose: int = 0): self.base_model = base_model self.base_model_name = base_model_name self.bootstrap_fraction = bootstrap_fraction @@ -60,6 +60,7 @@ def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float, self.n_estimators = n_estimators self.models_lst = [deepcopy(base_model) for _ in range(n_estimators)] self.random_state = random_state + self.computation_mode = computation_mode self.with_predict_proba = with_predict_proba self.models_predictions = None self.prediction_metrics = None @@ -102,7 +103,10 @@ def compute_metrics(self, save_results: bool = True, with_fit: bool = True): self.models_predictions = self.UQ_by_boostrap(boostrap_size, with_replacement=True, with_fit=with_fit) # Count metrics based on prediction proba results - y_preds, self.prediction_metrics = count_prediction_metrics(self.y_test.values, self.models_predictions, self.with_predict_proba) + y_preds, self.prediction_metrics = count_prediction_metrics(y_true=self.y_test.values, + uq_results=self.models_predictions, + computation_mode=self.computation_mode, + with_predict_proba=self.with_predict_proba) self._logger.info(f'Successfully computed predict proba metrics') if save_results: @@ -138,7 +142,7 @@ def UQ_by_boostrap(self, boostrap_size: int, with_replacement: bool, with_fit: b else: from tqdm import tqdm - cycle_range = range(self.n_estimators) if with_fit is False else \ + cycle_range = range(self.n_estimators) if with_fit is False or self.n_estimators == 1 else \ tqdm(range(self.n_estimators), desc="Classifiers testing by bootstrap", colour="blue", diff --git a/virny/analyzers/batch_overall_variance_analyzer.py b/virny/analyzers/batch_overall_variance_analyzer.py index ed01ade2..64d22b80 100644 --- 
a/virny/analyzers/batch_overall_variance_analyzer.py +++ b/virny/analyzers/batch_overall_variance_analyzer.py @@ -32,6 +32,8 @@ class BatchOverallVarianceAnalyzer(AbstractOverallVarianceAnalyzer): Number of estimators in ensemble to measure base_model stability random_state [Optional] Controls the randomness of the bootstrap approach for model arbitrariness evaluation + computation_mode + [Optional] A non-default mode for metrics computation. Should be included in the ComputationMode enum. with_predict_proba [Optional] A flag if model can return probabilities for its predictions. If no, only metrics based on labels (not labels and probabilities) will be computed. @@ -46,7 +48,8 @@ class BatchOverallVarianceAnalyzer(AbstractOverallVarianceAnalyzer): def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float, X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame, y_test: pd.DataFrame, target_column: str, dataset_name: str, n_estimators: int, random_state: int = None, - with_predict_proba: bool = True, notebook_logs_stdout: bool = False, verbose: int = 0): + computation_mode: str = None, with_predict_proba: bool = True, + notebook_logs_stdout: bool = False, verbose: int = 0): super().__init__(base_model=base_model, base_model_name=base_model_name, bootstrap_fraction=bootstrap_fraction, @@ -57,6 +60,7 @@ def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float, dataset_name=dataset_name, n_estimators=n_estimators, random_state=random_state, + computation_mode=computation_mode, with_predict_proba=with_predict_proba, notebook_logs_stdout=notebook_logs_stdout, verbose=verbose) diff --git a/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py b/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py index 56ea0bed..146898ef 100644 --- a/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py +++ b/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py @@ -43,6 +43,8 @@ class 
BatchOverallVarianceAnalyzerPostProcessing(BatchOverallVarianceAnalyzer): Number of estimators in ensemble to measure base_model stability random_state [Optional] Controls the randomness of the bootstrap approach for model arbitrariness evaluation + computation_mode + [Optional] A non-default mode for metrics computation. Should be included in the ComputationMode enum. with_predict_proba [Optional] A flag if model can return probabilities for its predictions. If no, only metrics based on labels (not labels and probabilities) will be computed. @@ -58,7 +60,8 @@ def __init__(self, postprocessor, sensitive_attribute: str, base_model, base_model_name: str, bootstrap_fraction: float, X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame, y_test: pd.DataFrame, target_column: str, dataset_name: str, n_estimators: int, random_state: int = None, - with_predict_proba: bool = True, notebook_logs_stdout: bool = False, verbose: int = 0): + computation_mode: str = None, with_predict_proba: bool = True, + notebook_logs_stdout: bool = False, verbose: int = 0): if sensitive_attribute is None: raise ValueError('Sensitive attribute for postprocessing is not defined. 
' 'Please, set postprocessing_sensitive_attribute argument in the metric computation config.') @@ -74,6 +77,7 @@ def __init__(self, postprocessor, sensitive_attribute: str, dataset_name=dataset_name, n_estimators=n_estimators, random_state=random_state, + computation_mode=computation_mode, with_predict_proba=with_predict_proba, notebook_logs_stdout=notebook_logs_stdout, verbose=verbose) diff --git a/virny/analyzers/subgroup_variance_analyzer.py b/virny/analyzers/subgroup_variance_analyzer.py index dae8b81d..ccba8447 100644 --- a/virny/analyzers/subgroup_variance_analyzer.py +++ b/virny/analyzers/subgroup_variance_analyzer.py @@ -1,6 +1,6 @@ import pandas as pd -from virny.configs.constants import ModelSetting +from virny.configs.constants import ModelSetting, ComputationMode from virny.custom_classes.base_dataset import BaseFlowDataset from virny.analyzers.subgroup_variance_calculator import SubgroupVarianceCalculator from virny.analyzers.batch_overall_variance_analyzer import BatchOverallVarianceAnalyzer @@ -76,6 +76,7 @@ def __init__(self, model_setting: ModelSetting, n_estimators: int, base_model, b target_column=dataset.target, n_estimators=n_estimators, random_state=random_state, + computation_mode=computation_mode, with_predict_proba=with_predict_proba, notebook_logs_stdout=notebook_logs_stdout, verbose=verbose) @@ -91,12 +92,14 @@ def __init__(self, model_setting: ModelSetting, n_estimators: int, base_model, b target_column=dataset.target, n_estimators=n_estimators, random_state=random_state, + computation_mode=computation_mode, with_predict_proba=with_predict_proba, notebook_logs_stdout=notebook_logs_stdout, verbose=verbose) else: raise ValueError('model_setting is incorrect or not supported') + self.computation_mode = computation_mode self.dataset_name = overall_variance_analyzer.dataset_name self.n_estimators = overall_variance_analyzer.n_estimators self.base_model_name = overall_variance_analyzer.base_model_name @@ -161,11 +164,12 @@ def 
compute_metrics(self, save_results: bool, result_filename: str = None, model_dct['postprocessor'] = self.__overall_variance_analyzer.postprocessors_lst[model_idx] fitted_bootstrap.append(model_dct) - # Count and display fairness metrics + # Count variance metrics for subgroups self.__subgroup_variance_calculator.set_overall_variance_metrics(self.overall_variance_metrics_dct) - self.subgroup_variance_metrics_dct = self.__subgroup_variance_calculator.compute_subgroup_metrics( - y_preds, self.__overall_variance_analyzer.models_predictions, - save_results, result_filename, save_dir_path - ) + self.subgroup_variance_metrics_dct = dict() if self.computation_mode == ComputationMode.NO_BOOTSTRAP.value else \ + self.__subgroup_variance_calculator.compute_subgroup_metrics( + y_preds, self.__overall_variance_analyzer.models_predictions, + save_results, result_filename, save_dir_path + ) return y_preds, pd.DataFrame(self.subgroup_variance_metrics_dct), fitted_bootstrap diff --git a/virny/analyzers/subgroup_variance_calculator.py b/virny/analyzers/subgroup_variance_calculator.py index aa2c8af0..f91884e2 100644 --- a/virny/analyzers/subgroup_variance_calculator.py +++ b/virny/analyzers/subgroup_variance_calculator.py @@ -103,7 +103,9 @@ def _partition_and_compute_metrics_for_error_analysis(self, y_preds, models_pred return results def _compute_metrics(self, y_test: pd.DataFrame, group_models_predictions): - _, prediction_metrics = count_prediction_metrics(y_test, group_models_predictions, + _, prediction_metrics = count_prediction_metrics(y_true=y_test, + uq_results=group_models_predictions, + computation_mode=self.computation_mode, with_predict_proba=self.with_predict_proba) return prediction_metrics diff --git a/virny/configs/constants.py b/virny/configs/constants.py index f237771a..f5735920 100644 --- a/virny/configs/constants.py +++ b/virny/configs/constants.py @@ -7,6 +7,7 @@ class ModelSetting(Enum): class ComputationMode(Enum): ERROR_ANALYSIS = "error_analysis" + 
NO_BOOTSTRAP = "no_bootstrap" INTERSECTION_SIGN = '&' diff --git a/virny/user_interfaces/multiple_models_api.py b/virny/user_interfaces/multiple_models_api.py index df327098..2bffbf6b 100644 --- a/virny/user_interfaces/multiple_models_api.py +++ b/virny/user_interfaces/multiple_models_api.py @@ -4,7 +4,7 @@ import pandas as pd from datetime import datetime, timezone -from virny.configs.constants import ModelSetting +from virny.configs.constants import ModelSetting, ComputationMode from virny.custom_classes.base_dataset import BaseFlowDataset from virny.preprocessing.basic_preprocessing import preprocess_base_model from virny.analyzers.subgroup_variance_analyzer import SubgroupVarianceAnalyzer @@ -255,6 +255,8 @@ def compute_one_model_metrics(base_model, n_estimators: int, dataset: BaseFlowDa As for now, 0, 1, 2 levels are supported. """ + if computation_mode == ComputationMode.NO_BOOTSTRAP.value: + with_predict_proba = False model_setting = ModelSetting.BATCH if model_setting is None else ModelSetting[model_setting.upper()] test_protected_groups = create_test_protected_groups(dataset.X_test, dataset.init_sensitive_attrs_df, sensitive_attributes_dct) @@ -283,7 +285,6 @@ def compute_one_model_metrics(base_model, n_estimators: int, dataset: BaseFlowDa y_preds, variance_metrics_df, fitted_bootstrap = subgroup_variance_analyzer.compute_metrics(save_results=False, result_filename=None, save_dir_path=None) - # Compute error metrics for subgroups error_analyzer = SubgroupErrorAnalyzer(X_test=dataset.X_test, y_test=dataset.y_test, diff --git a/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py b/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py index cda7becb..c81daa42 100644 --- a/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py +++ b/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py @@ -3,7 +3,7 @@ import pandas as pd from datetime import datetime, timezone -from virny.configs.constants import 
ModelSetting +from virny.configs.constants import ModelSetting, ComputationMode from virny.utils.protected_groups_partitioning import create_test_protected_groups from virny.preprocessing.basic_preprocessing import preprocess_base_model from virny.custom_classes.base_dataset import BaseFlowDataset @@ -254,6 +254,8 @@ def compute_one_model_metrics_with_multiple_test_sets(base_model, n_estimators: As for now, 0, 1, 2 levels are supported. """ + if computation_mode == ComputationMode.NO_BOOTSTRAP.value: + with_predict_proba = False model_setting = ModelSetting.BATCH if model_setting is None else ModelSetting[model_setting.upper()] subgroup_variance_analyzer = SubgroupVarianceAnalyzer(model_setting=model_setting, n_estimators=n_estimators, diff --git a/virny/utils/common_helpers.py b/virny/utils/common_helpers.py index 6a3a7c2b..f1e69fe4 100644 --- a/virny/utils/common_helpers.py +++ b/virny/utils/common_helpers.py @@ -23,19 +23,42 @@ def validate_config(config_obj): Object with parameters defined in a yaml file """ + # ============================================================================================================ + # Optional parameters + # ============================================================================================================ + if config_obj.model_setting is not None \ + and not isinstance(config_obj.model_setting, str) \ + and config_obj.model_setting not in ModelSetting: + raise ValueError('model_setting must be a string that is included in the ModelSetting enum. ' + 'Refer to this function documentation for more details!') + + if config_obj.computation_mode is not None \ + and not isinstance(config_obj.computation_mode, str) \ + and config_obj.computation_mode not in ComputationMode: + raise ValueError('computation_mode must be a string that is included in the ComputationMode enum. 
' + 'Refer to this function documentation for more details!') + + # ============================================================================================================ + # Arguments pre-setting depending on the configs + # ============================================================================================================ + if config_obj.computation_mode == ComputationMode.NO_BOOTSTRAP.value: + config_obj.bootstrap_fraction = 1.0 + config_obj.n_estimators = 1 + # ============================================================================================================ # Required parameters # ============================================================================================================ if not isinstance(config_obj.dataset_name, str): raise ValueError('dataset_name must be string') - if not isinstance(config_obj.bootstrap_fraction, float) \ - or config_obj.bootstrap_fraction < 0.0 \ - or config_obj.bootstrap_fraction > 1.0: - raise ValueError('bootstrap_fraction must be float in [0.0, 1.0] range') + if config_obj.computation_mode != ComputationMode.NO_BOOTSTRAP.value: + if not isinstance(config_obj.bootstrap_fraction, float) \ + or config_obj.bootstrap_fraction < 0.0 \ + or config_obj.bootstrap_fraction > 1.0: + raise ValueError('bootstrap_fraction must be float in [0.0, 1.0] range') - if not isinstance(config_obj.n_estimators, int) or config_obj.n_estimators <= 1: - raise ValueError('n_estimators must be integer greater than 1') + if not isinstance(config_obj.n_estimators, int) or config_obj.n_estimators <= 1: + raise ValueError('n_estimators must be integer greater than 1') if not isinstance(config_obj.sensitive_attributes_dct, dict): raise ValueError('sensitive_attributes_dct must be python dictionary') @@ -56,21 +79,6 @@ def validate_config(config_obj): raise ValueError('Intersectional attributes in sensitive_attributes_dct must contain ' 'single sensitive attributes that also exist in sensitive_attributes_dct') - # 
============================================================================================================ - # Optional parameters - # ============================================================================================================ - if config_obj.model_setting is not None \ - and not isinstance(config_obj.model_setting, str) \ - and config_obj.model_setting not in ModelSetting: - raise ValueError('model_setting must be a string that is included in the ModelSetting enum. ' - 'Refer to this function documentation for more details!') - - if config_obj.computation_mode is not None \ - and not isinstance(config_obj.computation_mode, str) \ - and config_obj.computation_mode not in ComputationMode: - raise ValueError('computation_mode must be a string that is included in the ComputationMode enum. ' - 'Refer to this function documentation for more details!') - # ============================================================================================================ # Default parameters # ============================================================================================================ diff --git a/virny/utils/stability_utils.py b/virny/utils/stability_utils.py index 9ea34078..a3a6c105 100644 --- a/virny/utils/stability_utils.py +++ b/virny/utils/stability_utils.py @@ -2,7 +2,7 @@ import numpy as np import pandas as pd -from virny.configs.constants import ALEATORIC_UNCERTAINTY, EPISTEMIC_UNCERTAINTY, OVERALL_UNCERTAINTY +from virny.configs.constants import ALEATORIC_UNCERTAINTY, EPISTEMIC_UNCERTAINTY, OVERALL_UNCERTAINTY, ComputationMode from virny.metrics import METRIC_TO_FUNCTION, METRICS_FOR_PREDICT_PROBA, METRICS_FOR_LABELS @@ -31,7 +31,7 @@ def combine_bootstrap_predictions(bootstrap_predictions: dict, y_test_indexes: n return pd.Series(y_preds, index=y_test_indexes) -def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True): +def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True, 
computation_mode = None): """ Compute means, stds, iqr, entropy, jitter, label stability, and transform predictions to pd.Dataframe. @@ -46,6 +46,8 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True with_predict_proba [Optional] A flag if model can return probabilities for its predictions. If no, only metrics based on labels (not labels and probabilities) will be computed. + computation_mode + [Optional] A mode for computing metrics """ if isinstance(uq_results, np.ndarray): @@ -54,27 +56,28 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True results = pd.DataFrame(uq_results).transpose() metrics_dct = dict() - # Compute metrics for prediction probabilities - if not with_predict_proba: - uq_labels = results - else: - uq_predict_probas = results - for metric in METRICS_FOR_PREDICT_PROBA: - if metric == EPISTEMIC_UNCERTAINTY: # skip computation for a metric that is based on two other metrics - continue + if computation_mode != ComputationMode.NO_BOOTSTRAP.value: # Do not compute stability and uncertainty metrics for NO_BOOTSTRAP + # Compute metrics for prediction probabilities + if not with_predict_proba: + uq_labels = results + else: + uq_predict_probas = results + for metric in METRICS_FOR_PREDICT_PROBA: + if metric == EPISTEMIC_UNCERTAINTY: # skip computation for a metric that is based on two other metrics + continue - metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_predict_probas) + metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_predict_probas) - metrics_dct[EPISTEMIC_UNCERTAINTY] = metrics_dct[OVERALL_UNCERTAINTY] - metrics_dct[ALEATORIC_UNCERTAINTY] + metrics_dct[EPISTEMIC_UNCERTAINTY] = metrics_dct[OVERALL_UNCERTAINTY] - metrics_dct[ALEATORIC_UNCERTAINTY] - # Convert predict proba results of each model to correspondent labels. - # Here we use int(x<0.5) since we use predict_prob()[:, 0] to make predictions. 
- # Hence, if a value is, for example, 0.3 --> label == 1, 0.6 -- > label == 0 - uq_labels = (results < 0.5).astype(int) + # Convert predict proba results of each model to correspondent labels. + # Here we use int(x<0.5) since we use predict_prob()[:, 0] to make predictions. + # Hence, if a value is, for example, 0.3 --> label == 1, 0.6 -- > label == 0 + uq_labels = (results < 0.5).astype(int) - # Compute metrics for prediction labels - for metric in METRICS_FOR_LABELS: - metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_labels) + # Compute metrics for prediction labels + for metric in METRICS_FOR_LABELS: + metrics_dct[metric] = METRIC_TO_FUNCTION[metric](y_true, uq_labels) if with_predict_proba: y_preds = np.array([int(x<0.5) for x in results.mean().values]) @@ -85,6 +88,9 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True def generate_bootstrap(features, labels, boostrap_size, with_replacement=True, random_state=None): + if boostrap_size == features.shape[0]: + return pd.DataFrame(features), pd.DataFrame(labels) + # Create a local random state. 
# Note that to keep reverse compatibility we need to use different generators for different python versions # since random number generation was changed in Python 3.12 From 6fb06ba0ea6d2ae302177070cb938ccbd99dd801 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Sun, 15 Dec 2024 01:52:02 +0200 Subject: [PATCH 5/9] Added none constraints for an Interactive Visualizer --- .../metrics_interactive_visualizer.py | 28 ++++++++++++------- virny/utils/data_viz_utils.py | 9 +++--- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/virny/custom_classes/metrics_interactive_visualizer.py b/virny/custom_classes/metrics_interactive_visualizer.py index f8791907..c1b6b1e1 100644 --- a/virny/custom_classes/metrics_interactive_visualizer.py +++ b/virny/custom_classes/metrics_interactive_visualizer.py @@ -180,7 +180,7 @@ def create_web_app(self, start_app=True): overall_metric_max_val1 = gr.Text(value="1.0", label="Max value", scale=1) with gr.Row(): disparity_metric1 = gr.Dropdown( - sorted(self.all_disparity_metrics), + ['None'] + sorted(self.all_disparity_metrics), value='Equalized_Odds_FPR', multiselect=False, label="Disparity Constraint (C2)", scale=2 ) @@ -188,7 +188,7 @@ def create_web_app(self, start_app=True): disparity_metric_max_val1 = gr.Text(value="1.0", label="Max value", scale=1) with gr.Row(): overall_metric2 = gr.Dropdown( - sorted(self.all_overall_metrics), + ['None'] + sorted(self.all_overall_metrics), value='Label_Stability', multiselect=False, label="Overall Constraint (C3)", scale=2 ) @@ -196,7 +196,7 @@ def create_web_app(self, start_app=True): overall_metric_max_val2 = gr.Text(value="1.0", label="Max value", scale=1) with gr.Row(): disparity_metric2 = gr.Dropdown( - sorted(self.all_disparity_metrics), + ['None'] + sorted(self.all_disparity_metrics), value='Label_Stability_Ratio', multiselect=False, label="Disparity Constraint (C4)", scale=2 ) @@ -206,7 +206,7 @@ def create_web_app(self, start_app=True): btn_view1 = gr.Button("Submit") with 
gr.Column(scale=3): bar_plot_for_model_selection = gr.Plot(label="Bar Chart") - df_with_models_satisfied_all_constraints = gr.DataFrame(label='Models that satisfy all 4 constraints') + df_with_models_satisfied_all_constraints = gr.DataFrame(label='Models that satisfy all constraints') btn_view1.click(self._create_bar_plot_for_model_selection, inputs=[group_name, @@ -575,13 +575,20 @@ def _create_bar_plot_for_model_selection(self, group_name, overall_metric1, over # Create individual constraints metrics_value_range_dct = dict() for constraint in [overall_constraint1, disparity_constraint1, overall_constraint2, disparity_constraint2]: - metrics_value_range_dct[constraint[0]] = [constraint[1], constraint[2]] + if constraint[0] != 'None': + metrics_value_range_dct[constraint[0]] = [constraint[1], constraint[2]] + # Create intersectional constraints - metrics_value_range_dct[f'{overall_constraint1[0]}&{disparity_constraint1[0]}'] = None - metrics_value_range_dct[f'{overall_constraint1[0]}&{overall_constraint2[0]}'] = None - metrics_value_range_dct[f'{overall_constraint1[0]}&{disparity_constraint2[0]}'] = None - metrics_value_range_dct[(f'{overall_constraint1[0]}&{disparity_constraint1[0]}' - f'&{overall_constraint2[0]}&{disparity_constraint2[0]}')] = None + for constrain_pair in [(overall_constraint1[0], disparity_constraint1[0]), + (overall_constraint1[0], overall_constraint2[0]), + (overall_constraint1[0], disparity_constraint2[0])]: + if constrain_pair[0] != 'None' and constrain_pair[1] != 'None': + metrics_value_range_dct[f'{constrain_pair[0]}&{constrain_pair[1]}'] = None + + all_constrains_str = '&'.join( + [c for c in [overall_constraint1[0], disparity_constraint1[0], overall_constraint2[0], disparity_constraint2[0]] if c != 'None'] + ) + metrics_value_range_dct[all_constrains_str] = None melted_all_subgroup_metrics_per_model_dct = dict() for model_name in self.melted_model_metrics_df['Model_Name'].unique(): @@ -597,6 +604,7 @@ def 
_create_bar_plot_for_model_selection(self, group_name, overall_metric1, over melted_all_group_metrics_per_model_dct, metrics_value_range_dct, group=group_name, + num_constrains=all_constrains_str.count('&') + 1, metric_name_to_alias_dct=metric_name_to_alias_dct) def _create_subgroup_model_rank_heatmap(self, model_names: list, subgroup_accuracy_metrics_lst: list, diff --git a/virny/utils/data_viz_utils.py b/virny/utils/data_viz_utils.py index 5dbae755..c6e2d990 100644 --- a/virny/utils/data_viz_utils.py +++ b/virny/utils/data_viz_utils.py @@ -360,11 +360,12 @@ def create_model_performance_summary_visualization(main_matrix, matrix_for_color def create_flexible_bar_plot_for_model_selection(all_subgroup_metrics_per_model_dct: dict, all_group_metrics_per_model_dct: dict, - metrics_value_range_dct: dict, group: str, metric_name_to_alias_dct: dict): + metrics_value_range_dct: dict, group: str, metric_name_to_alias_dct: dict, + num_constrains: int): # Compute the number of models that satisfy the conditions models_in_range_df, df_with_models_satisfied_all_constraints = ( create_models_in_range_dct(all_subgroup_metrics_per_model_dct, all_group_metrics_per_model_dct, - metrics_value_range_dct, group)) + metrics_value_range_dct, group, num_constrains=num_constrains)) def get_column_alias(metric_group): if '&' not in metric_group: @@ -492,7 +493,7 @@ def get_column_alias(metric_group): def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_group_metrics_per_model_dct: dict, - metrics_value_range_dct: dict, group: str): + metrics_value_range_dct: dict, group: str, num_constrains: int = 4): # Merge subgroup and group metrics for each model and align their columns all_metrics_for_all_models_df = pd.DataFrame() for model_name in all_subgroup_metrics_per_model_dct.keys(): @@ -562,7 +563,7 @@ def create_models_in_range_dct(all_subgroup_metrics_per_model_dct: dict, all_gro # Concatenate based on rows models_in_range_df = pd.concat([models_in_range_df, 
num_satisfied_models_df], ignore_index=True, sort=False) - if metric_group.count('&') == 3: + if metric_group.count('&') == num_constrains - 1: df_with_models_satisfied_all_constraints = pivoted_model_metrics_df[pd_condition][['Model_Type', 'Model_Name']] return models_in_range_df, df_with_models_satisfied_all_constraints From be390d8cc6420936d26de792dd4d2c3031707471 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Sun, 15 Dec 2024 15:02:15 +0200 Subject: [PATCH 6/9] Fixed predict method in PytorchTabular --- virny/custom_classes/wrappers/pytorch_tabular_wrapper.py | 2 +- virny/user_interfaces/inference_api.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py b/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py index 9cb8f712..206c51c7 100644 --- a/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py +++ b/virny/custom_classes/wrappers/pytorch_tabular_wrapper.py @@ -33,4 +33,4 @@ def predict_proba(self, X, seed: int): return self.estimator.predict(X, tta_seed=seed).values def predict(self, X, seed: int): - return self.estimator.predict(X, tta_seed=seed) + return self.estimator.predict(X, tta_seed=seed).values[:, -1] diff --git a/virny/user_interfaces/inference_api.py b/virny/user_interfaces/inference_api.py index d2a50f69..b3344292 100644 --- a/virny/user_interfaces/inference_api.py +++ b/virny/user_interfaces/inference_api.py @@ -1,6 +1,6 @@ import pandas as pd -from virny.configs.constants import ModelSetting +from virny.configs.constants import ModelSetting, ComputationMode from virny.custom_classes.base_dataset import BaseFlowDataset from virny.analyzers.subgroup_error_analyzer import SubgroupErrorAnalyzer from virny.analyzers.subgroup_variance_analyzer import SubgroupVarianceAnalyzer @@ -10,6 +10,9 @@ def compute_metrics_with_fitted_bootstrap(fitted_bootstrap: list, test_base_flow_dataset: BaseFlowDataset, config, with_predict_proba: bool = True, verbose: int = 0): 
model_setting = ModelSetting.BATCH + if config.computation_mode == ComputationMode.NO_BOOTSTRAP.value: + with_predict_proba = False + X_test, y_test = test_base_flow_dataset.X_test, test_base_flow_dataset.y_test test_protected_groups = create_test_protected_groups(X_test, test_base_flow_dataset.init_sensitive_attrs_df, From 7c51a4759a98c15a899cb204010342ce5778df46 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Fri, 4 Apr 2025 16:13:50 +0300 Subject: [PATCH 7/9] Aligned replace function in diabetes to new pandas version --- virny/datasets/healthcare.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/virny/datasets/healthcare.py b/virny/datasets/healthcare.py index 043d68b6..ed354bdf 100644 --- a/virny/datasets/healthcare.py +++ b/virny/datasets/healthcare.py @@ -77,13 +77,15 @@ def __init__(self, subsample_size: int = None, subsample_seed: int = None, with_ # Preprocess features df = df.rename(columns={'Pregancies': 'Pregnancies'}) - df['RegularMedicine'].replace('o', 'no', inplace=True) + df['RegularMedicine'] = df['RegularMedicine'].replace('o', 'no') df['BPLevel'] = df['BPLevel'].str.strip().str.lower() + df['Pdiabetes']= df['Pdiabetes'].replace('0','no') + df['Pdiabetes']= df['Pdiabetes'].str.lower().str.strip() # Preprocess a target df['Diabetic'] = df['Diabetic'].str.strip() - df['Diabetic'].replace('no', 0, inplace=True) - df['Diabetic'].replace('yes', 1, inplace=True) + df['Diabetic'] = df['Diabetic'].replace('no', 0) + df['Diabetic'] = df['Diabetic'].replace('yes', 1) target = 'Diabetic' numerical_columns = ['BMI', 'Sleep', 'SoundSleep', 'Pregnancies'] From b6693baf11848c66186edae23e38015a9db3ba78 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Mon, 7 Apr 2025 23:37:12 +0300 Subject: [PATCH 8/9] Added notebook_logs_stdout is None to disable a progress bar --- .../abstract_overall_variance_analyzer.py | 2 +- ...verall_variance_analyzer_postprocessing.py | 2 +- virny/user_interfaces/multiple_models_api.py | 20 
+++++++++++-------- ...iple_models_with_multiple_test_sets_api.py | 20 +++++++++++-------- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/virny/analyzers/abstract_overall_variance_analyzer.py b/virny/analyzers/abstract_overall_variance_analyzer.py index feae0310..6b6f18ed 100644 --- a/virny/analyzers/abstract_overall_variance_analyzer.py +++ b/virny/analyzers/abstract_overall_variance_analyzer.py @@ -142,7 +142,7 @@ def UQ_by_boostrap(self, boostrap_size: int, with_replacement: bool, with_fit: b else: from tqdm import tqdm - cycle_range = range(self.n_estimators) if with_fit is False or self.n_estimators == 1 else \ + cycle_range = range(self.n_estimators) if self._notebook_logs_stdout is None or with_fit is False or self.n_estimators == 1 else \ tqdm(range(self.n_estimators), desc="Classifiers testing by bootstrap", colour="blue", diff --git a/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py b/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py index 146898ef..c25776b9 100644 --- a/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py +++ b/virny/analyzers/batch_overall_variance_analyzer_postprocessing.py @@ -116,7 +116,7 @@ def UQ_by_boostrap(self, boostrap_size: int, with_replacement: bool, with_fit: b else: from tqdm import tqdm - cycle_range = range(self.n_estimators) if with_fit is False else \ + cycle_range = range(self.n_estimators) if self._notebook_logs_stdout is None or with_fit is False else \ tqdm(range(self.n_estimators), desc="Classifiers testing by bootstrap", colour="blue", diff --git a/virny/user_interfaces/multiple_models_api.py b/virny/user_interfaces/multiple_models_api.py index 2bffbf6b..a9b54b39 100644 --- a/virny/user_interfaces/multiple_models_api.py +++ b/virny/user_interfaces/multiple_models_api.py @@ -138,8 +138,9 @@ def run_metrics_computation(dataset: BaseFlowDataset, bootstrap_fraction: float, False, otherwise. 
Note that if it is set to False, only metrics based on labels (not labels and probabilities) will be computed. Ignored when a postprocessor is not None, and set to False in this case. notebook_logs_stdout - [Optional] True, if this interface was execute in a Jupyter notebook, - False, otherwise. + [Optional] True, if to display a progress bar in a Jupyter notebook, + False, if to display a progress bar in a python module, + None, if to disable a progress bar. verbose [Optional] Level of logs printing. The greater level provides more logs. As for now, 0, 1, 2 levels are supported. @@ -151,14 +152,17 @@ def run_metrics_computation(dataset: BaseFlowDataset, bootstrap_fraction: float, else: from tqdm import tqdm + num_models = len(models_config) + cycle_range = enumerate(models_config.keys()) if notebook_logs_stdout is None else \ + tqdm(enumerate(models_config.keys()), + total=num_models, + desc="Analyze multiple models", + colour="red", + file=sys.stdout) + models_metrics_dct = dict() models_fitted_bootstraps_dct = dict() - num_models = len(models_config) - for model_idx, model_name in tqdm(enumerate(models_config.keys()), - total=num_models, - desc="Analyze multiple models", - colour="red", - file=sys.stdout): + for model_idx, model_name in cycle_range: if verbose >= 1: print('\n\n', flush=True) print('#' * 30, f' [Model {model_idx + 1} / {num_models}] Analyze {model_name} ', '#' * 30) diff --git a/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py b/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py index c81daa42..770d109c 100644 --- a/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py +++ b/virny/user_interfaces/multiple_models_with_multiple_test_sets_api.py @@ -144,8 +144,9 @@ def run_metrics_computation_with_multiple_test_sets(dataset: BaseFlowDataset, bo False, otherwise. Note that if it is set to False, only metrics based on labels (not labels and probabilities) will be computed. 
Ignored when a postprocessor is not None, and set to False in this case. notebook_logs_stdout - [Optional] True, if this interface was execute in a Jupyter notebook, - False, otherwise. + [Optional] True to display a progress bar in a Jupyter notebook, + False to display a progress bar in a Python module, + None to disable the progress bar. verbose [Optional] Level of logs printing. The greater level provides more logs. As for now, 0, 1, 2 levels are supported. @@ -157,14 +158,17 @@ def run_metrics_computation_with_multiple_test_sets(dataset: BaseFlowDataset, bo else: from tqdm import tqdm + num_models = len(models_config) + cycle_range = enumerate(models_config.keys()) if notebook_logs_stdout is None else \ + tqdm(enumerate(models_config.keys()), + total=num_models, + desc="Analyze multiple models", + colour="red", + file=sys.stdout) + models_metrics_dct = dict() models_fitted_bootstraps_dct = dict() - num_models = len(models_config) - for model_idx, model_name in tqdm(enumerate(models_config.keys()), - total=num_models, - desc="Analyze multiple models", - colour="red", - file=sys.stdout): + for model_idx, model_name in cycle_range: if verbose >= 1: print('#' * 30, f' [Model {model_idx + 1} / {num_models}] Analyze {model_name} ', '#' * 30) try: From 8cfebffe598d2569eadba2f2f68cc5acfbfdf796 Mon Sep 17 00:00:00 2001 From: Denys Herasymuk Date: Tue, 5 May 2026 22:32:32 -0400 Subject: [PATCH 9/9] Removed pkg_resources to be compatible with Python 3.13+ --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index a4200a77..f688a888 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ import os import pathlib -import pkg_resources # Always prefer setuptools over distutils from setuptools import setup, find_packages @@ -30,9 +29,9 @@ with pathlib.Path('requirements.txt').open() as requirements: base_packages = [ - str(requirement) - for requirement - in pkg_resources.parse_requirements(requirements) + 
line.strip() + for line in requirements + if line.strip() and not line.strip().startswith('#') ] # This call to setup() does all the work