diff --git a/NAMESPACE b/NAMESPACE index e2bcfc5..f1ae3b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand +S3method(modelStudio,dalex._explainer.object.Explainer) S3method(modelStudio,explainer) +S3method(modelStudio,python.builtin.object) export(modelStudio) export(modelStudioOptions) import(progress) diff --git a/R/modelStudio.R b/R/modelStudio.R index 592f6b9..168736f 100644 --- a/R/modelStudio.R +++ b/R/modelStudio.R @@ -384,6 +384,15 @@ modelStudio.explainer <- function(explainer, model_studio } +#:# alias for reticulate pickle/dalex Explainer +#' @noRd +#' @export +modelStudio.python.builtin.object <- modelStudio.explainer + +#' @noRd +#' @export +modelStudio.dalex._explainer.object.Explainer <- modelStudio.explainer + #' @noRd #' @title remove_file_paths #' diff --git a/README.md b/README.md index 6a9a264..b4718e2 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ The main `modelStudio()` function computes various (instance and dataset level) [**explain FIFA20**](https://pbiecek.github.io/explainFIFA20/) [explain Lung Cancer](https://github.com/hbaniecki/transparent_xai/) [**R & Python examples**](http://modelstudio.drwhy.ai/articles/vignette_examples.html) -[More Resources](https://modeloriented.github.io/modelStudio/#more) +[More Resources](http://modelstudio.drwhy.ai/#more-resources) [**FAQ & Troubleshooting**](https://github.com/ModelOriented/modelStudio/issues/54)  @@ -73,7 +73,7 @@ install.packages("iBreakDown") # packages for explainer objects install.packages("DALEX") -devtools::install_github("ModelOriented/DALEXtra") +install.packages("DALEXtra") ``` ### mlr [dashboard](https://modeloriented.github.io/modelStudio/mlr.html) @@ -94,7 +94,7 @@ test <- data[-index, ] # mlr ClassifTask takes target as factor train$survived <- as.factor(train$survived) -# prepare the model +# fit a model task <- makeClassifTask(id = "titanic", data = train, target = "survived") @@ -137,7 +137,7 @@ test <- data[-index, ] train_matrix <- model.matrix(survived ~.-1, train) test_matrix <- model.matrix(survived ~.-1, test) -# prepare the model +# fit a model xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived) params <- list(eta = 0.01, subsample = 0.6, max_depth = 7, min_child_weight = 3, objective = "binary:logistic", eval_metric = "auc") @@ -161,96 +161,88 @@ modelStudio(explainer, ### scikit-learn [dashboard](https://modeloriented.github.io/modelStudio/scikit-learn.html) -Use `pickle` Python module and `reticulate` R package to easily produce modelStudio for scikit-learn model. +Use `pickle` Python module and `reticulate` R package to easily make a studio for a scikit-learn model. -In this example we fit a Pipeline MLPClassifier on the titanic data. First install the `dalex` package. +In this example we will fit a Pipeline MLPClassifier model on titanic data. + +Install the `dalex` package. ```bash pip3 install dalex --force ``` -Make an explainer object in Python: +First, use `dalex` in Python: ```python -# import modules +# load packages and data import dalex as dx -from dalex import datasets -from sklearn.neural_network import MLPClassifier -from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder -from sklearn.impute import SimpleImputer +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline -from sklearn.tree import DecisionTreeRegressor +from sklearn.preprocessing import StandardScaler, OneHotEncoder +from sklearn.impute import SimpleImputer from sklearn.compose import ColumnTransformer +from sklearn.neural_network import MLPClassifier -# load the data -data = datasets.load_titanic() +data = dx.datasets.load_titanic() X = data.drop(columns='survived') y = data.survived -# make a pipeline model +# split the data +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1) + +# fit a pipeline model numeric_features = ['age', 'fare', 'sibsp', 'parch'] -numeric_transformer = Pipeline(steps=[ +numeric_transformer = Pipeline( + steps=[ ('imputer', SimpleImputer(strategy='median')), - ('scaler', StandardScaler())]) - + ('scaler', StandardScaler()) + ] +) categorical_features = ['gender', 'class', 'embarked'] -categorical_transformer = Pipeline(steps=[ +categorical_transformer = Pipeline( + steps=[ ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), - ('onehot', OneHotEncoder(handle_unknown='ignore'))]) + ('onehot', OneHotEncoder(handle_unknown='ignore')) + ] +) preprocessor = ColumnTransformer( - transformers=[ - ('num', numeric_transformer, numeric_features), - ('cat', categorical_transformer, categorical_features)]) - - -clf = Pipeline(steps=[('preprocessor', preprocessor), - ('classifier', MLPClassifier(hidden_layer_sizes=(150,100,50), - max_iter=500, random_state=0))]) - -clf.fit(X, y) + transformers=[ + ('num', numeric_transformer, numeric_features), + ('cat', categorical_transformer, categorical_features) + ] +) + +model = Pipeline( + steps=[ + ('preprocessor', preprocessor), + ('classifier', MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500, random_state=0)) + ] +) +model.fit(X_train, y_train) -# make an explainer -explainer = dx.Explainer(clf, X, y) +# create an explainer for the model +explainer = dx.Explainer(model, X_test, y_test, label = 'scikit-learn') -# remove these functions before dump +#! remove residual_function before dump ! explainer.residual_function = None -explainer.predict_function = None # pack the explainer into a pickle file -import pickle -pickle_out = open("explainer_titanic.pickle","wb") +import pickle +pickle_out = open("explainer_scikitlearn.pickle","wb") pickle.dump(explainer, pickle_out) -pickle_out.close() +pickle_out.close() ``` -Then use `modelStudio` in R: +Then, use `modelStudio` in R: ```r -# use reticulate to load the explainer from a pickle file +# load the explainer from the pickle file library(reticulate) -explainer <- py_load_object('explainer_titanic.pickle') - -# make a predict_function -predict_function <- function(model, data) { - if ("predict_proba" %in% names(model)) { - pred <- model$predict_proba(data) - if (ncol(pred) == 2) { - pred <- pred[,2] - } - } else { - pred <- model$predict(data) - } - pred -} - -# adjust the explainer -explainer$predict_function <- predict_function -explainer$label <- 'scikit-learn' -class(explainer) <- c(class(explainer), 'explainer') - -# make a modelStudio +explainer <- py_load_object('explainer_scikitlearn.pickle', pickle = "pickle") + +# make a studio for the model library(modelStudio) modelStudio(explainer) ``` diff --git a/inst/WORDLIST b/inst/WORDLIST index 1f89891..3401999 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -28,6 +28,7 @@ tensorflow Shapley cran CRAN -MLPCLassifier +MLPClassifier keras lightGBM +customizable diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index afdd162..c8e041b 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -3,4 +3,5 @@ template: default_assets: false params: ganalytics: UA-5650686-14 - noindex: true \ No newline at end of file + noindex: true + \ No newline at end of file diff --git a/pkgdown/favicon/h2o.html b/pkgdown/favicon/h2o.html new file mode 100644 index 0000000..396a5d2 --- /dev/null +++ b/pkgdown/favicon/h2o.html @@ -0,0 +1,1795 @@ + + +
+ + + + + + + + + + +