openpharma
diff --git a/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions b/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 21 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎R/globals.R‎
Lines changed: 6 additions & 0 deletions b/‎R/globals.R‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎R/imports.R‎
Lines changed: 4 additions & 0 deletions b/‎R/imports.R‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎R/internal.R‎
Lines changed: 5 additions & 7 deletions b/‎R/internal.R‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎R/knockoff_filters.R‎
Lines changed: 1 addition & 2 deletions b/‎R/knockoff_filters.R‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎R/performance.R‎
Lines changed: 1 addition & 1 deletion b/‎R/performance.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/plot.R‎
Lines changed: 17 additions & 11 deletions b/‎R/plot.R‎
Lines changed: 17 additions & 11 deletions
diff --git a/‎R/simdata-data.R‎
Lines changed: 49 additions & 0 deletions b/‎R/simdata-data.R‎
Lines changed: 49 additions & 0 deletions
@@ -5,3 +5,4 @@
 ^knockofftools\.Rproj$
 ^LICENSE$
 ^README\.md$
+^\.Rproj\.user$
@@ -26,10 +26,10 @@ Imports:
     survival (>= 2.44.1.1),
     randomForestSRC (>= 2.9.1),
     grf (>= 2.3.1),
-    glasso (>= 1.11),
     CVglasso (>= 1.0),
-    nnet (>= 7.3-18)
-RoxygenNote: 7.2.3
+    nnet (>= 7.3-18),
+    magrittr (>= 1.5)
+RoxygenNote: 7.3.3
 Suggests: 
     knitr,
     rmarkdown,
 
@@ -35,3 +35,24 @@ export(stat_predictive_causal_forest)
 export(stat_predictive_glmnet)
 export(stat_random_forest)
 export(variable.selections)
+importFrom(magrittr,"%>%")
+importFrom(stats,as.formula)
+importFrom(stats,coef)
+importFrom(stats,contrasts)
+importFrom(stats,cutree)
+importFrom(stats,dist)
+importFrom(stats,dnbinom)
+importFrom(stats,hclust)
+importFrom(stats,ks.test)
+importFrom(stats,lm)
+importFrom(stats,model.matrix)
+importFrom(stats,pnbinom)
+importFrom(stats,predict)
+importFrom(stats,qqnorm)
+importFrom(stats,rbinom)
+importFrom(stats,rexp)
+importFrom(stats,rmultinom)
+importFrom(stats,rnorm)
+importFrom(stats,runif)
+importFrom(stats,sd)
+importFrom(stats,toeplitz)
@@ -0,0 +1,6 @@
+utils::globalVariables(c(
+  "selected",
+  "variable",
+  "draw",
+  "drawclass"
+))
@@ -0,0 +1,4 @@
+#' @keywords internal
+#' @importFrom magrittr %>%
+#' @importFrom stats as.formula coef contrasts cutree dist dnbinom hclust ks.test lm model.matrix pnbinom predict qqnorm rbinom rexp rmultinom rnorm runif sd toeplitz
+NULL
@@ -133,7 +133,7 @@ find_single_optimal_variable_set <- function(S, p, trim=0.5) {
 #' Do not call this function on its own. Fits cross-validated glmnet model with fixed effect.
 #'
 #'
-#' @param X.fixed a data.frame (or tibble) with "numeric" and "factor" columns corresponding to covariates or terms that should be treated as fixed effects in the model.
+#' @param X_fixed a data.frame (or tibble) with "numeric" and "factor" columns corresponding to covariates or terms that should be treated as fixed effects in the model.
 #' @param X original data.frame (or tibble) with "numeric" and "factor" columns only. The number of columns, ncol(X) needs to be > 2.
 #' @param y response vector with \code{length(y) = nrow(X)}. Accepts "numeric" (family="gaussian") or binary "factor" (family="binomial"). Can also be a survival object of class Surv
 #' as obtained from y = survival::Surv(time, status).
@@ -158,7 +158,7 @@ cv_coeffs_glmnet_with_fixed_effect <- function(X_fixed, X, y, family, nlambda=50
     X = scale(X)
   }
 
-  if (!methods::hasArg(lambda) ) {
+  if (!methods::hasArg(lambda) ) {
     if( identical(family, "gaussian") ) {
       if(!is.numeric(y)) {
         stop('Input y must be numeric.')
@@ -195,13 +195,12 @@ cv_coeffs_glmnet_with_fixed_effect <- function(X_fixed, X, y, family, nlambda=50
 #' @param y response vector with \code{length(y) = nrow(X)}. Accepts "numeric" (family="gaussian") or binary "factor" (family="binomial"). Can also be a survival object of class Surv
 #' as obtained from y = survival::Surv(time, status).
 #' @param type should be "regression" if y is numeric, "classification" if y is a binary factor variable or "survival" if y is a survival object.
-#' @param ...
 #'
 #' @return importance scores
 #' @export
 #'
 #' @keywords internal
-random_forest_importance_scores <- function(X, y, trt, type = "regression", ...){
+random_forest_importance_scores <- function(X, y, trt, type = "regression"){
   # make the column names unique
   colnames(X) = make.unique(colnames(X))
 
@@ -323,16 +322,15 @@ ns.transform <- function(y) {
 
 }
 
-#' Heuristic check for whether a variable can be reasonably treated as continuous
+#' Heuristic check for whether numeric variables can be reasonably treated as continuous
 #'
-#' @param x a numeric variable vector
+#' @param X the design matrix of interest with columns either "numeric" or "factor"
 #'
 #' @return a logical TRUE or FALSE depending on whether n_distinct(x) > 30
 #' @export
 #'
 #' @keywords internal
 check_if_continuous <- function(X) {
-  `%>%` <- magrittr::`%>%`
   X_numeric <- dplyr::select_if(X, is.numeric)
   is.continuous <- sum(X_numeric %>% lapply(dplyr::n_distinct) %>% unlist() <= 30) > 0
   if (is.continuous) warning("Some of the numeric columns of X have suspiciously few distinct values: n_distinct <= 30. Those columns should perhaps not be treated as continuous variables. Please review carefully and read the documentation about the gcm parameter of the knockoff.statistics function.")
 
@@ -157,7 +157,6 @@ knockoff.statistics <- function(y, X, type="regression",
 #' @param y response vector with \code{length(y) = nrow(X)}. Accepts "numeric", binary "factor", or survival ("Surv") object.
 #' @param X data.frame (or tibble) with "numeric" and "factor" columns only. The number of columns, ncol(X) needs to be > 2.
 #' @param type should be "regression" if y is numeric, "classification" if y is a binary factor variable or "survival" if y is a survival object.
-#' @param M the number of independent knockoff feature statistics that should be calculated.
 #' @param knockoff.method what type of knockoffs to calculate. Defaults to sequential knockoffs, knockoff.method="seq", but other options are "sparseseq" and "mx". The "mx" option only works if all columns of X are continuous.
 #' @param statistic knockoff feature statistic function, defaults to glmnet coefficient difference (statistic="stat_glmnet"; see ?stat_glmnet). Other options include statistic="stat_random_forest" (see ?stat_random_forest), statistic="stat_predictive_glmnet" (see ?stat_predictive_glmnet) or statistic="stat_predictive_causal_forest" (see ?stat_predictive_causal_forest).
 #' @param trt binary treatment (factor) variable required if statistic involves a predictive knockoff filter (i.e. if statistic="stat_predictive_glmnet" or statistic="stat_predictive_causal_forest")
@@ -315,7 +314,7 @@ stat_glmnet <- function(y, X, X_k, type = "regression", X.fixed=NULL, penalty.fi
 #' @param y response vector with \code{length(y) = nrow(X)}. Accepts "numeric" (family="gaussian") or binary "factor" (family="binomial"). Can also be a survival object of class "Surv" (type="survival")
 #' as obtained from y = survival::Surv(time, status).
 #' @param type should be "regression" if y is numeric, "classification" if y is a binary factor variable or "survival" if y is a survival object.
-#' @param ...
+#' @param ... other parameters passed to \code{random_forest_importance_scores}.
 #'
 #' @return data.frame with knockoff statistics W as column. The number of rows matches the number of columns (variables) of the data.frame X and the variable names are recorded in rownames(W).
 #' @export
 
@@ -21,7 +21,7 @@ eval_fdp <- function(selected, negatives) {
 #' True positive proportion (tpp) as function of selection and known positives:
 #'
 #' @param selected vector of indices of selected variables
-#' @param negatives vector of indices of known non-null variables (that influence response)
+#' @param positives vector of indices of known non-null variables (that influence response)
 #'
 #' @return true positive rate
 #' @export
 
@@ -1,17 +1,25 @@
+
 #' Heatmap of multiple variable selections ordered by importance
 #'
-#' @param S data.frame of variable selections from multiple knockoffs (each entry is either 1 if variable is selected and 0 otherwise). Columns correspond to different knockoffs and rows correspond to the underlying variables. row.names(S) records the variable names.
+#' @param x data.frame of variable selections from multiple knockoffs
+#' (each entry is either 1 if variable is selected and 0 otherwise).
+#' Columns correspond to different knockoffs and rows correspond to the
+#' underlying variables. row.names(x) records the variable names.
+#'
+#' @param ... Additional arguments accepted for compatibility with the generic \code{plot()}; currently ignored.
+#'
 #' @param nbcocluster bivariate vector c(number of variable clusters, number of selection clusters).
-#' The former number must be specified less than nrow(S) and the latter must be less than ncol(S).
+#' The former must be less than nrow(x) and the latter must be less than ncol(x).
 #'
 #' @details To help visualize most important variables we perform clustering both selections and variables.
 #'
 #' @return plot of heatmap
+#'
+#' @method plot variable.selections
 #' @export
 #'
 #' @examples
 #' library(knockofftools)
-#'
 #' set.seed(1)
 #'
 #' # Simulate 8 Gaussian covariate predictors and 2 binary factors:
@@ -20,19 +28,19 @@
 #' # create linear predictor with first 5 beta-coefficients = 1 (all other zero)
 #' lp <- generate_lp(X, p_nn = 5, a=1)
 #'
-#' # Gaussian
-#'
-#' # Simulate response from a linear model y = lp + epsilon, where epsilon ~ N(0,1):
+#' # Simulate response:
 #' y <- lp + rnorm(100)
 #'
-#' # Calculate M independent knockoff feature statistics:
+#' # Calculate knockoff statistics:
 #' W <- knockoff.statistics(y=y, X=X, type="regression", M=5)
 #'
-#' S = variable.selections(W, error.type = "pfer", level = 1)
+#' S <- variable.selections(W, error.type = "pfer", level = 1)
 #'
 #' # plot heatmap of knockoff selections:
 #' plot(S)
-plot.variable.selections <- function(S, nbcocluster=c(7,7)) {
+plot.variable.selections <- function(x, ..., nbcocluster=c(7,7)) {
+
+  S <- x
 
   if (class(S)[1]!="variable.selections") {
     stop("Input S must be of class \'variable.selections\'. Please see ?variable.selections.")
@@ -49,8 +57,6 @@ plot.variable.selections <- function(S, nbcocluster=c(7,7)) {
                            variable = factor(rownames(S)),
                            selected = as.numeric(as.matrix(S)))
 
-  `%>%` <- dplyr::`%>%`
-
   sel.mat <- matrix(selections$selected,nrow=nrow(S))
   hclust.row <- hclust(dist(sel.mat, method="binary"), method="ward.D")
   hclust.col <- hclust(dist(t(sel.mat), method="binary"), method="ward.D")
 
@@ -0,0 +1,49 @@
+#' Simulated dataset for knockofftools
+#'
+#' @description
+#' A synthetic dataset generated by the function \code{generate_simdata()}.
+#' It contains simulated Gaussian, binary, and survival outcomes along with covariates.
+#'
+#' @format A data frame with 2000 rows and 33 variables:
+#' \describe{
+#'   \item{Yg}{Continuous outcome}
+#'   \item{Yb}{Binary outcome}
+#'   \item{Tc}{Treatment indicator}
+#'   \item{X1}{Covariate 1}
+#'   \item{X2}{Covariate 2}
+#'   \item{X3}{Covariate 3}
+#'   \item{X4}{Covariate 4}
+#'   \item{X5}{Covariate 5}
+#'   \item{X6}{Covariate 6}
+#'   \item{X7}{Covariate 7}
+#'   \item{X8}{Covariate 8}
+#'   \item{X9}{Covariate 9}
+#'   \item{X10}{Covariate 10}
+#'   \item{X11}{Covariate 11}
+#'   \item{X12}{Covariate 12}
+#'   \item{X13}{Covariate 13}
+#'   \item{X14}{Covariate 14}
+#'   \item{X15}{Covariate 15}
+#'   \item{X16}{Covariate 16}
+#'   \item{X17}{Covariate 17}
+#'   \item{X18}{Covariate 18}
+#'   \item{X19}{Covariate 19}
+#'   \item{X20}{Covariate 20}
+#'   \item{X21}{Covariate 21}
+#'   \item{X22}{Covariate 22}
+#'   \item{X23}{Covariate 23}
+#'   \item{X24}{Covariate 24}
+#'   \item{X25}{Covariate 25}
+#'   \item{X26}{Covariate 26}
+#'   \item{X27}{Covariate 27}
+#'   \item{X28}{Covariate 28}
+#'   \item{X29}{Covariate 29}
+#'   \item{X30}{Covariate 30}
+#' }
+#'
+#' @source Simulated using \code{generate_simdata()}
+#'
+#' @examples
+#' data(simdata)
+#' head(simdata)
+"simdata"