-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathsubsetDS.R
More file actions
134 lines (122 loc) · 5.08 KB
/
subsetDS.R
File metadata and controls
134 lines (122 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#'
#' @title Generates a valid subset of a table or a vector
#' @description The function uses the R classical subsetting with squared brackets '[]' and allows also to
#' subset using a logical operator and a threshold. The object to subset from must be a vector (factor, numeric
#' or character) or a table (data.frame or matrix).
#' @details If the input data is a table: The user specifies the rows and/or columns to include in the subset if the input
#' object is a table; the columns can be referred to by their names. The name of a vector (i.e. a variable) can also be provided
#' with a logical operator and a threshold (see example 3).
#' If the input data is a vector: when the parameters 'rows', 'logical' and 'threshold' are all provided the last two are ignored (
#' 'rows' has precedence over the other two parameters then).
#' If the requested subset is not valid (i.e. contains less than the allowed
#' number of observations), the subset is not generated, rather a table or a vector of missing values is generated to allow
#' for any subsequent process using the output of the function to proceed after informing the user via a message.
#' @param dt a string character, the name of the dataframe or the factor vector and the range of the subset.
#' @param complt a boolean that tells if the subset to subset should include only complete cases
#' @param rs a vector of two integers that give the range of rows de extract.
#' @param cs a vector of two integers or one or more characters; the indices of the columns to extract or the names of the columns (i.e.
#' names of the variables to extract).
#' @param lg a character, the logical parameter to use if the user wishes to subset a vector using a logical
#' operator. This parameter is ignored if the input data is not a vector.
#' @param th a numeric, the threshold to use in conjunction with the logical parameter. This parameter is ignored
#' if the input data is not a vector.
#' @param varname a character, if the input data is a table, if this parameter is provided along with the 'logical' and 'threshold'
#' parameters, a subtable is based the threshold applied to the specified variable. This parameter is however ignored if the parameter
#' 'rows' and/or 'cols' are provided.
#' @return a subset of the vector, matrix or dataframe as specified is stored on the server side
#' @author Gaye, A.
#' @export
#'
subsetDS <- function(dt=NULL, complt=NULL, rs=NULL, cs=NULL, lg=NULL, th=NULL, varname=NULL){
# Check Permissive Privacy Control Level.
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
# this filter sets the minimum number of observations that are allowed
#############################################################
# MODULE 1: CAPTURE THE nfilter SETTINGS
thr <- dsBase::listDisclosureSettingsDS()
nfilter.tab <- as.numeric(thr$nfilter.tab)
#nfilter.glm <- as.numeric(thr$nfilter.glm)
#nfilter.subset <- as.numeric(thr$nfilter.subset)
#nfilter.string <- as.numeric(thr$nfilter.string)
#############################################################
# the logical operators are given as integers change them into characters
if(!(is.null(lg))){
if(lg == 1){lg <- ">"}
if(lg == 2){lg <- ">="}
if(lg == 3){lg <- "<"}
if(lg == 4){lg <- "<="}
if(lg == 5){lg <- "=="}
if(lg == 6){lg <- "!="}
}
# evaluate the input data object
D <- eval(parse(text=dt), envir = parent.frame())
# if 'complt' is set to TRUE, get continue with a dataset with complete cases only
if(complt){
cc <- stats::complete.cases(D)
xx <- which(cc == TRUE)
Dtemp <- D
if(is.vector(D) | is.factor(D)){
D <- Dtemp[xx]
}else{
D <- Dtemp[xx,]
}
}
# carry out the subsetting
if(is.vector(D) | is.factor(D)){ # if the input data is a vector
if(is.null(rs)){
if(is.null(lg) | is.null(th)){
subvect <- D
}else{
exprs1 <- paste0("D[which(D", lg, th, ")]")
subvect <- eval(parse(text=exprs1))
}
}else{
subvect <- D[rs]
}
if(length(subvect) < nfilter.tab){
if(length(subvect) == 0){
output <- D[-c(1:length(D))]
}else{
temp1 <- subvect
temp1[1:length(temp1)] <- NA
output <- temp1
}
}else{
output <- subvect
}
}else{ # if the input data is a table
if(!(is.null(rs)) | !(is.null(cs))){
if(!(is.null(rs)) & !(is.null(cs))){
subtable <- D[rs, cs]
}else{
if(is.null(cs)){
cs <- c(1:dim(D)[2])
}
if(is.null(rs)){
rs <- c(1:dim(D)[1])
}
subtable <- D[rs,cs]
}
}else{
if(is.null(varname)){
subtable <- D
}else{
idx <- which(colnames(D) == varname)
exprs2 <- paste0('D[which(D[,',idx,']', lg, th, '),]')
subtable <- eval(parse(text=exprs2))
}
}
if((dim(subtable)[1]) < nfilter.tab){
if((dim(subtable)[1]) == 0){
output <- D[-c(1:dim(D)[1]),]
}else{
subD <- subtable
subD[] <- NA
output <- subD
}
}else{
output <- subtable
}
}
return(output)
}