-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathrecodeValuesDS.R
More file actions
154 lines (134 loc) · 7.82 KB
/
Copy pathrecodeValuesDS.R
File metadata and controls
154 lines (134 loc) · 7.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#'
#' @title recodeValuesDS an assign function called by ds.recodeValues
#' @description This function recodes specified values of elements in a vector into
#' a matched set of alternative specified values.
#' @details For all details see the help header for ds.recodeValues
#' @param var.name.text a character string providing the name for the vector representing the
#' variable to be recoded. <var.name.text> argument generated and passed directly to
#' recodeValuesDS by ds.recodeValues
#' @param values2replace.text a character string specifying the values in the
#' vector specified by the argument <var.name.text> that are to be replaced by new
#' values as specified in the new.values.vector. The <values2replace.text> argument
#' is generated and passed directly to recodeValuesDS by ds.recodeValues. In effect, the
#' <values2replace.vector> argument of the ds.recodeValues function is converted
#' to a character string format that is acceptable to the DataSHIELD R parser in the data repository
#' and so can be accepted by recodeValuesDS
#' @param new.values.text a character string specifying the new values to which
#' the specified values in the vector <var.name> are to be converted.
#' The <new.values.text> argument is generated and passed directly to recodeValuesDS
#' by ds.recodeValues. In effect, the <new.values.vector> argument of the
#' ds.recodeValues function is converted to a character string format that is
#' acceptable to the DataSHIELD R parser in the data repository
#' and so can be used in the call to recodeValuesDS.
#' @param missing if supplied, any missing values in the variable referred to by var.name.text
#' will be replaced by this value.
#' @return the object specified by the <newobj> argument (or default name '<var.name>_recoded')
#' initially specified in calling ds.recodeValues. The output object (the required
#' recoded variable called <newobj> is written to the serverside.
#' @author Paul Burton, Demetris Avraam for DataSHIELD Development Team
#' @export
#'
recodeValuesDS <- function(var.name.text=NULL, values2replace.text=NULL, new.values.text=NULL, missing=NULL){
# Check Permissive Privacy Control Level.
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
#############################################################
#MODULE 1: CAPTURE THE used nfilter SETTINGS
thr <- dsBase::listDisclosureSettingsDS()
nfilter.subset <- as.numeric(thr$nfilter.subset)
nfilter.stringShort <- as.numeric(thr$nfilter.stringShort)
#############################################################
# DISCLOSURE TRAPS
var.name.text.chars <- strsplit(var.name.text, split="")
if(length(var.name.text.chars[[1]]) > nfilter.stringShort){
studysideMessage <- "Error: var.name.text argument too long (see nfilter.stringShort)"
stop(studysideMessage, call. = FALSE)
}
values2replace.text.chars <- strsplit(values2replace.text, split="")
if(length(values2replace.text.chars[[1]]) > nfilter.stringShort){
studysideMessage <- "Error: values2replace.text argument too long (see nfilter.stringShort)"
stop(studysideMessage, call. = FALSE)
}
new.values.text.chars <- strsplit(new.values.text, split="")
if(length(new.values.text.chars[[1]]) > nfilter.stringShort){
studysideMessage <- "Error: new.values.text argument too long (see nfilter.stringShort)"
stop(studysideMessage, call. = FALSE)
}
var2recode <- eval(parse(text=var.name.text), envir = parent.frame())
values2replace <- unlist(strsplit(values2replace.text, split=","))
new.values <- unlist(strsplit(new.values.text, split=","))
if(!is.null(missing)){missing <- as.numeric(missing)}
# get the class of the input variable
var.class <- class(var2recode)
# if the class of the input variable is not factor, numeric, character or integer then
# stop and return an error message
if (!(var.class %in% c('factor', 'character', 'numeric', 'integer'))){
studysideMessage <- "Error: The variable to recode must be either a factor, a character, a numeric or an integer"
stop(studysideMessage, call. = FALSE)
}
# recode using the recode function from the dplyr package
if (var.class == 'factor'){
expr <- as.list(new.values)
names(expr) <- values2replace
var.recoded <- dplyr::recode_factor(var2recode, !!!(expr))
if (!is.null(missing)){
var.recoded.tmp <- var.recoded
var.recoded <- addNA(var.recoded.tmp)
levels(var.recoded) <- c(levels(var.recoded.tmp), missing)
}
}
if (var.class == 'character'){
expr <- as.list(new.values)
names(expr) <- values2replace
var.recoded <- dplyr::recode(var2recode, !!!(expr), .missing=if(is.null(missing)){NULL}else{paste0("'", missing, "'")})
}
if (var.class == 'numeric'){
expr <- as.list(as.numeric(new.values))
names(expr) <- values2replace
var.recoded <- dplyr::recode(var2recode, !!!(expr), .missing=missing)
}
if (var.class == 'integer'){
expr <- as.list(as.numeric(new.values))
names(expr) <- values2replace
var2recode.n <- as.numeric(var2recode)
var.recoded <- dplyr::recode(var2recode.n, !!!(expr), .missing=missing)
var.recoded <- as.integer(var.recoded)
}
# DISCLOSURE TRAP ON LENGTH OF NA AND non-NA ELEMENTS OF ORIGINAL AND RECODED VECTORS
mark.original <- stats::complete.cases(var2recode)
non.NA.original.vector <- var2recode[mark.original]
non.NA.length.original <- length(non.NA.original.vector)
mark.recoded <- stats::complete.cases(var.recoded)
non.NA.recoded.vector <- var.recoded[mark.recoded]
non.NA.length.recoded <- length(non.NA.recoded.vector)
difference.non.NA.lengths <- abs(non.NA.length.recoded-non.NA.length.original)
# Non-NA SUBSET OF RECODED VARIABLE SMALLER THAN MINIMUM SUBSET SIZE - BLOCK CREATION OF RECODED VECTOR
# AND RETURN MESSAGE
if(non.NA.length.recoded < nfilter.subset){
studysideMessage <- "Error: number of non-NA elements of recoded vector < minimum subset size"
stop(studysideMessage, call. = FALSE)
}
########################################################################
##########MODULE WARNING OF POTENTIAL DIFFERENCE ATTACK ################
########################################################################
if((difference.non.NA.lengths < nfilter.subset) && (difference.non.NA.lengths > 0)){
studysideWarning1 <- "Warning: DataSHIELD monitors every session for potentially disclosive analytic requests. "
studysideWarning2 <- "The analysis you just submitted has generated a recoded variable in which the number of non-missing "
studysideWarning3 <- "elements differs - but only very slightly - from the original variable. This is most likely to be "
studysideWarning4 <- "an innocent consequence of your recoding needs. However, it could in theory be one step "
studysideWarning5 <- "in a difference-based attack aimed at identifying individuals. This analytic request has "
studysideWarning6 <- "therefore been highlighted in the session log file. Please be reassured, if you do not try "
studysideWarning7 <- "to identify individuals this will cause you no difficulty. However, if you do plan a "
studysideWarning8 <- "malicious attempt to identify individuals by differencing, this will become obvious in the "
studysideWarning9 <- "session log and you will be sanctioned. Possible consequences include loss of future access "
studysideWarning10 <- "to DataSHIELD and/or legal penalties."
return.message <- list(studysideWarning1, studysideWarning2, studysideWarning3, studysideWarning4,
studysideWarning5, studysideWarning6, studysideWarning7, studysideWarning8,
studysideWarning9, studysideWarning10)
warning(return.message, call. = FALSE)
}
# Convert characters "NA" to values NA if any
var.recoded[which(var.recoded == "NA")] <- NA
return(var.recoded)
}
# ASSIGN FUNCTION
# recodeValuesDS