Skip to content

Commit 43398e8

Browse files
committed
featÑ mdPatterns fct
1 parent ea12b73 commit 43398e8

2 files changed

Lines changed: 122 additions & 0 deletions

File tree

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export(matrixDimnamesDS)
8282
export(matrixInvertDS)
8383
export(matrixMultDS)
8484
export(matrixTransposeDS)
85+
export(mdPatternDS)
8586
export(meanDS)
8687
export(meanSdGpDS)
8788
export(mergeDS)

R/mdPatternDS.R

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#'
2+
#' @title Missing data pattern with disclosure control
3+
#' @description This function is a serverside aggregate function that computes the
4+
#' missing data pattern using mice::md.pattern and applies disclosure control to
5+
#' prevent revealing small cell counts.
6+
#' @details This function calls the mice::md.pattern function to generate a matrix
7+
#' showing the missing data patterns in the input data. To ensure disclosure control,
8+
#' any pattern counts that are below the threshold (nfilter.tab, default=3) are
9+
#' suppressed.
10+
#'
11+
#' \strong{Suppression Method:}
12+
#'
13+
#' When a pattern count is below threshold:
14+
#' - Row name is changed to "suppressed(<N>)" where N is the threshold
15+
#' - All pattern values in that row are set to NA
16+
#' - Summary row is also set to NA (prevents back-calculation)
17+
#'
18+
#' \strong{Output Matrix Structure:}
19+
#'
20+
#' - Rows represent different missing data patterns (plus a summary row at the bottom)
21+
#' - Row names contain pattern counts (or "suppressed(<N>)" for invalid patterns)
22+
#' - Columns show 1 if variable is observed, 0 if missing
23+
#' - Last column shows total number of missing values per pattern
24+
#' - Last row shows total number of missing values per variable
25+
#'
26+
#' \strong{Note for Pooling:}
27+
#'
28+
#' When this function is called from ds.mdPattern with type='combine', suppressed
29+
#' patterns are excluded from pooling to prevent disclosure through subtraction.
30+
#' This means pooled counts may underestimate the true total when patterns are
31+
#' suppressed in some studies.
32+
#'
33+
#' @param x a character string specifying the name of a data frame or matrix
34+
#' containing the data to analyze for missing patterns.
35+
#' @return A list containing:
36+
#' \item{pattern}{The missing data pattern matrix with disclosure control applied}
37+
#' \item{valid}{Logical indicating if all patterns meet disclosure requirements}
38+
#' \item{message}{A message describing the validity status}
39+
#' @author Xavier Escribà montagut for DataSHIELD Development Team
40+
#' @import mice
41+
#' @export
42+
#'
43+
mdPatternDS <- function(x){
44+
45+
#############################################################
46+
# MODULE 1: CAPTURE THE nfilter SETTINGS
47+
thr <- dsBase::listDisclosureSettingsDS()
48+
nfilter.tab <- as.numeric(thr$nfilter.tab)
49+
#############################################################
50+
51+
# Parse the input data name with error handling
52+
x.val <- tryCatch(
53+
{
54+
eval(parse(text=x), envir = parent.frame())
55+
},
56+
error = function(e) {
57+
stop(paste0("Object '", x, "' does not exist on the server"), call. = FALSE)
58+
}
59+
)
60+
61+
# Check object class
62+
typ <- class(x.val)
63+
64+
# Check that input is a data frame or matrix
65+
if(!("data.frame" %in% typ || "matrix" %in% typ)){
66+
stop(paste0("The input object must be of type 'data.frame' or 'matrix'. Current type: ",
67+
paste(typ, collapse = ", ")), call. = FALSE)
68+
}
69+
70+
# Use x.val for further processing
71+
x <- x.val
72+
73+
# Call mice::md.pattern with plot=FALSE
74+
pattern <- mice::md.pattern(x, plot = FALSE)
75+
76+
# Apply disclosure control
77+
# Pattern counts are stored in row names (except last row which is empty/summary)
78+
# The last row contains variable-level missing counts
79+
80+
validity <- "valid"
81+
n_patterns <- nrow(pattern) - 1 # exclude the summary row
82+
83+
if(n_patterns > 0){
84+
# Check pattern counts (stored in row names, excluding last row)
85+
pattern_counts <- as.numeric(rownames(pattern)[1:n_patterns])
86+
87+
# Find patterns with counts below threshold
88+
invalid_idx <- which(pattern_counts > 0 & pattern_counts < nfilter.tab)
89+
90+
if(length(invalid_idx) > 0){
91+
validity <- "invalid"
92+
93+
# For invalid patterns, suppress by:
94+
# - Setting row name to "suppressed"
95+
# - Setting all pattern values to NA
96+
rnames <- rownames(pattern)
97+
for(idx in invalid_idx){
98+
rnames[idx] <- paste0("suppressed(<", nfilter.tab, ")")
99+
pattern[idx, ] <- NA
100+
}
101+
rownames(pattern) <- rnames
102+
103+
# Also need to recalculate the last row (summary) if patterns were suppressed
104+
# Set to NA to avoid disclosures
105+
pattern[nrow(pattern), seq_len(ncol(pattern))] <- NA
106+
}
107+
}
108+
109+
# Return the pattern with validity information
110+
return(list(
111+
pattern = pattern,
112+
valid = (validity == "valid"),
113+
message = ifelse(validity == "valid",
114+
"Valid: all pattern counts meet disclosure requirements",
115+
paste0("Invalid: some pattern counts below threshold (",
116+
nfilter.tab, ") have been suppressed"))
117+
))
118+
}
119+
120+
#AGGREGATE FUNCTION
121+
# mdPatternDS

0 commit comments

Comments
 (0)