@@ -306,8 +306,18 @@ measure_riskWORK <- function(data, keyVars, w=NULL, missing=-999, hid=NULL, max_
306306# ' @param missing a integer value to be used as missing value in the C++ routine
307307# ' @param ldiv_index indices (or names) of the variables used for l-diversity
308308# ' @param l_recurs_c l-Diversity Constant
309- ldiversity <- function (obj , ldiv_index = NULL , l_recurs_c = 2 , missing = - 999 , ... ) {
310- ldiversityX(obj = obj , ldiv_index = ldiv_index , l_recurs_c = l_recurs_c , missing = missing , ... )
309+ ldiversity <- function (obj ,
310+ ldiv_index = NULL ,
311+ l_recurs_c = 2 ,
312+ missing = - 999 ,
313+ ... ) {
314+ ldiversityX(
315+ obj = obj ,
316+ ldiv_index = ldiv_index ,
317+ l_recurs_c = l_recurs_c ,
318+ missing = missing ,
319+ ...
320+ )
311321}
312322
313323setGeneric ("ldiversityX ", function(obj, ldiv_index=NULL, l_recurs_c=2, missing=-999, ...) {
@@ -321,23 +331,27 @@ definition=function(obj, ldiv_index=NULL, l_recurs_c=2, missing=-999) {
321331 n <- obj @ manipNumVars
322332 s <- obj @ manipStrataVar
323333 ldiv_index <- ldiv_index
324- if ( is.null(ldiv_index ) ) {
334+ if (is.null(ldiv_index )) {
325335 sensVar <- get.sdcMicroObj(obj , " sensibleVar" )
326- if ( is.null(sensVar ) ) {
336+ if (is.null(sensVar )) {
327337 err <- paste0(" You need to specify argument 'sensibleVar' in 'createSdcObj()'" )
328- err <- paste0(err , " or specify it directly (argument 'ldiv_index') so that the" )
338+ err <- paste0(err ,
339+ " or specify it directly (argument 'ldiv_index') so that the" )
329340 err <- paste0(err , " ldiversity risk-measure can be calculated!\n " )
330341 stop(err )
331342 } else {
332343 ldiv_index <- sensVar
333344 }
334345 }
335- if (! is.null(k ))
346+ if (! is.null(k )) {
336347 o [, colnames(k )] <- k
337- if (! is.null(n ))
348+ }
349+ if (! is.null(n )) {
338350 o [, colnames(n )] <- n
339- if (! is.null(s ))
351+ }
352+ if (! is.null(s )) {
340353 o $ sdcGUI_strataVar <- s
354+ }
341355 kV <- colnames(obj @ origData )[get.sdcMicroObj(obj , " keyVars" )]
342356 obj @ risk $ ldiversity <- ldiversityWORK(
343357 data = o ,
@@ -371,48 +385,83 @@ ldiversityWORK <- function(data, keyVars, ldiv_index, missing=-999, l_recurs_c=2
371385 stop(" Please define valid key variables" , call. = FALSE )
372386 }
373387 }
388+
389+ # Index of sensitive variable(s)
374390 if (! is.null(ldiv_index )) {
375391 if (is.numeric(ldiv_index )) {
376392 ldiv_var <- colnames(data )[ldiv_index ]
377- ldiv_index <- length(variables ) + 1 : length(ldiv_index )
378393 } else if (is.character(ldiv_index )) {
379394 ldiv_var <- ldiv_index
380- ldiv_index <- length(variables ) + 1 : length(ldiv_index )
381395 }
382- if (any(ldiv_var %in% variables ))
396+
397+ # Calculate the 1-based index for the C++ matrix (KeyVars + SensVars)
398+ ldiv_index_cpp <- length(variables ) + 1 : length(ldiv_index )
399+
400+ if (any(ldiv_var %in% variables )) {
383401 stop(" Sensitivity variable should not be a keyVariable" )
384- } else ldiv_var <- character (0 )
402+ }
403+ } else {
404+ ldiv_var <- character (0 )
405+ ldiv_index_cpp <- - 99
406+ }
385407
408+ # Prep data (factors/strings -> numeric)
386409 n_key_vars <- length(variables )
387410 dataX <- data [, c(variables , ldiv_var ), drop = FALSE ]
388411 for (i in 1 : ncol(dataX )) {
389- if (! is.numeric(dataX [, i ]))
390- dataX [, i ] <- as.numeric(unlist(dataX [, i ]))
412+ if (! is.numeric(dataX [, i ])) {
413+ dataX [, i ] <- as.numeric(as.factor(dataX [, i ]))
414+ }
391415 }
392416 dataX <- as.matrix(dataX )
393- ind <- do.call(order , data.frame (dataX ))
394- dataX <- dataX [ind , , drop = FALSE ]
395- ind <- order(c(1 : nrow(dataX ))[ind ])
396- if (is.null(ldiv_index ))
397- ldiv_index = - 99
398- if (length(ldiv_index ) > 5 )
417+
418+ # Order data for C++ Function
419+ # Matrix is ordered in a way so that NAs are grouped together for the C++ group-matching
420+ # na.last = TRUE ensures that NAs appear at the end of their respective groups
421+ ind <- do.call(order , c(as.data.frame(dataX ), list (na.last = TRUE )))
422+ dataX_sorted <- dataX [ind , , drop = FALSE ]
423+
424+ # We need an index to be able to restore original order after
425+ # calling the c++ function
426+ back_ind <- order(ind )
427+
428+ # Call C++ function
429+ if (length(ldiv_index_cpp ) > 5 ) {
399430 stop(" Maximal number of sensitivity variables is 5" )
400- res <- measure_risk_cpp(dataX , 0 , n_key_vars , l_recurs_c , ldiv_index , missing )
401- res $ Fk <- res $ Res [, 3 ]
402- res $ Res <- res $ Res [ind , ]
403- if (all(ldiv_index != - 99 )) {
404- res $ Mat_Risk <- res $ Mat_Risk [ind , ]
405- names(res )[names(res ) == " Mat_Risk" ] <- " ldiversity"
406- colnames(res $ ldiversity ) <- c(paste(rep(ldiv_var , each = 3 ), rep(c(" Distinct_Ldiversity" ,
407- " Entropy_Ldiversity" , " Recursive_Ldiversity" ), length(ldiv_index )), sep = " _" ),
408- " MultiEntropy_Ldiversity" , " MultiRecursive_Ldiversity" )
431+ }
432+
433+ res <- measure_risk_cpp(
434+ data = dataX_sorted ,
435+ weighted_R = 0 ,
436+ n_key_vars_R = n_key_vars ,
437+ l_recurs_c_R = l_recurs_c ,
438+ ldiv_index_R = ldiv_index_cpp ,
439+ missing_value_R = missing
440+ )
441+
442+ # Re-order results back to original order
443+ res $ Fk <- res $ Res [back_ind , 3 ]
444+
445+ if (all(ldiv_index_cpp != - 99 )) {
446+ # Reorder the risk matrix to match original data input
447+ ldiv_mat <- res $ Mat_Risk [back_ind , , drop = FALSE ]
448+
449+ # Specifiy column names
450+ col_names <- c(paste(rep(ldiv_var , each = 3 ), rep(
451+ c(
452+ " Distinct_Ldiversity" ,
453+ " Entropy_Ldiversity" ,
454+ " Recursive_Ldiversity"
455+ ), length(ldiv_var )), sep = " _" ),
456+ " MultiEntropy_Ldiversity" ,
457+ " MultiRecursive_Ldiversity" )
458+ colnames(ldiv_mat ) <- col_names
459+ res_final <- ldiv_mat
409460 } else {
410- res <- res [names( res ) != " Mat_Risk " ]
461+ res_final <- res $ Res [ back_ind , ]
411462 }
412- ind <- order(res $ Res [, 1 ], decreasing = TRUE )
413- res <- res $ ldiversity
414- class(res ) <- " ldiversity"
415- invisible (res )
463+ class(res_final ) <- " ldiversity"
464+ invisible (res_final )
416465}
417466
418467# ' Print method for objects of class measure_risk
0 commit comments