@@ -274,8 +274,14 @@ match_on.bigglm <- function(x,
274274# ' redundancies among the variables by scaling down variables contributions in
275275# ' proportion to their correlations with other included variables.)
276276# '
277- # ' Euclidean distance is also available, via \code{method="euclidean"}, and
278- # ' ranked, Mahalanobis distance, via \code{method="rank_mahalanobis"}.
277+ # ' Euclidean distance is also available, via \code{method="euclidean"}, as
278+ # ' are two flavors of ranked-based Mahalanobis distance, via
279+ # ' \code{method="rank_mahalanobis"} or \code{method="pooled_cov_rank_mahalanobis"}.
280+ # ' Either rank-transforms the covariates first; they differ in whether
281+ # ' subsequent covariance of thus-transformed covariates is calculated
282+ # ' on all subjects or by pooling of with-group covariances across
283+ # ' treatment and control. The \code{method=} argument can be abbreviated
284+ # ' in the usual way (via [base::pmatch()]).
279285# '
280286# ' The treatment indicator \code{Z} as noted above must either be numeric
281287# ' (1 representing treated units and 0 control units) or logical
@@ -404,15 +410,19 @@ match_on.formula <- function(x,
404410 methodname <- as.character(class(method ))
405411 }
406412
407- which.method <- pmatch(methodname , c(" mahalanobis" , " euclidean" , " rank_mahalanobis" , " function" ), 4 )
413+ which.method <- pmatch(methodname ,
414+ c(" mahalanobis" , " euclidean" ,
415+ " rank_mahalanobis" , " pooled_cov_rank_mahalanobis" ,
416+ " function" ), 5 )
408417 tmp <- switch (which.method ,
409- makedist(z , data , compute_mahalanobis , within ),
410- makedist(z , data , compute_euclidean , within ),
411- makedist(z , data , compute_rank_mahalanobis , within ),
412- {
413- warning(" Passing a user-defined `method` to `match_on.formula` is not supported and results are not guaranteed. User-defined distances should use `match_on.function` instead." )
414- makedist(z , data , match.fun(method ), within )
415- }
418+ makedist(z , data , compute_mahalanobis , within ),
419+ makedist(z , data , compute_euclidean , within ),
420+ makedist(z , data , compute_rank_mahalanobis , within ),
421+ makedist(z , data , compute_pooled_cov_rank_mahalanobis , within ),
422+ {
423+ warning(" Passing a user-defined `method` to `match_on.formula` is not supported and results are not guaranteed. User-defined distances should use `match_on.function` instead." )
424+ makedist(z , data , match.fun(method ), within )
425+ }
416426 )
417427 rm(mf )
418428
@@ -509,19 +519,7 @@ compute_mahalanobis <- function(index, data, z) {
509519 cv <- mt + mc
510520 rm(mt , mc )
511521
512- inv.scale.matrix <- try(solve(cv ), silent = TRUE )
513-
514- if (inherits(inv.scale.matrix ," try-error" )) {
515- dnx <- dimnames(cv )
516- s <- svd(cv )
517- nz <- (s $ d > sqrt(.Machine $ double.eps ) * s $ d [1 ])
518- if (! any(nz )) stop(" covariance has rank zero" )
519-
520- inv.scale.matrix <- s $ v [, nz ] %*% (t(s $ u [, nz ])/ s $ d [nz ])
521- dimnames(inv.scale.matrix ) <- dnx [2 : 1 ]
522- rm(dnx , s , nz )
523- }
524-
522+ inv.scale.matrix <- safe_invert(cv )
525523 rm(cv )
526524
527525 return (mahalanobisHelper(data , index , inv.scale.matrix ))
@@ -548,19 +546,50 @@ compute_rank_mahalanobis <- function(index, data, z) {
548546 if (is.null(rownames(data )) | ! all(index %in% rownames(data )))
549547 stop(" data must have row names matching index" )
550548
551- # begin workaround solution to #128
552- all_treated <- rownames(data )[as.logical(z )]
553- all_control <- rownames(data )[! z ]
554- all_indices <- expand.grid(all_treated , all_control ,
555- KEEP.OUT.ATTRS = FALSE , stringsAsFactors = FALSE )
556- all_indices <- paste(all_indices [[1 ]], all_indices [[2 ]], sep = " %@%" )
557- short_indices <- paste(index [,1 ], index [,2 ], sep = " %@%" )
558- indices <- match(short_indices , all_indices )
559- if (any(is.na(indices ))) stop(" Unanticipated problem. (Make sure row names of data don't use the string '%@%'.)" )
560- # Now, since `r_smahal` is ignoring its `index` argument anyway:
561- rankdists <- sqrt(r_smahal(NULL , data , z ))
562- rankdists <- rankdists [indices ]
563- return (rankdists )
549+ data <- apply(data , 2 , rank )
550+ n <- nrow(data )
551+ m <- cov(data )
552+ cv <- scale_addressing_ties(nrow(data ), cov(data ))
553+ inv.scale.matrix <- safe_invert(cv )
554+ rm(cv )
555+
556+ return (mahalanobisHelper(data , index , inv.scale.matrix ))
557+ }
558+
559+ compute_pooled_cov_rank_mahalanobis <- function (index , data , z ) {
560+ if (! all(is.finite(data )))
561+ stop(" Infinite or NA values detected in data for Mahalanobis computations." )
562+
563+ if (is.null(rownames(data )) | ! all(index %in% rownames(data )))
564+ stop(" data must have row names matching index" )
565+
566+ data <- apply(data , 2 , rank )
567+
568+ if (sum(z ) == 1 ) {
569+ mt <- 0 # Addressing #168
570+ } else {
571+ treated <- data [z , ,drop = FALSE ]
572+ nt <- nrow(treated )
573+ mt <- scale_addressing_ties(nt , cov(treated ))
574+ mt <- mt * (sum(z ) - 1 ) / (length(z ) - 2 )
575+ }
576+
577+ if (sum(! z ) == 1 ) {
578+ mc <- 0 # Addressing #168
579+ } else {
580+ control <- data [! z , ,drop = FALSE ]
581+ nc <- nrow(control )
582+ mc <- scale_addressing_ties(nc , cov(control ))
583+ mc <- mc * (sum(! z ) - 1 ) / (length(! z ) - 2 )
584+ }
585+
586+ cv <- mt + mc
587+ rm(mt , mc )
588+
589+ inv.scale.matrix <- safe_invert(cv )
590+ rm(cv )
591+
592+ return (mahalanobisHelper(data , index , inv.scale.matrix ))
564593}
565594
566595# ' @details \bold{First argument (\code{x}): \code{function}.} The passed function
0 commit comments