Skip to content

Commit c1ee481

Browse files
speeding up the scoreCaliper function by avoiding recreating vectors over and over
1 parent 2ee7b1c commit c1ee481

2 files changed

Lines changed: 70 additions & 8 deletions

File tree

R/match_on.R

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -763,9 +763,6 @@ scoreCaliper <- function(x, z, caliper) {
763763
# there may be a speed increase in pulling out the guts of that function and calling them directly
764764
control <- sort(control)
765765

766-
treatedids <- c()
767-
controlids <- c()
768-
769766
# NB: for reasons unknown, you must add the double.eps in the function
770767
# call, saving it in a variable (e.g. width.eps <- width +
771768
# .Machine$double.eps) will not work.
@@ -775,15 +772,39 @@ scoreCaliper <- function(x, z, caliper) {
775772
starts <- length(control) - findInterval(-(treated - caliper -
776773
.Machine$double.eps), rev(-control))
777774

775+
edges <- pmax(0, (stops - starts))
776+
n <- sum(edges)
777+
778+
treatedids <- rep.int(1:k, times = edges)
779+
controlids <- integer(n)
780+
781+
idx <- 1
778782
for (i in 1:k) {
779-
if (starts[i] < length(control) && stops[i] > 0 && starts[i] < stops[i]) {
780-
tmp <- seq(starts[i] + 1, stops[i])
781-
controlids <- c(controlids, tmp)
782-
treatedids <- c(treatedids, rep(i, length(tmp)))
783+
if (starts[i] < length(control) && stops[i] > 0 && edges[i] > 0) {
784+
tmp <- seq.int(starts[i] + 1, stops[i])
785+
j <- length(tmp)
786+
controlids[idx:(idx + j - 1)] <- tmp
787+
idx <- idx + j
783788
}
784789
}
785790

786-
makeInfinitySparseMatrix(rep(0, length(treatedids)), controlids, treatedids, names(control), names(treated))
791+
v <- integer(n)
792+
793+
# I noticed that if a control was not reachable by any treated unit it would
794+
# not appear in the resulting matrix, so we set names explicitly if they don't exist.
795+
if (is.null(names(control))) {
796+
nmsc <- as.character(1:length(control))
797+
} else {
798+
nmsc <- names(control)
799+
}
800+
801+
if (is.null(names(treated))) {
802+
nmst <- as.character(1:k)
803+
} else {
804+
nmst <- names(treated)
805+
}
806+
807+
makeInfinitySparseMatrix(v, controlids, treatedids, nmsc, nmst)
787808
}
788809

789810
#' @details \bold{First argument (\code{x}): \code{matrix} or \code{InfinitySparseMatrix}.} These just return their

tests/testthat/test.match_on.R

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,47 @@ test_that("Numeric: simple differences of scores", {
359359
expect_error(match_on(scores2, z = z2, caliper = c(1,2)), "scalar")
360360
})
361361

362+
test_that("Score caliper edge cases", {
363+
set.seed(203003)
364+
n1 <- 500
365+
n0 <- 300
366+
n <- n1 + n0
367+
x <- runif(n)
368+
z <- c(rep(1, n1), rep(0, n0))
369+
370+
cal <- 0.2
371+
372+
sc <- scoreCaliper(x, z, cal)
373+
374+
# this should limit the number of edges
375+
expect_true(length(sc) < 500 * 300)
376+
377+
# but it should look like a 500 by 300 matrix
378+
expect_equal(dim(sc), c(n1, n0))
379+
380+
scm <- as.matrix(sc) # make it dense for testing
381+
382+
## everyone should get matched
383+
scmr <- rowSums(is.finite(scm))
384+
scmc <- colSums(is.finite(scm))
385+
386+
expect_true(all(scmr > 0))
387+
expect_true(all(scmc > 0))
388+
389+
# now introduce some extreme treated and control units
390+
x2 <- x
391+
x2[1] <- min(x) - 2 * cal
392+
x2[n] <- max(x) + 2 * cal
393+
394+
sc2 <- scoreCaliper(x2, z, cal)
395+
expect_true(length(sc2) < length(sc))
396+
expect_equal(dim(sc2), c(n1, n0))
397+
398+
scm2 <- as.matrix(sc2) # make it dense for testing
399+
expect_equivalent(rowSums(is.finite(scm2))[1], 0)
400+
expect_equivalent(colSums(is.finite(scm2))[n0], 0)
401+
})
402+
362403
test_that("Numeric, issues with vector names", {
363404
# #189
364405

0 commit comments

Comments
 (0)