Skip to content

Commit 7eec8b4

Browse files
committed
RS-20388: Handle missing data properly in survey logistic regression models
1 parent 07f7dae commit 7eec8b4

2 files changed

Lines changed: 27 additions & 2 deletions

File tree

R/variables.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,9 @@ Probabilities.Regression <- function(object, newdata = NULL, ...)
216216
StopForUserError(sQuote("Probabilities"), " is not applicable to linear regression models.")
217217
if (isTRUE(object$stacked) && IsRServer())
218218
StopForUserError("Saving probabilitiles is currently not supported for stacked data.")
219-
newdata <- ValidateNewData(object, newdata)
219+
na.action <- if ("na.action" %in% ...names()) list(...)[["na.action"]] else na.pass
220+
newdata <- ValidateNewData(object, newdata) |>
221+
structure(na.action = na.action) # Ensure NA rows are preserved, survey models may drop them otherwise
220222
if (object$type %in% c("Ordered Logit", "Multinomial Logit"))
221223
{
222224
probs <- suppressWarnings(predict(object$original, newdata = newdata,
@@ -230,7 +232,7 @@ Probabilities.Regression <- function(object, newdata = NULL, ...)
230232

231233
if (object$type == "Binary Logit")
232234
{
233-
probs <- suppressWarnings(predict(object$original, newdata = newdata, na.action = na.pass, type = "response"))
235+
probs <- suppressWarnings(predict(object$original, newdata = newdata, na.action = na.action, type = "response"))
234236
outcome.levels <- levels(Observed(object))
235237
if (length(outcome.levels) == 1L)
236238
{

tests/testthat/test-dataproblems.R

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,3 +816,26 @@ test_that("Removing missing entirely missing variables", {
816816
)
817817
)
818818
})
819+
820+
test_that("RS-20388: Survey weighted models preserve number of respondents in predictions", {
821+
some.weighted.data.for.logistic <- data.frame(
822+
y = rbinom(100, size = 1, prob = 0.5),
823+
x1 = rnorm(100),
824+
weights = runif(100, min = 0.5, max = 2)
825+
)
826+
# Set some rows to missing
827+
is.na(some.weighted.data.for.logistic$x1) <- sample(1:100, size = 10)
828+
model <- Regression(
829+
y ~ x1, data = some.weighted.data.for.logistic,
830+
type = "Binary Logit",
831+
weights = some.weighted.data.for.logistic$weights,
832+
missing = "Exclude cases with missing data"
833+
)
834+
probabilities <- Probabilities(model)
835+
probabilities |> expect_type("double")
836+
probabilities |> nrow() |> expect_equal(100L)
837+
838+
probabilities.with.missing <- Probabilities(model, na.action = na.omit)
839+
probabilities.with.missing |> expect_type("double")
840+
probabilities.with.missing |> nrow() |> expect_equal(90L)
841+
})

0 commit comments

Comments
 (0)