diff --git a/R/mcmc-augmented-data-update.R b/R/mcmc-augmented-data-update.R index 4392b26..acc7375 100644 --- a/R/mcmc-augmented-data-update.R +++ b/R/mcmc-augmented-data-update.R @@ -156,7 +156,7 @@ update_error_indicators1 <- function(i, augmented_data, observed_dates, group, # Sample new date using randomly selected delay -sample_from_delay <- function(i, estimated_dates, group, model_info, +sample_from_delay <- function(i, augmented_data, group, model_info, rng) { is_date_in_delay <- model_info$is_date_in_delay[i, , group] @@ -168,9 +168,18 @@ sample_from_delay <- function(i, estimated_dates, group, model_info, model_info$delay_to[which_delays]) ## Filter to only delays where the other date is available (needed for swap) - valid_delays <- which_delays[!is.na(estimated_dates[other_date_idx])] - other_date_idx <- other_date_idx[!is.na(estimated_dates[other_date_idx])] - + is_available_date <- !is.na(augmented_data$estimated_dates[other_date_idx]) + valid_delays <- which_delays[is_available_date] + other_date_idx <- other_date_idx[is_available_date] + + if (length(valid_delays) > 1) { + is_correct <- + vlapply(augmented_data$error_indicators[other_date_idx], isFALSE) + if (any(is_correct)) { + valid_delays <- valid_delays[is_correct] + other_date_idx <- other_date_idx[is_correct] + } + } ## If it is involved in several delays, randomly select one delay_idx <- if (length(valid_delays) == 1) 1 else @@ -178,7 +187,7 @@ sample_from_delay <- function(i, estimated_dates, group, model_info, selected_delay <- valid_delays[delay_idx] ## Find the other date in this date pair - other_date <- estimated_dates[other_date_idx[delay_idx]] + other_date <- augmented_data$estimated_dates[other_date_idx[delay_idx]] ## Is date i the 'from' or 'to' in this delay is_from <- (i == model_info$delay_from[selected_delay]) @@ -233,8 +242,7 @@ propose_estimated_dates <- function(sampling_order, augmented_data, observed_dates[i] + monty::monty_random_real(rng) } else { augmented_data$estimated_dates[i] <- - sample_from_delay(i, augmented_data$estimated_dates, group, - model_info, rng) + sample_from_delay(i, augmented_data, group, model_info, rng) } } @@ -327,6 +335,7 @@ calc_proposal_density <- function(sampling_order, augmented_data, is_date_in_delay <- model_info$is_date_in_delay[, , group] is_date_in_group <- model_info$is_date_in_group[, group] + is_date_connected <- model_info$is_date_connected[, , group] dates <- which(is_date_in_group) is_resampled <- @@ -344,8 +353,16 @@ calc_proposal_density <- function(sampling_order, augmented_data, if (!isFALSE(augmented_data$error_indicators[i])) { ## which dates were available for sampling + connected_dates <- available_dates[is_date_connected[i, available_dates]] + if (length(connected_dates) > 1) { + is_correct <- + vlapply(augmented_data$error_indicators[connected_dates], isFALSE) + if (sum(is_correct) > 0) { + connected_dates <- connected_dates[is_correct] + } + } is_delay_available <- - colSums(is_date_in_delay[available_dates, , drop = FALSE]) > 0 + colSums(is_date_in_delay[connected_dates, , drop = FALSE]) > 0 ## which delays could be sampled from can_sample_from_delay <- is_date_in_delay[i, ] & is_delay_available diff --git a/tests/testthat/test-mcmc-augmented-data-update.R b/tests/testthat/test-mcmc-augmented-data-update.R index 93a44ef..9da15f1 100644 --- a/tests/testthat/test-mcmc-augmented-data-update.R +++ b/tests/testthat/test-mcmc-augmented-data-update.R @@ -444,6 +444,7 @@ test_that("estimated dates proposed correctly", { ## group 2, propose new (error) death date + ## based on delay 2, onset (date 1) to death (date 4) group <- 2 to_update <- 4 observed_dates <- c(NA, NA, 40, 68, NA) @@ -457,7 +458,7 @@ test_that("estimated dates proposed correctly", { expect_equal(augmented_data$estimated_dates[-to_update], augmented_data_new$estimated_dates[-to_update]) expect_equal(augmented_data_new$estimated_dates[to_update], - sample_from_delay(to_update, augmented_data_new$estimated_dates, + sample_from_delay(to_update, augmented_data_new, group, model_info, rng1)) @@ -476,15 +477,16 @@ test_that("estimated dates proposed correctly", { group, model_info, rng) expect_equal(augmented_data_new$error_indicators, augmented_data$error_indicators) - estimated_dates <- rep(NA, 5) + cmp <- list(estimated_dates = rep(NA, 5), + error_indicators = augmented_data$error_indicators) ## date 4 will be sampled based on observed date - estimated_dates[4] <- observed_dates[4] + monty::monty_random_real(rng1) + cmp$estimated_dates[4] <- observed_dates[4] + monty::monty_random_real(rng1) ## will then sample date 1 (connected to date 4) and then date 3 - estimated_dates[1] <- - sample_from_delay(1, estimated_dates, group, model_info, rng1) - estimated_dates[3] <- - sample_from_delay(3, estimated_dates, group, model_info, rng1) - expect_equal(augmented_data_new$estimated_dates, estimated_dates) + cmp$estimated_dates[1] <- + sample_from_delay(1, cmp, group, model_info, rng1) + cmp$estimated_dates[3] <- + sample_from_delay(3, cmp, group, model_info, rng1) + expect_equal(augmented_data_new$estimated_dates, cmp$estimated_dates) }) @@ -525,13 +527,12 @@ test_that("proposal density calculated correctly", { ## group 2, updated missing onset date ## based on delay 1 (gamma), onset (date 1) to report (date 3) - ## and delay 2 (gamma), onset (date 1) to death (date 4) - ## the two delays are equally likely to be used + ## but not delay 2 (gamma), onset (date 1) to death (date 4) + ## because date 3 is correct and date 4 is not updated <- 1 - d <- log(sum(dgamma(augmented_data$estimated_dates[c(3, 4)] - - augmented_data$estimated_dates[1], - shape = params[1, c(1, 2)], - rate = params[2, c(1, 2)]))) - log(2) + d <- dgamma(augmented_data$estimated_dates[3] - + augmented_data$estimated_dates[1], + shape = params[1, 1], rate = params[2, 1], log = TRUE) expect_equal( calc_proposal_density(updated, augmented_data, group, model_info), d) @@ -552,6 +553,34 @@ test_that("proposal density calculated correctly", { expect_equal(calc_proposal_density( sampling_order, augmented_data, group, model_info), d) + ## group 2, updated missing onset date + ## based on delay 1 (gamma), onset (date 1) to report (date 3) + ## and delay 2 (gamma), onset (date 1) to death (date 4) + ## the two delays are equally likely to be used as dates 3 and 4 both correct + augmented_data <- list(estimated_dates = c(20.5, NA, 40.2, 50.1, NA), + error_indicators = c(NA, NA, FALSE, FALSE, NA)) + updated <- 1 + d <- log(sum(dgamma(augmented_data$estimated_dates[c(3, 4)] - + augmented_data$estimated_dates[1], + shape = params[1, c(1, 2)], + rate = params[2, c(1, 2)]))) - log(2) + expect_equal( + calc_proposal_density(updated, augmented_data, group, model_info), d) + + ## group 2, updated missing onset date + ## based on delay 1 (gamma), onset (date 1) to report (date 3) + ## and delay 2 (gamma), onset (date 1) to death (date 4) + ## the two delays are equally likely to be used as dates 3 and 4 both error + augmented_data <- list(estimated_dates = c(20.5, NA, 40.2, 50.1, NA), + error_indicators = c(NA, NA, FALSE, FALSE, NA)) + updated <- 1 + d <- log(sum(dgamma(augmented_data$estimated_dates[c(3, 4)] - + augmented_data$estimated_dates[1], + shape = params[1, c(1, 2)], + rate = params[2, c(1, 2)]))) - log(2) + expect_equal( + calc_proposal_density(updated, augmented_data, group, model_info), d) + # group 4, 3 dates group <- 4 @@ -572,13 +601,12 @@ test_that("proposal density calculated correctly", { ## group 4, updated error hospitalisation date ## based on delay 5 (log-normal), onset (date 1) to hospitalisation (date 2) - ## and delay 6 (log-normal), hospitalisation (date 2) to death (date 4) - ## the two delays are equally likely to be used + ## but not delay 6 (log-normal), hospitalisation (date 2) to death (date 4) + ## because date 1 is correct and date 4 is missing updated <- 2 - d <- log(sum(dlnorm(augmented_data$estimated_dates[c(2, 4)] - - augmented_data$estimated_dates[c(1, 2)], - meanlog = params[1, c(5, 6)], - sdlog = params[2, c(5, 6)]))) - log(2) + d <- dlnorm(augmented_data$estimated_dates[2] - + augmented_data$estimated_dates[1], + meanlog = params[1, 5], sdlog = params[2, 5], log = TRUE) expect_equal( calc_proposal_density(updated, augmented_data, group, model_info), d) @@ -604,6 +632,38 @@ test_that("proposal density calculated correctly", { log = TRUE)) expect_equal( calc_proposal_density(updated, augmented_data, group, model_info), d) + + + ## group 4, updated error hospitalisation date + ## based on delay 5 (log-normal), onset (date 1) to hospitalisation (date 2) + ## and delay 6 (log-normal), hospitalisation (date 2) to death (date 4) + ## the two delays are equally likely to be used + ## because both date 1 and 4 are correct + updated <- 2 + augmented_data <- list(estimated_dates = c(10.3, 15.4, 30.2, 40.1, NA), + error_indicators = c(FALSE, TRUE, FALSE, FALSE, NA)) + d <- log(sum(dlnorm(augmented_data$estimated_dates[c(2, 4)] - + augmented_data$estimated_dates[c(1, 2)], + meanlog = params[1, c(5, 6)], + sdlog = params[2, c(5, 6)]))) - log(2) + expect_equal( + calc_proposal_density(updated, augmented_data, group, model_info), d) + + + ## group 4, updated error hospitalisation date + ## based on delay 5 (log-normal), onset (date 1) to hospitalisation (date 2) + ## and delay 6 (log-normal), hospitalisation (date 2) to death (date 4) + ## the two delays are equally likely to be used + ## because neither date 1 nor 4 are correct + updated <- 2 + augmented_data <- list(estimated_dates = c(10.3, 15.4, 30.2, 40.1, NA), + error_indicators = c(TRUE, TRUE, FALSE, NA, NA)) + d <- log(sum(dlnorm(augmented_data$estimated_dates[c(2, 4)] - + augmented_data$estimated_dates[c(1, 2)], + meanlog = params[1, c(5, 6)], + sdlog = params[2, c(5, 6)]))) - log(2) + expect_equal( + calc_proposal_density(updated, augmented_data, group, model_info), d) })