mrc-ide
diff --git a/‎nipah_workflow.R‎
Lines changed: 13 additions & 0 deletions b/‎nipah_workflow.R‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎shared/bsl_data_synthesis.R‎
Lines changed: 20 additions & 7 deletions b/‎shared/bsl_data_synthesis.R‎
Lines changed: 20 additions & 7 deletions
diff --git a/‎shared/cleaned_outbreak_data.RDS‎
1.31 KB b/‎shared/cleaned_outbreak_data.RDS‎
1.31 KB
diff --git a/‎shared/nipah/bsl_model_fits.RDS‎
365 KB b/‎shared/nipah/bsl_model_fits.RDS‎
365 KB
diff --git a/‎shared/nipah_functions.R‎
Lines changed: 52 additions & 25 deletions b/‎shared/nipah_functions.R‎
Lines changed: 52 additions & 25 deletions
diff --git a/‎src/db_cleaning/nipah/nipah_cleaning.R‎
Lines changed: 68 additions & 25 deletions b/‎src/db_cleaning/nipah/nipah_cleaning.R‎
Lines changed: 68 additions & 25 deletions
diff --git a/‎src/db_compilation/redcap_compilation.R‎
Lines changed: 1 addition & 1 deletion b/‎src/db_compilation/redcap_compilation.R‎
Lines changed: 1 addition & 1 deletion
@@ -42,10 +42,23 @@ orderly_run("db_cleaning",list(pathogen="NIPAH", debug_mode=TRUE))
 orderly_run("nipah_latex_tables", list(pathogen="NIPAH"))
 
 # *---------------------------- Plots and analysis ----------------------------*
+orderly_run("nipah_serology", list(pathogen="NIPAH"))
+
+# orderly_run("nipah_map", list(pathogen="NIPAH"))
+
 orderly_run("nipah_transmission", list(pathogen="NIPAH"))
 
 orderly_run("nipah_severity", list(pathogen="NIPAH"))
 
+orderly_run("nipah_bsl_data_synthesis", list(pathogen="NIPAH"))
+
+# I assume the masking below is caused by the BSL library, and that other
+# packages explicitly reference MASS (MASS::) when they need its functions:
+# MASS::select masks dplyr::select
+# MASS::area masks patchwork::area
+select <- dplyr::select
+area <- patchwork::area
+
 orderly_run("nipah_delays", list(pathogen="NIPAH"))
 
 orderly_run("nipah_summary", list(pathogen="NIPAH"))
 
@@ -188,7 +188,7 @@ bsl_summarise_posteriors <- function(posterior_samples_list, L = 50) {
 }
 
 
-# Posterior predictive draw samples
+# Posterior predictive draw samples - TODO: confirm this is not the same as the function below
 bsl_make_density_summary <- function(dist_name, post,
                                  x_seq = seq(0, 20, length.out = 300),
                                  n_draws = 200, L = 20) {
@@ -231,9 +231,11 @@ bsl_make_density_summary <- function(dist_name, post,
 # --------------------------
 # Posterior predictive density summaries (for plotting)
 # --------------------------
-bsl_make_density_summary <- function(dist_name, post,
-                                 x_seq = seq(0, 20, length.out = 400),
-                                 n_draws = 200, L = 20) {
+bsl_make_posterior_summary <- function(dist_name, post,
+                                       x_seq = seq(0, 20, length.out = 400),
+                                       n_draws = 200, L = 20,
+                                       posterior_cdf=FALSE){
+
   post <- as.matrix(post)
   draws <- sample(1:nrow(post), min(n_draws, nrow(post)))
   dens_mat <- matrix(NA, nrow = length(draws), ncol = length(x_seq))
@@ -251,17 +253,28 @@ bsl_make_density_summary <- function(dist_name, post,
         scale <- exp(loc_d); dweibull(x_seq, shape = phi, scale = scale)
       }
     })
+
     dens_mat[i, ] <- rowMeans(dens_l)
+    # dens_mat[i, ] <- t(apply(dens_l, 1, median))
+
   }
-  data.frame(
+
+  if (posterior_cdf){
+    dx <- c(x_seq[1], diff(x_seq))
+    dens_mat <- dens_mat * dx
+    dens_mat <- t(apply(dens_mat, 1, cumsum))
+  }
+
+  summary_df <- data.frame(
     x = x_seq,
-    mean = apply(dens_mat, 2, mean, na.rm = TRUE),
+    mean = apply(dens_mat, 2, median, na.rm = TRUE),
     low  = apply(dens_mat, 2, quantile, 0.025, na.rm = TRUE),
     high = apply(dens_mat, 2, quantile, 0.975, na.rm = TRUE),
     model = dist_name
   )
-}
 
+  return (summary_df)
+}
 
 # ===============================
 # AUTOMATED BSL DIAGNOSTIC REPORT
 
@@ -11,7 +11,7 @@ data_curation <- function(articles, outbreaks, models, parameters, plotting,swit
     mutate(new_refs = ifelse(refs %in% refs[duplicated(refs)],
                              paste0(sub("\\)$", "", refs),letters[counter],")"),
                              refs)) |>
-    select(-counter,-refs) |>
+    dplyr::select(-counter,-refs) |>
     rename(refs = new_refs) |>
     mutate(refs = str_to_title(refs))
 
@@ -70,7 +70,7 @@ data_curation <- function(articles, outbreaks, models, parameters, plotting,swit
     mutate(central = coalesce(parameter_value,
                               100*cfr_ifr_numerator/cfr_ifr_denominator,
                               0.5*(parameter_lower_bound+parameter_upper_bound))) |>
-    select(-c(no_unc))
+    dplyr::select(-c(no_unc))
 
   if (plotting) {
     parameters <- param4plot
@@ -113,7 +113,8 @@ curation <- function(articles, outbreaks, models, parameters, plotting) {
 # function to produce forest plot for given dataframe
 forest_plot <- function(df, label, color_column, lims, text_size = 11,
                         show_label = FALSE, custom_colours = NA,
-                        segment_show.legend=NA, sort=FALSE, qa_alpha=1) {
+                        segment_show.legend=NA, sort=FALSE, qa_alpha=1,
+                        point_size=3) {
   stopifnot(length(unique(df$parameter_unit[!is.na(df$parameter_unit)])) == 1)#values must have same units
 
   if (sort){
@@ -132,8 +133,8 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
   df$segment_alpha <- 1
 
   if(qa_alpha!=1){
-    df[df$qa_score<0.5, ]$plot_alpha <- qa_alpha
-    df[df$qa_score<0.5, ]$segment_alpha <- 0.65 * qa_alpha
+    df[df$qa_score<=0.5, ]$plot_alpha <- qa_alpha
+    df[df$qa_score<=0.5, ]$segment_alpha <- 0.65 * qa_alpha
   }
 
   cats <- length(unique(df[[color_column]]))
@@ -142,22 +143,22 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
                      y = urefs, yend = urefs, color = .data[[color_column]],),
                  linewidth=3, alpha = df$segment_alpha, show.legend = segment_show.legend) +
     geom_errorbar(aes(xmin=parameter_uncertainty_lower_value, xmax=parameter_uncertainty_upper_value,
-                      y = urefs),
+                      y = urefs, linetype="Uncertainty"),
                   width = 0.25, lwd=0.5, color = "black", alpha=df$plot_alpha) +
     geom_errorbar(data= df[!df$uncertainty_present,],
                   aes(xmin=parameter_2_lower_bound, xmax=parameter_2_upper_bound,
-                      y = urefs),
-                  width = 0.25, lwd=0.5, color = "black", linetype="dashed",
+                      y = urefs, linetype="Variability"),
+                  width = 0.25, lwd=0.5, color = "black",
                   lineend = "square", alpha=df[!(df$uncertainty_present),]$plot_alpha) +
     geom_errorbar(data= df[df$uncertainty_present,],
                   aes(xmin=parameter_2_lower_bound, xmax=parameter_2_upper_bound,
-                      y = urefs),
-                  width = 0.25, lwd=0.5, color = "black", linetype="dashed",
+                      y = urefs, linetype="Variability"),
+                  width = 0.25, lwd=0.5, color = "black",
                   lineend = "square", position = position_nudge(y=-0.25),
                   alpha=df[df$uncertainty_present,]$plot_alpha) +
     geom_point(aes(x = parameter_value, y = urefs,
                    shape = parameter_value_type, fill = .data[[color_column]]),
-               alpha=df$plot_alpha, size = 3, stroke = 1, color = "black")
+               alpha=df$plot_alpha, size = point_size, stroke = 1, color = "black")
 
   if (all(df$parameter_class=="Reproduction number")) {
     gg <- gg +
@@ -172,6 +173,9 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
                                     Other = 23, `Central - unspecified`=25),
                          breaks = c("Mean", "Median", "Unspecified", "Other",
                                     "Central - unspecified")) +
+      scale_linetype_manual(name   = "Variation Type",
+                            values = c("Uncertainty" = "solid","Variability" = "dashed"),
+                            breaks = c("Uncertainty", "Variability")) +
       scale_x_continuous(limits = lims, expand = c(0, 0)) +
       scale_y_discrete(labels = setNames(df$refs, df$urefs)) +
       labs(x = label, y = NULL) +
@@ -187,6 +191,9 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
                                     Other = 23, `Central - unspecified`=25),
                          breaks = c("Mean", "Median", "Unspecified", "Other",
                                     "Central - unspecified")) +
+      scale_linetype_manual(name   = "Variation Type",
+                            values = c("Uncertainty" = "solid","Variability" = "dashed"),
+                            breaks = c("Uncertainty", "Variability")) +
       scale_x_continuous(limits = lims, expand = c(0, 0)) +
       scale_y_discrete(labels = setNames(df$refs, df$urefs)) +
       labs(x = label, y = NULL) +
@@ -196,9 +203,13 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
   }
 
   if (cats == 1) {
-    gg <- gg + guides(fill = "none", color="none", shape = guide_legend(title = NULL,order = 1))
+    gg <- gg + guides(fill = "none", color="none",
+                      shape = guide_legend(title = NULL,order = 1),
+                      linetype=guide_legend(title = NULL,order = 2))
   } else {
-    gg <- gg + guides(fill = "none", color = guide_legend(title = NULL,order = 1), shape = guide_legend(title = NULL,order = 2))}
+    gg <- gg + guides(fill = "none", color = guide_legend(title = NULL,order = 1),
+                      shape = guide_legend(title = NULL,order = 2),
+                      linetype=guide_legend(title = NULL, order = 3))}
 
   if(show_label)
     gg <- gg + geom_text_repel(aes(x = coalesce(parameter_value), y = urefs, label = population_country_ISO), nudge_y = 0.5, segment.color = "grey50" )
@@ -332,7 +343,7 @@ metamean_wrap <- function(dataframe, estmeansd_method,
            digits = digits, digits.sd = digits, digits.weight = digits,
            col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
            weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
-           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10, colgap.forest.left = paste0( colgap_shift,"cm"))
+           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 13, colgap.forest.left = paste0( colgap_shift,"cm"))
     dev.off()
   } else {
     mtan <- metamean(data = dataframe,
@@ -357,7 +368,7 @@ metamean_wrap <- function(dataframe, estmeansd_method,
            digits = digits, digits.sd = digits, digits.weight = digits,
            col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
            weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
-           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10)
+           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 13)
     dev.off()
   }
 
@@ -497,7 +508,7 @@ metagen_wrap <- function(dataframe, estmeansd_method,
            digits = digits, digits.sd = digits, digits.weight = digits,
            col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
            weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
-           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10)
+           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 11.5)
     dev.off()
   } else {
     mtan <- metagen(data = dataframe,
@@ -525,7 +536,7 @@ metagen_wrap <- function(dataframe, estmeansd_method,
            digits = digits, digits.sd = digits, digits.weight = digits,
            col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
            weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
-           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10)
+           at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 11.5)
     dev.off()
   }
 
@@ -562,15 +573,24 @@ metaprop_wrap <- function(dataframe, subgroup,
                      method.tau = "ML")
 
     png(file = "temp.png", width = width, height = height, res = resolution)
+    par(mar = c(2, 2, 2, 1))
     forest(mtan, layout = "RevMan5",
            overall = plot_pooled, pooled.events = TRUE,
            print.subgroup.name = FALSE, sort.subgroup = sort_by_subg,
            study.results = plot_study,
            digits = digits,
-           col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
-           col.subgroup = "black", col.inside = "black",
-           weight.study = "same", #col.square.lines = "green", col.square = "blue", #not working
-           at = at, xlim = xlim, xlab="Case Fatality Ratio", fontsize=11)
+           col.diamond.lines = "black",col.diamond.common = colour,
+           col.diamond.random = colour,
+           col.square = colour, col.square.lines = "black",
+           col.study = "black", col.subgroup = "black",
+           col.inside = "black", weight.study = "same",
+           at = at, xlim = xlim, xlab="Case Fatality Ratio",
+           fs.predict.labels = 11.5,
+           fs.hetstat=11,
+           fs.test.subgroup = 11,
+           fs.axis = 11,
+           fontsize = 14,
+           plotwidth = "72.5mm")
     dev.off()
   } else {
     mtan <- metaprop(data = dataframe,
@@ -586,10 +606,17 @@ metaprop_wrap <- function(dataframe, subgroup,
            overall = plot_pooled, pooled.events = TRUE,
            study.results = plot_study,
            digits = digits,
-           col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
-           col.subgroup = "black", col.inside = "black",
-           weight.study = "same", #col.square.lines = "green", col.square = "blue", #not working
-           at = at, xlim = xlim, xlab="Case Fatality Ratio", fontsize=11)
+           col.diamond.lines = "black",col.diamond.common = colour,
+           col.diamond.random = colour,
+           col.square = colour, col.square.lines = "black",
+           col.subgroup = "black", col.inside = "black", weight.study = "same",
+           at = at, xlim = xlim, xlab="Case Fatality Ratio",
+           fs.predict.labels = 11.5,
+           fs.hetstat=11,
+           fs.test.subgroup = 11,
+           fs.axis = 11,
+           fontsize = 14,
+           plotwidth = "72.5mm")
     dev.off()
   }
 
 
@@ -279,6 +279,12 @@ param_cleaning <- function(df){
     paste0("Proportion asymptomatic is reported in the paper.",
            "Denominator is contacts of Nipah patients who gave blood specimen")
 
+  # Sero uncert issue: upper bound reported as 8.35 in the paper but this is
+  # clearly a typo; other rows in the table with the same estimate have a 95% CI
+  # upper bound of 38.35
+  sero_2892_sero_uncert_filter <- df$access_param_id=="008_003"
+  df[sero_2892_sero_uncert_filter, "parameter_uncertainty_upper_value"] <- 38.35
+
   # CovID: 3057
   delay_3057_incp_filter <- df$access_param_id=="151_001"
   df[delay_3057_incp_filter, "parameter_2_value_type"] <- "Range (paired)"
@@ -702,32 +708,69 @@ param_cleaning <- function(df){
   df[df$access_param_id=="093_003", "parameter_unit"] <- "Percentage (%)"
 
   # Split 2931 incubation row
-  hd_2931_row_filter <- df$access_param_id=="138_016"
-  df[hd_2931_row_filter, "distribution_type"] <- NA
-  df[hd_2931_row_filter, "distribution_par1_type"] <- NA
-  df[hd_2931_row_filter, "distribution_par1_value"] <- NA
-  df[hd_2931_row_filter, "distribution_par1_uncertainty"] <- NA
-  df[hd_2931_row_filter, "distribution_par2_type"] <- NA
-  df[hd_2931_row_filter, "distribution_par2_value"] <- NA
-  df[hd_2931_row_filter, "distribution_par2_uncertainty"] <- NA
-
-  new_2931_row <- df[hd_2931_row_filter, ]
-  new_2931_row$parameter_data_id  <- generate_new_id(df, "parameter_data_id", 10)
+  hd_2931_incp_row_filter <- df$access_param_id=="138_016"
+
+  new_2931_incp_row <- df[hd_2931_incp_row_filter, ]
+  new_2931_incp_row$parameter_data_id  <- generate_new_id(
+    df, "parameter_data_id", 10)
+  # No corresponding redcap entry so make an ID
+  new_2931_incp_row$access_param_id  <- "138_3141"
+  new_2931_incp_row$parameter_value <- 9.7
+  new_2931_incp_row$parameter_value_type <- "Mean"
+
+  new_2931_incp_row$parameter_statistical_approach <- "Estimated model parameter"
+  new_2931_incp_row$parameter_paired <- "No"
+  new_2931_incp_row$parameter_2_unit <- NA
+  new_2931_incp_row$method_2_from_supplement <- NA
+  new_2931_incp_row$parameter_2_statistical_approach <- NA
+
+  new_2931_incp_row$parameter_2_value_type <- NA
+  new_2931_incp_row$parameter_2_lower_bound <- NA
+  new_2931_incp_row$parameter_2_upper_bound <- NA
+
+  # Remove dist param values
+  df[hd_2931_incp_row_filter, "distribution_type"] <- NA
+  df[hd_2931_incp_row_filter, "distribution_par1_type"] <- NA
+  df[hd_2931_incp_row_filter, "distribution_par1_value"] <- NA
+  df[hd_2931_incp_row_filter, "distribution_par1_uncertainty"] <- NA
+  df[hd_2931_incp_row_filter, "distribution_par2_type"] <- NA
+  df[hd_2931_incp_row_filter, "distribution_par2_value"] <- NA
+  df[hd_2931_incp_row_filter, "distribution_par2_uncertainty"] <- NA
+
+  df <- rbind(df, new_2931_incp_row)
+
+  # Split 2931 serial interval row
+  hd_2931_si_row_filter <- df$access_param_id=="138_015"
+
+  new_2931_si_row <- df[hd_2931_si_row_filter, ]
+  new_2931_si_row$parameter_data_id  <- generate_new_id(
+    df, "parameter_data_id", 10)
+
   # No corresponding redcap entry so make an ID
-  new_2931_row$access_param_id  <- "138_3141"
-  new_2931_row$parameter_value <- 9.7
-  new_2931_row$parameter_value_type <- "Mean"
-
-  new_2931_row$parameter_statistical_approach <- "Estimated model parameter"
-  new_2931_row$parameter_paired <- "No"
-  new_2931_row$parameter_2_unit <- NA
-  new_2931_row$method_2_from_supplement <- NA
-  new_2931_row$parameter_2_statistical_approach <- NA
-
-  new_2931_row$parameter_2_value_type <- NA
-  new_2931_row$parameter_2_lower_bound <- NA
-  new_2931_row$parameter_2_upper_bound <- NA
-  df <- rbind(df, new_2931_row)
+  new_2931_si_row$access_param_id  <- "138_2718"
+  new_2931_si_row$parameter_value <- 13
+  new_2931_si_row$parameter_value_type <- "Median"
+
+  new_2931_si_row$parameter_statistical_approach <- "Estimated model parameter"
+  new_2931_si_row$parameter_paired <- "No"
+  new_2931_si_row$parameter_2_unit <- NA
+  new_2931_si_row$method_2_from_supplement <- NA
+  new_2931_si_row$parameter_2_statistical_approach <- NA
+
+  new_2931_si_row$parameter_2_value_type <- NA
+  new_2931_si_row$parameter_2_lower_bound <- NA
+  new_2931_si_row$parameter_2_upper_bound <- NA
+
+  # Remove dist param values
+  df[hd_2931_si_row_filter, "distribution_type"] <- NA
+  df[hd_2931_si_row_filter, "distribution_par1_type"] <- NA
+  df[hd_2931_si_row_filter, "distribution_par1_value"] <- NA
+  df[hd_2931_si_row_filter, "distribution_par1_uncertainty"] <- NA
+  df[hd_2931_si_row_filter, "distribution_par2_type"] <- NA
+  df[hd_2931_si_row_filter, "distribution_par2_value"] <- NA
+  df[hd_2931_si_row_filter, "distribution_par2_uncertainty"] <- NA
+
+  df <- rbind(df, new_2931_si_row)
 
   # Labels for IQR and Range are different for variability so copying from
   # uncertainty results in different labels
 
@@ -200,7 +200,7 @@ orderly_dependency(
 
 # Manually fixed files and "cleaning" script - these need to be in the
 # src/db_compilation folder
-orderly_resource(fixing_files)
+orderly_resource(setNames(fixing_files, fixing_files))
 
 ## Outputs
 orderly_artefact(