Skip to content

Commit 1cbdb4e

Browse files
authored
Merge pull request #80 from mrc-ide/nipah_workflow_updates
Adds more Nipah tasks & workflow updates
2 parents 6da2708 + 0146f71 commit 1cbdb4e

15 files changed

Lines changed: 1396 additions & 1075 deletions

File tree

nipah_workflow.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,23 @@ orderly_run("db_cleaning",list(pathogen="NIPAH", debug_mode=TRUE))
4242
orderly_run("nipah_latex_tables", list(pathogen="NIPAH"))
4343

4444
# *---------------------------- Plots and analysis ----------------------------*
45+
orderly_run("nipah_serology", list(pathogen="NIPAH"))
46+
47+
# orderly_run("nipah_map", list(pathogen="NIPAH"))
48+
4549
orderly_run("nipah_transmission", list(pathogen="NIPAH"))
4650

4751
orderly_run("nipah_severity", list(pathogen="NIPAH"))
4852

53+
orderly_run("nipah_bsl_data_synthesis", list(pathogen="NIPAH"))
54+
55+
# I assume the issue below is caused by the BSL library and other packages will
56+
# explicitly reference MASS when a function is needed
57+
# MASS::select masks dplyr::select
58+
# MASS::area masks patchwork::select
59+
select <- dplyr::select
60+
area <- patchwork::area
61+
4962
orderly_run("nipah_delays", list(pathogen="NIPAH"))
5063

5164
orderly_run("nipah_summary", list(pathogen="NIPAH"))

shared/bsl_data_synthesis.R

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ bsl_summarise_posteriors <- function(posterior_samples_list, L = 50) {
188188
}
189189

190190

191-
# Posterior predictive draw samples
191+
# Posterior predictive draw samples - confirm not same as below
192192
bsl_make_density_summary <- function(dist_name, post,
193193
x_seq = seq(0, 20, length.out = 300),
194194
n_draws = 200, L = 20) {
@@ -231,9 +231,11 @@ bsl_make_density_summary <- function(dist_name, post,
231231
# --------------------------
232232
# Posterior predictive density summaries (for plotting)
233233
# --------------------------
234-
bsl_make_density_summary <- function(dist_name, post,
235-
x_seq = seq(0, 20, length.out = 400),
236-
n_draws = 200, L = 20) {
234+
bsl_make_posterior_summary <- function(dist_name, post,
235+
x_seq = seq(0, 20, length.out = 400),
236+
n_draws = 200, L = 20,
237+
posterior_cdf=FALSE){
238+
237239
post <- as.matrix(post)
238240
draws <- sample(1:nrow(post), min(n_draws, nrow(post)))
239241
dens_mat <- matrix(NA, nrow = length(draws), ncol = length(x_seq))
@@ -251,17 +253,28 @@ bsl_make_density_summary <- function(dist_name, post,
251253
scale <- exp(loc_d); dweibull(x_seq, shape = phi, scale = scale)
252254
}
253255
})
256+
254257
dens_mat[i, ] <- rowMeans(dens_l)
258+
# dens_mat[i, ] <- t(apply(dens_l, 1, median))
259+
255260
}
256-
data.frame(
261+
262+
if (posterior_cdf){
263+
dx <- c(x_seq[1], diff(x_seq))
264+
dens_mat <- dens_mat * dx
265+
dens_mat <- t(apply(dens_mat, 1, cumsum))
266+
}
267+
268+
summary_df <- data.frame(
257269
x = x_seq,
258-
mean = apply(dens_mat, 2, mean, na.rm = TRUE),
270+
mean = apply(dens_mat, 2, median, na.rm = TRUE),
259271
low = apply(dens_mat, 2, quantile, 0.025, na.rm = TRUE),
260272
high = apply(dens_mat, 2, quantile, 0.975, na.rm = TRUE),
261273
model = dist_name
262274
)
263-
}
264275

276+
return (summary_df)
277+
}
265278

266279
# ===============================
267280
# AUTOMATED BSL DIAGNOSTIC REPORT

shared/cleaned_outbreak_data.RDS

1.31 KB
Binary file not shown.

shared/nipah/bsl_model_fits.RDS

365 KB
Binary file not shown.

shared/nipah_functions.R

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ data_curation <- function(articles, outbreaks, models, parameters, plotting,swit
1111
mutate(new_refs = ifelse(refs %in% refs[duplicated(refs)],
1212
paste0(sub("\\)$", "", refs),letters[counter],")"),
1313
refs)) |>
14-
select(-counter,-refs) |>
14+
dplyr::select(-counter,-refs) |>
1515
rename(refs = new_refs) |>
1616
mutate(refs = str_to_title(refs))
1717

@@ -70,7 +70,7 @@ data_curation <- function(articles, outbreaks, models, parameters, plotting,swit
7070
mutate(central = coalesce(parameter_value,
7171
100*cfr_ifr_numerator/cfr_ifr_denominator,
7272
0.5*(parameter_lower_bound+parameter_upper_bound))) |>
73-
select(-c(no_unc))
73+
dplyr::select(-c(no_unc))
7474

7575
if (plotting) {
7676
parameters <- param4plot
@@ -113,7 +113,8 @@ curation <- function(articles, outbreaks, models, parameters, plotting) {
113113
# function to produce forest plot for given dataframe
114114
forest_plot <- function(df, label, color_column, lims, text_size = 11,
115115
show_label = FALSE, custom_colours = NA,
116-
segment_show.legend=NA, sort=FALSE, qa_alpha=1) {
116+
segment_show.legend=NA, sort=FALSE, qa_alpha=1,
117+
point_size=3) {
117118
stopifnot(length(unique(df$parameter_unit[!is.na(df$parameter_unit)])) == 1)#values must have same units
118119

119120
if (sort){
@@ -132,8 +133,8 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
132133
df$segment_alpha <- 1
133134

134135
if(qa_alpha!=1){
135-
df[df$qa_score<0.5, ]$plot_alpha <- qa_alpha
136-
df[df$qa_score<0.5, ]$segment_alpha <- 0.65 * qa_alpha
136+
df[df$qa_score<=0.5, ]$plot_alpha <- qa_alpha
137+
df[df$qa_score<=0.5, ]$segment_alpha <- 0.65 * qa_alpha
137138
}
138139

139140
cats <- length(unique(df[[color_column]]))
@@ -142,22 +143,22 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
142143
y = urefs, yend = urefs, color = .data[[color_column]],),
143144
linewidth=3, alpha = df$segment_alpha, show.legend = segment_show.legend) +
144145
geom_errorbar(aes(xmin=parameter_uncertainty_lower_value, xmax=parameter_uncertainty_upper_value,
145-
y = urefs),
146+
y = urefs, linetype="Uncertainty"),
146147
width = 0.25, lwd=0.5, color = "black", alpha=df$plot_alpha) +
147148
geom_errorbar(data= df[!df$uncertainty_present,],
148149
aes(xmin=parameter_2_lower_bound, xmax=parameter_2_upper_bound,
149-
y = urefs),
150-
width = 0.25, lwd=0.5, color = "black", linetype="dashed",
150+
y = urefs, linetype="Variability"),
151+
width = 0.25, lwd=0.5, color = "black",
151152
lineend = "square", alpha=df[!(df$uncertainty_present),]$plot_alpha) +
152153
geom_errorbar(data= df[df$uncertainty_present,],
153154
aes(xmin=parameter_2_lower_bound, xmax=parameter_2_upper_bound,
154-
y = urefs),
155-
width = 0.25, lwd=0.5, color = "black", linetype="dashed",
155+
y = urefs, linetype="Variability"),
156+
width = 0.25, lwd=0.5, color = "black",
156157
lineend = "square", position = position_nudge(y=-0.25),
157158
alpha=df[df$uncertainty_present,]$plot_alpha) +
158159
geom_point(aes(x = parameter_value, y = urefs,
159160
shape = parameter_value_type, fill = .data[[color_column]]),
160-
alpha=df$plot_alpha, size = 3, stroke = 1, color = "black")
161+
alpha=df$plot_alpha, size = point_size, stroke = 1, color = "black")
161162

162163
if (all(df$parameter_class=="Reproduction number")) {
163164
gg <- gg +
@@ -172,6 +173,9 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
172173
Other = 23, `Central - unspecified`=25),
173174
breaks = c("Mean", "Median", "Unspecified", "Other",
174175
"Central - unspecified")) +
176+
scale_linetype_manual(name = "Variation Type",
177+
values = c("Uncertainty" = "solid","Variability" = "dashed"),
178+
breaks = c("Uncertainty", "Variability")) +
175179
scale_x_continuous(limits = lims, expand = c(0, 0)) +
176180
scale_y_discrete(labels = setNames(df$refs, df$urefs)) +
177181
labs(x = label, y = NULL) +
@@ -187,6 +191,9 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
187191
Other = 23, `Central - unspecified`=25),
188192
breaks = c("Mean", "Median", "Unspecified", "Other",
189193
"Central - unspecified")) +
194+
scale_linetype_manual(name = "Variation Type",
195+
values = c("Uncertainty" = "solid","Variability" = "dashed"),
196+
breaks = c("Uncertainty", "Variability")) +
190197
scale_x_continuous(limits = lims, expand = c(0, 0)) +
191198
scale_y_discrete(labels = setNames(df$refs, df$urefs)) +
192199
labs(x = label, y = NULL) +
@@ -196,9 +203,13 @@ forest_plot <- function(df, label, color_column, lims, text_size = 11,
196203
}
197204

198205
if (cats == 1) {
199-
gg <- gg + guides(fill = "none", color="none", shape = guide_legend(title = NULL,order = 1))
206+
gg <- gg + guides(fill = "none", color="none",
207+
shape = guide_legend(title = NULL,order = 1),
208+
linetype=guide_legend(title = NULL,order = 2))
200209
} else {
201-
gg <- gg + guides(fill = "none", color = guide_legend(title = NULL,order = 1), shape = guide_legend(title = NULL,order = 2))}
210+
gg <- gg + guides(fill = "none", color = guide_legend(title = NULL,order = 1),
211+
shape = guide_legend(title = NULL,order = 2),
212+
linetype=guide_legend(title = NULL, order = 3))}
202213

203214
if(show_label)
204215
gg <- gg + geom_text_repel(aes(x = coalesce(parameter_value), y = urefs, label = population_country_ISO), nudge_y = 0.5, segment.color = "grey50" )
@@ -332,7 +343,7 @@ metamean_wrap <- function(dataframe, estmeansd_method,
332343
digits = digits, digits.sd = digits, digits.weight = digits,
333344
col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
334345
weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
335-
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10, colgap.forest.left = paste0( colgap_shift,"cm"))
346+
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 13, colgap.forest.left = paste0( colgap_shift,"cm"))
336347
dev.off()
337348
} else {
338349
mtan <- metamean(data = dataframe,
@@ -357,7 +368,7 @@ metamean_wrap <- function(dataframe, estmeansd_method,
357368
digits = digits, digits.sd = digits, digits.weight = digits,
358369
col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
359370
weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
360-
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10)
371+
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 13)
361372
dev.off()
362373
}
363374

@@ -497,7 +508,7 @@ metagen_wrap <- function(dataframe, estmeansd_method,
497508
digits = digits, digits.sd = digits, digits.weight = digits,
498509
col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
499510
weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
500-
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10)
511+
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 11.5)
501512
dev.off()
502513
} else {
503514
mtan <- metagen(data = dataframe,
@@ -525,7 +536,7 @@ metagen_wrap <- function(dataframe, estmeansd_method,
525536
digits = digits, digits.sd = digits, digits.weight = digits,
526537
col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
527538
weight.study = "same", col.square.lines = "black", col.square = colour, col.study = "black", col.inside = "black",
528-
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 10)
539+
at = seq(lims[1],lims[2],by=2), xlim = lims, xlab = label, fontsize = 11.5)
529540
dev.off()
530541
}
531542

@@ -562,15 +573,24 @@ metaprop_wrap <- function(dataframe, subgroup,
562573
method.tau = "ML")
563574

564575
png(file = "temp.png", width = width, height = height, res = resolution)
576+
par(mar = c(2, 2, 2, 1))
565577
forest(mtan, layout = "RevMan5",
566578
overall = plot_pooled, pooled.events = TRUE,
567579
print.subgroup.name = FALSE, sort.subgroup = sort_by_subg,
568580
study.results = plot_study,
569581
digits = digits,
570-
col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
571-
col.subgroup = "black", col.inside = "black",
572-
weight.study = "same", #col.square.lines = "green", col.square = "blue", #not working
573-
at = at, xlim = xlim, xlab="Case Fatality Ratio", fontsize=11)
582+
col.diamond.lines = "black",col.diamond.common = colour,
583+
col.diamond.random = colour,
584+
col.square = colour, col.square.lines = "black",
585+
col.study = "black", col.subgroup = "black",
586+
col.inside = "black", weight.study = "same",
587+
at = at, xlim = xlim, xlab="Case Fatality Ratio",
588+
fs.predict.labels = 11.5,
589+
fs.hetstat=11,
590+
fs.test.subgroup = 11,
591+
fs.axis = 11,
592+
fontsize = 14,
593+
plotwidth = "72.5mm")
574594
dev.off()
575595
} else {
576596
mtan <- metaprop(data = dataframe,
@@ -586,10 +606,17 @@ metaprop_wrap <- function(dataframe, subgroup,
586606
overall = plot_pooled, pooled.events = TRUE,
587607
study.results = plot_study,
588608
digits = digits,
589-
col.diamond.lines = "black",col.diamond.common = colour, col.diamond.random = colour,
590-
col.subgroup = "black", col.inside = "black",
591-
weight.study = "same", #col.square.lines = "green", col.square = "blue", #not working
592-
at = at, xlim = xlim, xlab="Case Fatality Ratio", fontsize=11)
609+
col.diamond.lines = "black",col.diamond.common = colour,
610+
col.diamond.random = colour,
611+
col.square = colour, col.square.lines = "black",
612+
col.subgroup = "black", col.inside = "black", weight.study = "same",
613+
at = at, xlim = xlim, xlab="Case Fatality Ratio",
614+
fs.predict.labels = 11.5,
615+
fs.hetstat=11,
616+
fs.test.subgroup = 11,
617+
fs.axis = 11,
618+
fontsize = 14,
619+
plotwidth = "72.5mm")
593620
dev.off()
594621
}
595622

src/db_cleaning/nipah/nipah_cleaning.R

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,12 @@ param_cleaning <- function(df){
279279
paste0("Proportion asymptomatic is reported in the paper.",
280280
"Denominator is contacts of Nipah patients who gave blood specimen")
281281

282+
# Sero uncert issue: Reported as 8.35 in the paper but this is clearly a typo;
283+
# other estimates in the table have the same estimate but a 95% CI upper bound
284+
# of 38.35
285+
sero_2892_sero_uncert_filter <- df$access_param_id=="008_003"
286+
df[sero_2892_sero_uncert_filter, "parameter_uncertainty_upper_value"] <- 38.35
287+
282288
# CovID: 3057
283289
delay_3057_incp_filter <- df$access_param_id=="151_001"
284290
df[delay_3057_incp_filter, "parameter_2_value_type"] <- "Range (paired)"
@@ -702,32 +708,69 @@ param_cleaning <- function(df){
702708
df[df$access_param_id=="093_003", "parameter_unit"] <- "Percentage (%)"
703709

704710
# Split 2931 incubation row
705-
hd_2931_row_filter <- df$access_param_id=="138_016"
706-
df[hd_2931_row_filter, "distribution_type"] <- NA
707-
df[hd_2931_row_filter, "distribution_par1_type"] <- NA
708-
df[hd_2931_row_filter, "distribution_par1_value"] <- NA
709-
df[hd_2931_row_filter, "distribution_par1_uncertainty"] <- NA
710-
df[hd_2931_row_filter, "distribution_par2_type"] <- NA
711-
df[hd_2931_row_filter, "distribution_par2_value"] <- NA
712-
df[hd_2931_row_filter, "distribution_par2_uncertainty"] <- NA
713-
714-
new_2931_row <- df[hd_2931_row_filter, ]
715-
new_2931_row$parameter_data_id <- generate_new_id(df, "parameter_data_id", 10)
711+
hd_2931_incp_row_filter <- df$access_param_id=="138_016"
712+
713+
new_2931_incp_row <- df[hd_2931_incp_row_filter, ]
714+
new_2931_incp_row$parameter_data_id <- generate_new_id(
715+
df, "parameter_data_id", 10)
716+
# No corresponding redcap entry so make an ID
717+
new_2931_incp_row$access_param_id <- "138_3141"
718+
new_2931_incp_row$parameter_value <- 9.7
719+
new_2931_incp_row$parameter_value_type <- "Mean"
720+
721+
new_2931_incp_row$parameter_statistical_approach <- "Estimated model parameter"
722+
new_2931_incp_row$parameter_paired <- "No"
723+
new_2931_incp_row$parameter_2_unit <- NA
724+
new_2931_incp_row$method_2_from_supplement <- NA
725+
new_2931_incp_row$parameter_2_statistical_approach <- NA
726+
727+
new_2931_incp_row$parameter_2_value_type <- NA
728+
new_2931_incp_row$parameter_2_lower_bound <- NA
729+
new_2931_incp_row$parameter_2_upper_bound <- NA
730+
731+
# Remove dist param values
732+
df[hd_2931_incp_row_filter, "distribution_type"] <- NA
733+
df[hd_2931_incp_row_filter, "distribution_par1_type"] <- NA
734+
df[hd_2931_incp_row_filter, "distribution_par1_value"] <- NA
735+
df[hd_2931_incp_row_filter, "distribution_par1_uncertainty"] <- NA
736+
df[hd_2931_incp_row_filter, "distribution_par2_type"] <- NA
737+
df[hd_2931_incp_row_filter, "distribution_par2_value"] <- NA
738+
df[hd_2931_incp_row_filter, "distribution_par2_uncertainty"] <- NA
739+
740+
df <- rbind(df, new_2931_incp_row)
741+
742+
# Split 2931 serial interval row
743+
hd_2931_si_row_filter <- df$access_param_id=="138_015"
744+
745+
new_2931_si_row <- df[hd_2931_si_row_filter, ]
746+
new_2931_si_row$parameter_data_id <- generate_new_id(
747+
df, "parameter_data_id", 10)
748+
716749
# No corresponding redcap entry so make an ID
717-
new_2931_row$access_param_id <- "138_3141"
718-
new_2931_row$parameter_value <- 9.7
719-
new_2931_row$parameter_value_type <- "Mean"
720-
721-
new_2931_row$parameter_statistical_approach <- "Estimated model parameter"
722-
new_2931_row$parameter_paired <- "No"
723-
new_2931_row$parameter_2_unit <- NA
724-
new_2931_row$method_2_from_supplement <- NA
725-
new_2931_row$parameter_2_statistical_approach <- NA
726-
727-
new_2931_row$parameter_2_value_type <- NA
728-
new_2931_row$parameter_2_lower_bound <- NA
729-
new_2931_row$parameter_2_upper_bound <- NA
730-
df <- rbind(df, new_2931_row)
750+
new_2931_si_row$access_param_id <- "138_2718"
751+
new_2931_si_row$parameter_value <- 13
752+
new_2931_si_row$parameter_value_type <- "Median"
753+
754+
new_2931_si_row$parameter_statistical_approach <- "Estimated model parameter"
755+
new_2931_si_row$parameter_paired <- "No"
756+
new_2931_si_row$parameter_2_unit <- NA
757+
new_2931_si_row$method_2_from_supplement <- NA
758+
new_2931_si_row$parameter_2_statistical_approach <- NA
759+
760+
new_2931_si_row$parameter_2_value_type <- NA
761+
new_2931_si_row$parameter_2_lower_bound <- NA
762+
new_2931_si_row$parameter_2_upper_bound <- NA
763+
764+
# Remove dist param values
765+
df[hd_2931_si_row_filter, "distribution_type"] <- NA
766+
df[hd_2931_si_row_filter, "distribution_par1_type"] <- NA
767+
df[hd_2931_si_row_filter, "distribution_par1_value"] <- NA
768+
df[hd_2931_si_row_filter, "distribution_par1_uncertainty"] <- NA
769+
df[hd_2931_si_row_filter, "distribution_par2_type"] <- NA
770+
df[hd_2931_si_row_filter, "distribution_par2_value"] <- NA
771+
df[hd_2931_si_row_filter, "distribution_par2_uncertainty"] <- NA
772+
773+
df <- rbind(df, new_2931_si_row)
731774

732775
# Labels for IQR and Range are different for variability so copying from
733776
# uncertainty results in different labels

src/db_compilation/redcap_compilation.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ orderly_dependency(
200200

201201
# Manually fixed files and "cleaning" script - these need to be in the
202202
# src/db_compilation folder
203-
orderly_resource(fixing_files)
203+
orderly_resource(setNames(fixing_files, fixing_files))
204204

205205
## Outputs
206206
orderly_artefact(

0 commit comments

Comments
 (0)