Skip to content

Commit 40409e4

Browse files
committed
make new figures and improve script
1 parent 8afea7d commit 40409e4

18 files changed

Lines changed: 247 additions & 8 deletions

scripts/hamaker2020_extended/results-plotting.R

Lines changed: 247 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,234 @@ for(i in 1:nrow(settings)) {
446446
# p_combined <- ggarrange(p_within, p_contextual, ncol = 1, nrow = 2, common.legend = TRUE, legend = "right")
447447
}
448448

449+
### VERSION WITH LEGEND AT BOTTOM
450+
451+
runname <- "April18_fullsimulation_combined"
452+
453+
# create a matrix with the different combinations of predictor and outcome type
454+
settings <- expand.grid(
455+
predictor.type = c("binary", "continuous"),
456+
outcome.type = c("binary", "continuous"),
457+
stringsAsFactors = FALSE
458+
)
459+
460+
# loop to make all plots
461+
for(i in 1:nrow(settings)) {
462+
set.predictor.type <- settings$predictor.type[i]
463+
set.outcome.type <- settings$outcome.type[i]
464+
465+
# create string for the file name
466+
type <- paste0("pred_", set.predictor.type, "_out_", set.outcome.type, "_")
467+
468+
#### Plot: Grid of sd.u0 and T_total ----
469+
470+
# read in the final data frame
471+
final_df <- readRDS(paste0("simulation_results_glmm/", runname, "/plotting_bias_df.RDS"))
472+
473+
# select relevant variables and cases (select and filter)
474+
plot_df <- final_df %>%
475+
# select T = 20, N = 200, sd.u0 = 1, predictor.type = "binary" and outcome.type = "continuous"
476+
filter(T_total %in% c(5, 20), N_total == 200, sd.u0 %in% c(1, 3), predictor.type == set.predictor.type, outcome.type == set.outcome.type,
477+
sdX.between == 3, g.01 == 3) %>%
478+
select(-c(ends_with("_success"), ends_with("_X"), ends_with("_X.cent"), ends_with("_X.cluster.means")))
479+
480+
plot_df_beta1 <- plot_df %>%
481+
select(-ends_with("_g.01_bias")) %>%
482+
# turn the bias variables into long format, with a new column indicating the model name
483+
pivot_longer(cols = ends_with("_bias"), names_to = "model", values_to = "beta1_bias") %>%
484+
# remove the "_g.10_bias" suffix from the model names
485+
mutate(model = str_remove(model, "_g.10_bias")) %>%
486+
# remove models with a 3 in the name
487+
filter(!str_detect(model, "3")) %>%
488+
# change model names
489+
mutate(model = recode(model,
490+
"l1" = "M1",
491+
"l2" = "M2",
492+
"l4" = "M3",
493+
"g.independence1" = "G1.independence",
494+
"g.exchangeable1" = "G1.exchangeable",
495+
"g.ar11" = "G1.AR1",
496+
"g.independence2" = "G2.independence",
497+
"g.exchangeable2" = "G2.exchangeable",
498+
"g.ar12" = "G2.AR1",
499+
"g.independence4" = "G3.independence",
500+
"g.exchangeable4" = "G3.exchangeable",
501+
"g.ar14" = "G3.AR1")) %>%
502+
# set factor levels of model to ensure correct order in the plot
503+
mutate(model = factor(model, levels = c("M1", "G1.independence", "G1.exchangeable", "G1.AR1",
504+
"M2", "G2.independence", "G2.exchangeable", "G2.AR1",
505+
"M3", "G3.independence", "G3.exchangeable", "G3.AR1"
506+
))) %>%
507+
# Turn variables into labels
508+
mutate(sd.u0_label = factor(sd.u0,
509+
levels = c(1, 3),
510+
labels = c(expression(sigma[u] == 1), expression(sigma[u] == 3)))) %>%
511+
mutate(T_total_label = factor(T_total,
512+
levels = c(5, 20),
513+
labels = c("T == 5", "T == 20"))) %>%
514+
# create new variable indicating method type (so M1 and G1 are "Method 1")
515+
mutate(method_type = case_when(
516+
str_detect(model, "M1") ~ "UC",
517+
str_detect(model, "M2") ~ "CWC",
518+
str_detect(model, "M3") ~ "MuCo",
519+
str_detect(model, "G1") ~ "UC",
520+
str_detect(model, "G2") ~ "CWC",
521+
str_detect(model, "G3") ~ "MuCo"
522+
)) %>%
523+
mutate(estimation_type = case_when(
524+
str_detect(model, "M1") ~ "GLMM",
525+
str_detect(model, "M2") ~ "GLMM",
526+
str_detect(model, "M3") ~ "GLMM",
527+
str_detect(model, "independence") ~ "GEE-indep",
528+
str_detect(model, "exchangeable") ~ "GEE-exch",
529+
str_detect(model, "AR1") ~ "GEE-AR1"
530+
)) %>%
531+
# set factor levels of method_type to ensure correct order in the plot
532+
mutate(method_type = factor(method_type, levels = c("UC", "CWC", "MuCo")),
533+
estimation_type = factor(estimation_type, levels = c("GLMM", "GEE-indep", "GEE-exch", "GEE-AR1"))) %>%
534+
# remove all bias values exceeding 100
535+
mutate(beta1_bias = ifelse(abs(beta1_bias) > 100, NA, beta1_bias))
536+
537+
538+
plot_df_g01 <- plot_df %>%
539+
select(-ends_with("_g.10_bias")) %>%
540+
# turn the bias variables into long format, with a new column indicating the model name
541+
pivot_longer(cols = ends_with("_bias"), names_to = "model", values_to = "g01_bias") %>%
542+
# remove the "_g.01_bias" suffix from the model names
543+
mutate(model = str_remove(model, "_g.01_bias")) %>%
544+
# remove models with a 3 in the name
545+
filter(model == "l4" | model == "g.independence4" | model == "g.exchangeable4" | model == "g.ar14") %>%
546+
# change model names
547+
mutate(model = recode(model,
548+
"l4" = "M3",
549+
"g.independence4" = "G3.independence",
550+
"g.exchangeable4" = "G3.exchangeable",
551+
"g.ar14" = "G3.AR1")) %>%
552+
# set factor levels of model to ensure correct order in the plot
553+
mutate(model = factor(model, levels = c("M3", "G3.independence", "G3.exchangeable", "G3.AR1"
554+
))) %>%
555+
# # create new variable indicating method type (so M1 and G1 are "Method 1")
556+
# mutate(method_type = case_when(
557+
# str_detect(model, "M3") ~ "MuCo",
558+
# str_detect(model, "G3") ~ "MuCo"
559+
# )) %>%
560+
mutate(estimation_type = case_when(
561+
str_detect(model, "M1") ~ "GLMM",
562+
str_detect(model, "M2") ~ "GLMM",
563+
str_detect(model, "M3") ~ "GLMM",
564+
str_detect(model, "independence") ~ "GEE-indep",
565+
str_detect(model, "exchangeable") ~ "GEE-exch",
566+
str_detect(model, "AR1") ~ "GEE-AR1"
567+
)) %>%
568+
# set factor levels of method_type to ensure correct order in the plot
569+
mutate(estimation_type = factor(estimation_type, levels = c("GLMM", "GEE-indep", "GEE-exch", "GEE-AR1"))) %>%
570+
# Turn label variables (sdX.between, g.01 and sd.u0) into strings with an underscore
571+
mutate(sd.u0_label = factor(sd.u0,
572+
levels = c(1, 3),
573+
labels = c(expression(sigma[u] == 1), expression(sigma[u] == 3)))) %>%
574+
mutate(T_total_label = factor(T_total,
575+
levels = c(5, 20),
576+
labels = c("T == 5", "T == 20"))) %>%
577+
# remove all bias values exceeding 100
578+
mutate(g01_bias = ifelse(abs(g01_bias) > 100, NA, g01_bias))
579+
580+
# The palette with grey:
581+
# cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")
582+
583+
# For the within-person effect
584+
ggplot(plot_df_beta1, aes(x = method_type, y = beta1_bias, col = estimation_type)) +
585+
geom_boxplot() + # Suppress default outlier points
586+
# geom_boxplot(position = position_dodge(width = 0.75)) + # align boxplots
587+
# geom_point(position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
588+
# alpha = 0.01, size = 0.8) +
589+
geom_hline(yintercept = 0, linetype = "dashed") + # Dashed horizontal line at 0 +
590+
coord_cartesian(ylim = c(-1.5, 1.5)) +
591+
scale_y_continuous(breaks = seq(-1.5, 1.5, by = 0.5)) +
592+
# ylim(-3, 3) + # Set y-axis limits
593+
labs(x = "Method", y = "Bias") +
594+
facet_grid(sd.u0_label ~ T_total_label, labeller = label_parsed) + # Show T and N values in labels
595+
theme_bw() +
596+
# scale_x_discrete(breaks = waiver(), labels = new_labels) + # <<-- overwrite x-axis labels
597+
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # optionally rotate
598+
# add 2 vertical lines dividing the methods
599+
# geom_vline(xintercept = c(4.5, 8.5), linetype = "solid", color = "grey") +
600+
# remove X axis labels
601+
theme(# remove vertical grid lines
602+
panel.grid.major.x = element_blank(),
603+
# increase font size for grid titles
604+
strip.text.x = element_text(size = 12),
605+
strip.text.y = element_text(size = 12),
606+
# increase font size for X entries
607+
axis.text.x = element_text(size = 12),
608+
axis.text.y = element_text(size = 12),
609+
axis.title.y = element_text(size = 13),
610+
axis.title.x = element_text(size = 13),
611+
# increase legend font size
612+
legend.text = element_text(size = 11),
613+
legend.title = element_text(size = 13),
614+
legend.position = "bottom"
615+
) +
616+
# change legend title to "Estimation"
617+
scale_color_brewer(name = "Estimation", palette = "Spectral")
618+
619+
# compute mean of GEE independence with method type UC
620+
# mean_beta1_bias <- plot_df_beta1 %>%
621+
# filter(estimation_type == "GEE-indep", method_type == "UC") %>%
622+
# group_by(sd.u0, T_total) %>%
623+
# summarise(mean_beta1_bias = mean(beta1_bias, na.rm = TRUE)) %>%
624+
# ungroup()
625+
626+
# save for test for main direct
627+
# ggsave("bias_plot_T_total-vs-sd.u0_within.pdf", width = 14, height = 8)
628+
629+
# save
630+
ggsave(paste0("simulation_results_glmm/", runname, "/figures/", type, "bias_plot_T_total-vs-sd.u0_within.pdf"), width = 9, height = 7)
631+
632+
# For the contextual effect
633+
ggplot(plot_df_g01, aes(x = estimation_type, y = g01_bias, col = estimation_type)) +
634+
geom_boxplot() +
635+
geom_hline(yintercept = 0, linetype = "dashed") + # Dashed horizontal line at 0
636+
coord_cartesian(ylim = c(-1.5, 1.5)) + # Set y-axis limits
637+
# add tick mark at Y for every 0.5
638+
scale_y_continuous(breaks = seq(-1.5, 1.5, by = 0.5)) +
639+
labs(x = "Method", y = "Bias") +
640+
facet_grid(sd.u0_label ~ T_total_label, labeller = label_parsed) + # Show T and N values in labels
641+
theme_bw() +
642+
# remove X axis labels
643+
theme(# remove vertical grid lines
644+
panel.grid.major.x = element_blank(),
645+
# remove X tick marks
646+
axis.ticks.x = element_blank(),
647+
# increase font size for grid titles
648+
strip.text.x = element_text(size = 12),
649+
strip.text.y = element_text(size = 12),
650+
# increase font size for X entries
651+
axis.text.x = element_text(size = 12, colour = NA),
652+
axis.text.y = element_text(size = 12),
653+
axis.title.y = element_text(size = 13),
654+
axis.title.x = element_text(size = 13, colour = NA),
655+
# remove legend
656+
# increase legend font size
657+
legend.text = element_text(size = 11, colour = NA),
658+
legend.title = element_text(size = 13, colour = NA),
659+
legend.position = "bottom"
660+
) +
661+
# change legend title to "Estimation"
662+
scale_color_brewer(name = "Estimation", palette = "Spectral") +
663+
guides(color = guide_legend(override.aes = list(color = NA)))
664+
# scale_color_manual(name = "Estimation", values = cbPalette)
665+
666+
# # save for test for main direct
667+
# ggsave("bias_plot_T_total-vs-sd.u0_contextual.pdf", width = 5, height = 7)
668+
669+
# save
670+
ggsave(paste0("simulation_results_glmm/", runname, "/figures/", type, "bias_plot_T_total-vs-sd.u0_contextual.pdf"), width = 3, height = 7)
671+
672+
# combine plots native with gridextra
673+
# p_combined <- ggarrange(p_within, p_contextual, ncol = 1, nrow = 2, common.legend = TRUE, legend = "right")
674+
}
675+
676+
449677
# ### PLOT 1: Grid of sdX.between and g.01 ----
450678
#
451679
# # read in the final data frame
@@ -1026,16 +1254,21 @@ GEE_check_df <- final_df %>%
10261254

10271255

10281256
# define a threshold for "extreme" bias
1029-
threshold <- 1E+11 # can also be set to 5, doesn't affect the number of extreme values
1257+
threshold1 <- 1E+11 # can also be set to 5, doesn't affect the number of extreme values
1258+
threshold2 <- 20 # can also be set to 5, doesn't affect the number of extreme values
10301259

10311260
extreme_prop_df <- GEE_check_df %>%
10321261
group_by(model, design_id) %>%
10331262
summarize(
1034-
proportion_extreme = mean(abs(beta1_bias) > threshold),
1263+
proportion_extreme1 = mean(abs(beta1_bias) > threshold1),
1264+
proportion_extreme2 = mean(abs(beta1_bias) > threshold2),
10351265
.groups = "drop"
1036-
) %>%
1037-
# remove rows with 0 proportion extreme
1038-
filter(proportion_extreme > 0)
1266+
)
1267+
1268+
# check whether the proportions are the same
1269+
extreme_prop_df %>%
1270+
filter(proportion_extreme1 != proportion_extreme2) %>%
1271+
select(model, design_id, proportion_extreme1, proportion_extreme2)
10391272

10401273
# count number of unique design_ids
10411274
length(unique(extreme_prop_df$design_id))
@@ -1054,12 +1287,18 @@ ggplot(extreme_prop_df, aes(x = proportion_extreme, fill = model)) +
10541287

10551288
# create a table with the number of extreme scenarios per model
10561289
extreme_count_df <- GEE_check_df %>%
1057-
group_by(model) %>%
1290+
group_by(model, predictor.type, outcome.type) %>%
10581291
summarize(
1059-
num_extreme = sum(abs(beta1_bias) > threshold),
1292+
num_extreme1 = sum(abs(beta1_bias) > threshold1),
1293+
num_extreme2 = sum(abs(beta1_bias) > threshold2),
10601294
.groups = "drop"
10611295
) %>%
1062-
arrange(desc(num_extreme))
1296+
arrange(desc(num_extreme1))
1297+
1298+
# check whether the counts are the same
1299+
extreme_count_df %>%
1300+
filter(num_extreme1 != num_extreme2) %>%
1301+
select(model, predictor.type, outcome.type, num_extreme1, num_extreme2)
10631302

10641303
# take the setting with most frequent extreme values (design_id = 299 and g.ar12) and plot the density of bias
10651304
plot1 <- GEE_check_df %>%
Binary file not shown.

0 commit comments

Comments
 (0)