@@ -446,6 +446,234 @@ for(i in 1:nrow(settings)) {
446446 # p_combined <- ggarrange(p_within, p_contextual, ncol = 1, nrow = 2, common.legend = TRUE, legend = "right")
447447}
448448
449+ # ## VERSION WITH LEGEND AT BOTTOM
450+
451+ runname <- " April18_fullsimulation_combined"
452+
453+ # create a matrix with the different combinations of predictor and outcome type
454+ settings <- expand.grid(
455+ predictor.type = c(" binary" , " continuous" ),
456+ outcome.type = c(" binary" , " continuous" ),
457+ stringsAsFactors = FALSE
458+ )
459+
460+ # loop to make all plots
461+ for (i in 1 : nrow(settings )) {
462+ set.predictor.type <- settings $ predictor.type [i ]
463+ set.outcome.type <- settings $ outcome.type [i ]
464+
465+ # create string for the file name
466+ type <- paste0(" pred_" , set.predictor.type , " _out_" , set.outcome.type , " _" )
467+
468+ # ### Plot: Grid of sd.u0 and T_total ----
469+
470+ # read in the final data frame
471+ final_df <- readRDS(paste0(" simulation_results_glmm/" , runname , " /plotting_bias_df.RDS" ))
472+
473+ # select relevant variables and cases (select and filter)
474+ plot_df <- final_df %> %
475+ # select T = 20, N = 200, sd.u0 = 1, predictor.type = "binary" and outcome.type = "continuous"
476+ filter(T_total %in% c(5 , 20 ), N_total == 200 , sd.u0 %in% c(1 , 3 ), predictor.type == set.predictor.type , outcome.type == set.outcome.type ,
477+ sdX.between == 3 , g.01 == 3 ) %> %
478+ select(- c(ends_with(" _success" ), ends_with(" _X" ), ends_with(" _X.cent" ), ends_with(" _X.cluster.means" )))
479+
480+ plot_df_beta1 <- plot_df %> %
481+ select(- ends_with(" _g.01_bias" )) %> %
482+ # turn the bias variables into long format, with a new column indicating the model name
483+ pivot_longer(cols = ends_with(" _bias" ), names_to = " model" , values_to = " beta1_bias" ) %> %
484+ # remove the "_g.10_bias" suffix from the model names
485+ mutate(model = str_remove(model , " _g.10_bias" )) %> %
486+ # remove models with a 3 in the name
487+ filter(! str_detect(model , " 3" )) %> %
488+ # change model names
489+ mutate(model = recode(model ,
490+ " l1" = " M1" ,
491+ " l2" = " M2" ,
492+ " l4" = " M3" ,
493+ " g.independence1" = " G1.independence" ,
494+ " g.exchangeable1" = " G1.exchangeable" ,
495+ " g.ar11" = " G1.AR1" ,
496+ " g.independence2" = " G2.independence" ,
497+ " g.exchangeable2" = " G2.exchangeable" ,
498+ " g.ar12" = " G2.AR1" ,
499+ " g.independence4" = " G3.independence" ,
500+ " g.exchangeable4" = " G3.exchangeable" ,
501+ " g.ar14" = " G3.AR1" )) %> %
502+ # set factor levels of model to ensure correct order in the plot
503+ mutate(model = factor (model , levels = c(" M1" , " G1.independence" , " G1.exchangeable" , " G1.AR1" ,
504+ " M2" , " G2.independence" , " G2.exchangeable" , " G2.AR1" ,
505+ " M3" , " G3.independence" , " G3.exchangeable" , " G3.AR1"
506+ ))) %> %
507+ # Turn variables into labels
508+ mutate(sd.u0_label = factor (sd.u0 ,
509+ levels = c(1 , 3 ),
510+ labels = c(expression(sigma [u ] == 1 ), expression(sigma [u ] == 3 )))) %> %
511+ mutate(T_total_label = factor (T_total ,
512+ levels = c(5 , 20 ),
513+ labels = c(" T == 5" , " T == 20" ))) %> %
514+ # create new variable indicating method type (so M1 and G1 are "Method 1")
515+ mutate(method_type = case_when(
516+ str_detect(model , " M1" ) ~ " UC" ,
517+ str_detect(model , " M2" ) ~ " CWC" ,
518+ str_detect(model , " M3" ) ~ " MuCo" ,
519+ str_detect(model , " G1" ) ~ " UC" ,
520+ str_detect(model , " G2" ) ~ " CWC" ,
521+ str_detect(model , " G3" ) ~ " MuCo"
522+ )) %> %
523+ mutate(estimation_type = case_when(
524+ str_detect(model , " M1" ) ~ " GLMM" ,
525+ str_detect(model , " M2" ) ~ " GLMM" ,
526+ str_detect(model , " M3" ) ~ " GLMM" ,
527+ str_detect(model , " independence" ) ~ " GEE-indep" ,
528+ str_detect(model , " exchangeable" ) ~ " GEE-exch" ,
529+ str_detect(model , " AR1" ) ~ " GEE-AR1"
530+ )) %> %
531+ # set factor levels of method_type to ensure correct order in the plot
532+ mutate(method_type = factor (method_type , levels = c(" UC" , " CWC" , " MuCo" )),
533+ estimation_type = factor (estimation_type , levels = c(" GLMM" , " GEE-indep" , " GEE-exch" , " GEE-AR1" ))) %> %
534+ # remove all bias values exceeding 100
535+ mutate(beta1_bias = ifelse(abs(beta1_bias ) > 100 , NA , beta1_bias ))
536+
537+
538+ plot_df_g01 <- plot_df %> %
539+ select(- ends_with(" _g.10_bias" )) %> %
540+ # turn the bias variables into long format, with a new column indicating the model name
541+ pivot_longer(cols = ends_with(" _bias" ), names_to = " model" , values_to = " g01_bias" ) %> %
542+ # remove the "_g.01_bias" suffix from the model names
543+ mutate(model = str_remove(model , " _g.01_bias" )) %> %
544+ # remove models with a 3 in the name
545+ filter(model == " l4" | model == " g.independence4" | model == " g.exchangeable4" | model == " g.ar14" ) %> %
546+ # change model names
547+ mutate(model = recode(model ,
548+ " l4" = " M3" ,
549+ " g.independence4" = " G3.independence" ,
550+ " g.exchangeable4" = " G3.exchangeable" ,
551+ " g.ar14" = " G3.AR1" )) %> %
552+ # set factor levels of model to ensure correct order in the plot
553+ mutate(model = factor (model , levels = c(" M3" , " G3.independence" , " G3.exchangeable" , " G3.AR1"
554+ ))) %> %
555+ # # create new variable indicating method type (so M1 and G1 are "Method 1")
556+ # mutate(method_type = case_when(
557+ # str_detect(model, "M3") ~ "MuCo",
558+ # str_detect(model, "G3") ~ "MuCo"
559+ # )) %>%
560+ mutate(estimation_type = case_when(
561+ str_detect(model , " M1" ) ~ " GLMM" ,
562+ str_detect(model , " M2" ) ~ " GLMM" ,
563+ str_detect(model , " M3" ) ~ " GLMM" ,
564+ str_detect(model , " independence" ) ~ " GEE-indep" ,
565+ str_detect(model , " exchangeable" ) ~ " GEE-exch" ,
566+ str_detect(model , " AR1" ) ~ " GEE-AR1"
567+ )) %> %
568+ # set factor levels of method_type to ensure correct order in the plot
569+ mutate(estimation_type = factor (estimation_type , levels = c(" GLMM" , " GEE-indep" , " GEE-exch" , " GEE-AR1" ))) %> %
570+ # Turn label variables (sdX.between, g.01 and sd.u0) into strings with an underscore
571+ mutate(sd.u0_label = factor (sd.u0 ,
572+ levels = c(1 , 3 ),
573+ labels = c(expression(sigma [u ] == 1 ), expression(sigma [u ] == 3 )))) %> %
574+ mutate(T_total_label = factor (T_total ,
575+ levels = c(5 , 20 ),
576+ labels = c(" T == 5" , " T == 20" ))) %> %
577+ # remove all bias values exceeding 100
578+ mutate(g01_bias = ifelse(abs(g01_bias ) > 100 , NA , g01_bias ))
579+
580+ # The palette with grey:
581+ # cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")
582+
583+ # For the within-person effect
584+ ggplot(plot_df_beta1 , aes(x = method_type , y = beta1_bias , col = estimation_type )) +
585+ geom_boxplot() + # Suppress default outlier points
586+ # geom_boxplot(position = position_dodge(width = 0.75)) + # align boxplots
587+ # geom_point(position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
588+ # alpha = 0.01, size = 0.8) +
589+ geom_hline(yintercept = 0 , linetype = " dashed" ) + # Dashed horizontal line at 0 +
590+ coord_cartesian(ylim = c(- 1.5 , 1.5 )) +
591+ scale_y_continuous(breaks = seq(- 1.5 , 1.5 , by = 0.5 )) +
592+ # ylim(-3, 3) + # Set y-axis limits
593+ labs(x = " Method" , y = " Bias" ) +
594+ facet_grid(sd.u0_label ~ T_total_label , labeller = label_parsed ) + # Show T and N values in labels
595+ theme_bw() +
596+ # scale_x_discrete(breaks = waiver(), labels = new_labels) + # <<-- overwrite x-axis labels
597+ # theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # optionally rotate
598+ # add 2 vertical lines dividing the methods
599+ # geom_vline(xintercept = c(4.5, 8.5), linetype = "solid", color = "grey") +
600+ # remove X axis labels
601+ theme(# remove vertical grid lines
602+ panel.grid.major.x = element_blank(),
603+ # increase font size for grid titles
604+ strip.text.x = element_text(size = 12 ),
605+ strip.text.y = element_text(size = 12 ),
606+ # increase font size for X entries
607+ axis.text.x = element_text(size = 12 ),
608+ axis.text.y = element_text(size = 12 ),
609+ axis.title.y = element_text(size = 13 ),
610+ axis.title.x = element_text(size = 13 ),
611+ # increase legend font size
612+ legend.text = element_text(size = 11 ),
613+ legend.title = element_text(size = 13 ),
614+ legend.position = " bottom"
615+ ) +
616+ # change legend title to "Estimation"
617+ scale_color_brewer(name = " Estimation" , palette = " Spectral" )
618+
619+ # compute mean of GEE independence with method type UC
620+ # mean_beta1_bias <- plot_df_beta1 %>%
621+ # filter(estimation_type == "GEE-indep", method_type == "UC") %>%
622+ # group_by(sd.u0, T_total) %>%
623+ # summarise(mean_beta1_bias = mean(beta1_bias, na.rm = TRUE)) %>%
624+ # ungroup()
625+
626+ # save for test for main direct
627+ # ggsave("bias_plot_T_total-vs-sd.u0_within.pdf", width = 14, height = 8)
628+
629+ # save
630+ ggsave(paste0(" simulation_results_glmm/" , runname , " /figures/" , type , " bias_plot_T_total-vs-sd.u0_within.pdf" ), width = 9 , height = 7 )
631+
632+ # For the contextual effect
633+ ggplot(plot_df_g01 , aes(x = estimation_type , y = g01_bias , col = estimation_type )) +
634+ geom_boxplot() +
635+ geom_hline(yintercept = 0 , linetype = " dashed" ) + # Dashed horizontal line at 0
636+ coord_cartesian(ylim = c(- 1.5 , 1.5 )) + # Set y-axis limits
637+ # add tick mark at Y for every 0.5
638+ scale_y_continuous(breaks = seq(- 1.5 , 1.5 , by = 0.5 )) +
639+ labs(x = " Method" , y = " Bias" ) +
640+ facet_grid(sd.u0_label ~ T_total_label , labeller = label_parsed ) + # Show T and N values in labels
641+ theme_bw() +
642+ # remove X axis labels
643+ theme(# remove vertical grid lines
644+ panel.grid.major.x = element_blank(),
645+ # remove X tick marks
646+ axis.ticks.x = element_blank(),
647+ # increase font size for grid titles
648+ strip.text.x = element_text(size = 12 ),
649+ strip.text.y = element_text(size = 12 ),
650+ # increase font size for X entries
651+ axis.text.x = element_text(size = 12 , colour = NA ),
652+ axis.text.y = element_text(size = 12 ),
653+ axis.title.y = element_text(size = 13 ),
654+ axis.title.x = element_text(size = 13 , colour = NA ),
655+ # remove legend
656+ # increase legend font size
657+ legend.text = element_text(size = 11 , colour = NA ),
658+ legend.title = element_text(size = 13 , colour = NA ),
659+ legend.position = " bottom"
660+ ) +
661+ # change legend title to "Estimation"
662+ scale_color_brewer(name = " Estimation" , palette = " Spectral" ) +
663+ guides(color = guide_legend(override.aes = list (color = NA )))
664+ # scale_color_manual(name = "Estimation", values = cbPalette)
665+
666+ # # save for test for main direct
667+ # ggsave("bias_plot_T_total-vs-sd.u0_contextual.pdf", width = 5, height = 7)
668+
669+ # save
670+ ggsave(paste0(" simulation_results_glmm/" , runname , " /figures/" , type , " bias_plot_T_total-vs-sd.u0_contextual.pdf" ), width = 3 , height = 7 )
671+
672+ # combine plots native with gridextra
673+ # p_combined <- ggarrange(p_within, p_contextual, ncol = 1, nrow = 2, common.legend = TRUE, legend = "right")
674+ }
675+
676+
449677 # ### PLOT 1: Grid of sdX.between and g.01 ----
450678 #
451679 # # read in the final data frame
@@ -1026,16 +1254,21 @@ GEE_check_df <- final_df %>%
10261254
10271255
10281256# define a threshold for "extreme" bias
1029- threshold <- 1E+11 # can also be set to 5, doesn't affect the number of extreme values
1257+ threshold1 <- 1E+11 # can also be set to 5, doesn't affect the number of extreme values
1258+ threshold2 <- 20 # can also be set to 5, doesn't affect the number of extreme values
10301259
10311260extreme_prop_df <- GEE_check_df %> %
10321261 group_by(model , design_id ) %> %
10331262 summarize(
1034- proportion_extreme = mean(abs(beta1_bias ) > threshold ),
1263+ proportion_extreme1 = mean(abs(beta1_bias ) > threshold1 ),
1264+ proportion_extreme2 = mean(abs(beta1_bias ) > threshold2 ),
10351265 .groups = " drop"
1036- ) %> %
1037- # remove rows with 0 proportion extreme
1038- filter(proportion_extreme > 0 )
1266+ )
1267+
1268+ # check whether the proportions are the same
1269+ extreme_prop_df %> %
1270+ filter(proportion_extreme1 != proportion_extreme2 ) %> %
1271+ select(model , design_id , proportion_extreme1 , proportion_extreme2 )
10391272
10401273# count number of unique design_ids
10411274length(unique(extreme_prop_df $ design_id ))
@@ -1054,12 +1287,18 @@ ggplot(extreme_prop_df, aes(x = proportion_extreme, fill = model)) +
10541287
10551288# create a table with the number of extreme scenarios per model
10561289extreme_count_df <- GEE_check_df %> %
1057- group_by(model ) %> %
1290+ group_by(model , predictor.type , outcome.type ) %> %
10581291 summarize(
1059- num_extreme = sum(abs(beta1_bias ) > threshold ),
1292+ num_extreme1 = sum(abs(beta1_bias ) > threshold1 ),
1293+ num_extreme2 = sum(abs(beta1_bias ) > threshold2 ),
10601294 .groups = " drop"
10611295 ) %> %
1062- arrange(desc(num_extreme ))
1296+ arrange(desc(num_extreme1 ))
1297+
1298+ # check whether the counts are the same
1299+ extreme_count_df %> %
1300+ filter(num_extreme1 != num_extreme2 ) %> %
1301+ select(model , predictor.type , outcome.type , num_extreme1 , num_extreme2 )
10631302
10641303# take the setting with most frequent extreme values (design_id = 299 and g.ar12) and plot the density of bias
10651304plot1 <- GEE_check_df %> %
0 commit comments