diff --git a/AssumptionPlotter/AssumptionPlotter.Rproj b/AssumptionPlotter/AssumptionPlotter.Rproj index 270314b..f0d6187 100644 --- a/AssumptionPlotter/AssumptionPlotter.Rproj +++ b/AssumptionPlotter/AssumptionPlotter.Rproj @@ -18,4 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace +PackageRoxygenize: rd,collate,namespace,vignette diff --git a/AssumptionPlotter/DESCRIPTION b/AssumptionPlotter/DESCRIPTION index 142095b..d8b4e1f 100644 --- a/AssumptionPlotter/DESCRIPTION +++ b/AssumptionPlotter/DESCRIPTION @@ -1,28 +1,39 @@ Package: AssumptionPlotter Type: Package -Title: Assumption Plotter +Title: AssumptionPlotter Version: 0.1.0 Authors@R: c( person( "Emma", "Akrong", - email = "emma.akrong@student.uva.nl", + email = "emma.akrong@gmail.com", role = c("aut", "cre") ) ) -Description: This package attempt to provide an intuitive visualization of whether - the user's dataset meets the assumptions of statistical models, as well as - whether a statistical model is expected to capture what the user wants - the model to capture. +Description: This package creates an app that allows for visual inspection of + EMA/ESM data. The purpose is to give EMA/ESM researchers a nice overview of + what their data looks like, and possibly guide decisions on what statistical + model to use. It was developed with the datasets from openESM in mind, and + can currently plot built-in datasets, the user's own data, and data from + openESM accessed through the link to download the tsv file on its dedicated + Zenodo page. 
License: Encoding: UTF-8 LazyData: true RoxygenNote: 7.3.2 Config/testthat/edition: 3 Imports: + bslib, + dplyr, + DT, ggplot2, + grDevices, knitr, quarto, + readr, + sass, + shiny, testthat, + tidyr, usethis VignetteBuilder: quarto Depends: diff --git a/AssumptionPlotter/NAMESPACE b/AssumptionPlotter/NAMESPACE index 97a152f..e0be47d 100644 --- a/AssumptionPlotter/NAMESPACE +++ b/AssumptionPlotter/NAMESPACE @@ -1,9 +1,22 @@ # Generated by roxygen2: do not edit by hand -export(data_log_plot) +export(assumption_plot) +export(clean_df) +export(pie_bar_chart) +export(rank_participants) export(run_plotter) -export(simulate_logistic) +import(bslib) +import(dplyr) import(ggplot2) +import(grDevices) +import(knitr) +import(quarto) +import(sass) import(shiny) import(testthat) +import(tidyr) import(usethis) +importFrom(DT,DTOutput) +importFrom(DT,renderDT) +importFrom(readr,read_csv) +importFrom(readr,read_tsv) diff --git a/AssumptionPlotter/R/assumption_plot.R b/AssumptionPlotter/R/assumption_plot.R new file mode 100644 index 0000000..146baa3 --- /dev/null +++ b/AssumptionPlotter/R/assumption_plot.R @@ -0,0 +1,256 @@ +#' AssumptionPlotter Plot +#' +#' @details +#' This function creates the plots in the Plots tab of AssumptionPlotter +#' @param df Selected dataset. +#' @param participant Participant to be plotted. +#' @param variables All variables to be plotted. +#' @param expected_days Expected days of the study +#' @param beeps_per_day Expected beeps per day +#' @param include_day Option to include day labels. +#' @param include_day_line Option to include day lines. +#' @param impute Method for imputing NA's. +#' @param add_trend Include a regression line showing the trend in the data. +#' @param trend_type Type of trend line to be passed to geom_smooth(). +#' @param theme_choice Decide what theme the plot should have. +#' @param palette Chooses either no palette or custom. +#' @param palette_options Palette options. Any of grDevices::hcl.pals(). 
+#' @param text_font What text font the plot should have. +#' @param axis_text_size Adjust the text size of the plot's axes. +#' @param legend_text_size Adjust the text size of the legend. +#' @return Interactive plot in AssumptionPlotter. +#' @import ggplot2 +#' @import dplyr +#' @import tidyr +#' @import grDevices +#' @export +#' @examples +#' \dontrun{ +#' assumption_plot( +#' df, +#' participant, +#' variables, +#' expected_days, +#' beeps_per_day, +#' include_day = TRUE, +#' include_day_line = TRUE, +#' impute = c("none", "mean", "mode"), +#' add_trend = FALSE, +#' trend_type = c("lm", "loess"), +#' theme_choice = c("classic", "minimal", "bw", "dark"), +#' palette = c("none", "custom"), +#' palette_option = "Reds", +#' text_font = c("sans", "serif", "mono"), +#' axis_text_size = 12, +#' legend_text_size = 12 +#' ) +#' } +#' + +assumption_plot <- function( + df, + participant, + variables, + expected_days, + beeps_per_day, + include_day = TRUE, + include_day_line = TRUE, + impute = "none", + add_trend = FALSE, + trend_type = "lm", + theme_choice = "classic", + palette = "none", + palette_option = "Reds", + text_font = "sans", + axis_text_size = 12, + legend_text_size = 12){ + + # Create error message for when all variables are deselected + + if(length(variables)==0){ + stop("The plot cannot render when no variables are selected") + } + + # Subset data + ## In the future it would be cool to implement the option to plot multiple + ## participants at once (so you can compare them). 
+ df <- df %>% + filter(id == participant) + + + # Keep relevant variables + keep_cols <- c( + "id", + "day", + "beep", + "missing", + variables + ) + + df <- df[,keep_cols] + + + # Specify imputation method (per variable) + if(impute!="none"){ + + for(v in variables){ + + if(impute=="mean"){ + + df[[v]][is.na(df[[v]])] <-mean(df[[v]],na.rm=TRUE) + } + + if(impute == "mode"){ + + mode_val <- names(sort(table(df[[v]]), decreasing = TRUE))[1] + + if(is.numeric(df[[v]])) { + mode_val <- as.numeric(mode_val) + } + + df[[v]][is.na(df[[v]])] <- mode_val + } + + } + + } + + + # Ensure that all variables are numeric + df[variables] <- lapply(df[variables], as.numeric) + + + # Create plotting index + df <- df %>% + mutate( + plot_x = (day - 1) * beeps_per_day + beep + ) %>% + arrange(plot_x) + + + # Make df long format (required for plotting with ggplot) + long_df <- df %>% + pivot_longer( + cols = -c(id, day, beep, missing, plot_x), + names_to = "Variables", # Risky name since name is same as argument + values_to = "value" + ) + + + # Create location of vertical day lines on the x-axis + day_lines <- seq(beeps_per_day+.5, # Make line appear between last and first day + max(long_df$plot_x), + by = beeps_per_day) + + # Create location of days + ## Note that this is not ideal currently. Might make sense to define max days + ## and beeps by participant. Whether to plot predetermined expected days and + ## beeps for all participants or a specific one could be plot argument. 
+ + day_labels <- data.frame( + day = 1:expected_days, + x = (0:(expected_days - 1)) * beeps_per_day + + (beeps_per_day + 1) / 2, + label = paste("Day", 1:expected_days) + ) + + + # Initialize plot + p <- ggplot2::ggplot(long_df, aes(x = plot_x, y = value, color = Variables))+ + geom_line()+ + geom_point()+ + ylab("Value")+ + xlab("Time") + + + # Option to include day labels + if(include_day){ + p <- p + + geom_label( + data = day_labels, + aes(x = x, y = Inf, label = label), + inherit.aes = FALSE, + vjust = 1.5, + size = 4, + alpha = .8 + ) + } + + + # Option to include lines separating days + if(include_day_line){ + p <- p + + geom_vline(xintercept = day_lines, alpha = .15, linetype = "dashed") + } + + # Option to add a trend line + if(add_trend){ + + p <- p + + geom_smooth(method = trend_type, + se = FALSE) + } + + + # Create x-axis tick labels + breaks <- sort(unique(long_df$plot_x)) + + labels <-rep(1:beeps_per_day, length.out = length(breaks)) + + ## Edit x-axis tick labels + p <- p + + scale_x_continuous(breaks = breaks, labels = labels) + + + # Ensure that y-lim is large enough to show all data points of all variables + p <- p + + coord_cartesian(ylim = c( + min(long_df$value, na.rm = TRUE), + max(long_df$value, na.rm = TRUE) + )) + + + # Switch themes + p <- switch(theme_choice, + minimal = p + theme_minimal(base_family = text_font), + classic = p + theme_classic(base_family = text_font), + bw = p + theme_bw(base_family = text_font), + dark = p + theme_dark(base_family = text_font), + p) + + + # Switch the colors of the plot + + ## Get number of variables + nr_var <- length(variables) + + p <- switch(palette, + none = p + scale_color_hue(), + custom = p + scale_color_manual(values = + grDevices::hcl.colors(nr_var, palette_option, + rev=TRUE)), + p) + + + # Edit font sizes + + p <- p + + theme( + text = element_text(size = axis_text_size), + axis.title = element_text(size = axis_text_size+2), + axis.text = element_text(size = axis_text_size), + legend.title 
= element_text(size = legend_text_size+2), + legend.text = element_text(size = legend_text_size) + ) + + + + # Return plot + return(p) + +} + + + + + diff --git a/AssumptionPlotter/R/clean_df.R b/AssumptionPlotter/R/clean_df.R new file mode 100644 index 0000000..3a68432 --- /dev/null +++ b/AssumptionPlotter/R/clean_df.R @@ -0,0 +1,100 @@ +#' Edit dataset to fit plotting +#' +#' @details +#' This function edits data files (from openESM). Technically, +#' the openesm::get_data function should read the files. However, +#' because the function fails to connect to zenodo, some datasets have been +#' manually saved in the package. To make the plotting smooth, this function +#' cleans the files further for analysis. To load additional datasets, go to the +#' dedicated zenodo webpage from openESM, copy the link for the dataset, and +#' paste it in the app (in the data tab under the option "paste link"). You will +#' also need to include some additional information from the dedicated openESM +#' page. If the format is correct, the analysis should still work. Make sure to +#' NOT pick the raw data. You can also add your own df as long as it is a csv +#' and the columns have the correct names. +#' The function also adds a column indicating whether a beep has been missed. +#' @param df The data frame. +#' @param id_col Provide the name of the id column +#' @param day_col Name of day column. +#' @param exp_day Expected days of the study. +#' @param beep_col Name of beep column. +#' @param exp_beep Expected beeps per day. +#' @param variables A vector containing the names of all columns to be analysed. +#' Each variable must be within quotation marks. +#' @import dplyr +#' @return Data file with all possible days and beeps and with a column indicating +#' whether a beep was missed. 
+#' @export +#' @examples +#' \dontrun{ +#' +#' "menghini_2023_orig" <- readr::read_tsv("https://zenodo.org/records/17347538/files/0022_menghini_ts.tsv?download=1") +#' names <- colnames(menghini_2023_orig)[c(9:17,22:28)] +#' +#' menghini_2023 <- clean_df( +#' df = menghini_2023_orig, +#' id_col = "id", +#' day_col = "day", +#' exp_day = 3, +#' beep_col = "beep", +#' exp_beep = 7, +#' variables = names +#' ) +#' } + + +clean_df <- function(df, + id_col = "id", + day_col = "day", + exp_day, + beep_col = "beep", + exp_beep, + variables){ + + # Remove any rows where id, day, or beep is missing a value + df <- df %>% + select(any_of(c(id_col, day_col, beep_col, variables))) %>% + filter( + !is.na(.data[[id_col]]), + !is.na(.data[[day_col]]), + !is.na(.data[[beep_col]]) + ) + + # Only select defined variables + df <- df %>% + select(any_of(c(id_col, day_col, beep_col, variables))) + + + # Change variable names incase original had different one + df_std <- df %>% + rename( + id = all_of(id_col), + day = all_of(day_col), + beep = all_of(beep_col) + ) + + + # Impute missing day x beep combinations with NA's + new_df <- df_std %>% + complete( + id, + day = 1:exp_day, + beep = 1:exp_beep + ) %>% + left_join(df_std, by = c("id", "day", "beep", variables)) + + + # create variable that indicates whether a row has NA's + new_df <- new_df %>% + mutate( + missing = if_any(all_of(variables), is.na) + ) + + + # order data according to first all participant then days then beeps + new_df <- new_df %>% + arrange(id, day, beep) %>% + relocate(missing, .after = beep) + + return(new_df) +} diff --git a/AssumptionPlotter/R/data_log_plot.R b/AssumptionPlotter/R/data_log_plot.R deleted file mode 100644 index 443de9f..0000000 --- a/AssumptionPlotter/R/data_log_plot.R +++ /dev/null @@ -1,50 +0,0 @@ -#' Scatter plot of data points -#' -#' @param x (Simulated) data denoting position of data point on the x-axis. -#' @param y (Simulated) data denoting position of data point on the y-axis. 
-#' @param type What shape your data should be in. -#' @param pch Shape of your data points in the plot. -#' @param col1 Color of your data points in the plot. -#' @param col2 Color of your logistic curve in the plot. -#' @param incl_mean Whether you want the mean of your data points displayed. -#' @return A plot showing how well your data points follow the logistic function. -#' @export -#' @examples -#' data_log_plot(x = rnorm(10,0,1), y = rbinom(10,1,0.4)) - -data_log_plot <- function(x, y, type = "logistic", pch = 16, col1 =rgb(0,0,0,0.6), - col2 = rgb(1,0,0,1), incl_mean = TRUE) { - if(type == "logistic") { - # fit logistic model - fit <- glm(y ~ x, family = binomial) - - # scatter plot - plot(x, y, - pch = pch, - col = col1, - xlab = "x", - ylab = "Outcome") - - # prediction grid - xgrid <- seq(min(x), max(x), length.out = 500) - - # predicted probabilities - pred <- predict(fit, - newdata = data.frame(x = xgrid), - type = "response") - - # draw sigmoid curve - lines(xgrid, pred, col = "red", lwd = 3) - - if(incl_mean) { - abline(v = mean(x), lty = 2, col= "gray40") - text(.1+mean(x), ) - } - - }else { - stop("The function only supports the logistic function at the moment") - } -} - - - diff --git a/AssumptionPlotter/R/edit_datasets.R b/AssumptionPlotter/R/edit_datasets.R deleted file mode 100644 index f6f2ef4..0000000 --- a/AssumptionPlotter/R/edit_datasets.R +++ /dev/null @@ -1,78 +0,0 @@ -#' Edit dataset to fit plotting -#' -#' @details -#' This function edits the data files (from openESM). Technically, the openesm::get_data function should read the files. However, because the function fails to connect to zenodo, some datasets have been manually saved in the package. To make the plotting smooth, this function cleans the files further for analysis. Load additional datasets, go to the dedicated zenodo webpage from openESM, copy the link for the dataset, and add the dataset manually in the app. If the format is correct, the analysis should still work. 
Make sure to NOT pick the raw data. -#' @param df The data frame. Note that the df should be in wide format. -#' @param id_col Provide the index for which column the participant ID is in. If there is no participant ID (the data frame is for only one participant), write "none". -#' @param day_col Index number for the column containing the days. -#' @param exp_day Expected days of the study. -#' @param beep_col Index number of the column containing the beeps for the day. -#' @param exp_beep Expected beeps per day. -#' @param variables A vector containing the names of all columns to be analysed. Each variable must be within quotation marks. -#' @import dplyr -#' @returns Creates a data file in the data folder of the package. -#' @examples -#' \dontrun{ -#' read_openESM_data("0022_menghini_ts", "https://zenodo.org/records/17347538/files/0022_menghini_ts.tsv?download=1") -#' } - -edit_df <- function(df, id_col, day_col, exp_day, beep_col, exp_beep, variables){ - - missing <- data.frame( - id = c(), - beeps = numeric(), - days = numeric() - ) - - participants <- unique(df[[id_col]]) - - for(i in 1:length(participants)){ - sub_df <- df[df[[id_col]]==participants[i],] - - expected_pairs <- expand.grid( - beeps = 1:exp_beep, - days = 1:exp_day - ) - - actual_pairs <- data.frame( - beeps = sub_df[[beep_col]], - days = sub_df[[day_col]] - ) - miss <- anti_join(expected_pairs, actual_pairs, by = c("beeps", "days")) - - add <- data.frame( - id = rep(participants[i], times=nrow(miss)), - days = miss$days, - beeps = miss$beeps - ) - - missing <- rbind(missing, add) - - } - - - new_df <- data.frame( - ID = df[,id_col], - time = 1:nrow(df), - day = df[,day_col], - beep = df[,beep_col] - ) - - new_var <- paste0("var_", variables) - - for(i in 1:length(new_var)){ - new_df[new_var[i]] <- df[[variables[i]]] - } - - return(missing) - #return(new_df) - #print(missing) - -} - -buu <- data.frame(x=1:10, y=letters[1:10]) -buu[h[1]] <- 36:45 - -buu[[h[1]]] - -mode(lu[,2:3]) diff --git 
a/AssumptionPlotter/R/external_packages.R b/AssumptionPlotter/R/external_packages.R
index 9c0a52f..ffa99aa 100644
--- a/AssumptionPlotter/R/external_packages.R
+++ b/AssumptionPlotter/R/external_packages.R
@@ -1,4 +1,15 @@
 #' @import ggplot2
 #' @import usethis
 #' @import testthat
+#' @import knitr
+#' @import quarto
+#' @import shiny
+#' @import bslib
+#' @importFrom DT renderDT DTOutput
+#' @import sass
+#' @importFrom readr read_tsv read_csv
+#' @import dplyr
+#' @import tidyr
+#' @import grDevices
+
 NULL
diff --git a/AssumptionPlotter/R/pie_bar_chart.R b/AssumptionPlotter/R/pie_bar_chart.R
new file mode 100644
index 0000000..d5203c7
--- /dev/null
+++ b/AssumptionPlotter/R/pie_bar_chart.R
@@ -0,0 +1,103 @@
+#' Pie or Bar Chart of Missing Data
+#'
+#' @details
+#' This function creates a pie or bar chart of all missing data points
+#' @param df Selected dataset.
+#' @param participant Participant being plotted
+#' @param type Whether it should return a pie or bar chart.
+#' @param plot_all Whether the function should generate a plot for everyone or
+#' only the participant.
+#' @param text_font Font style.
+#' @param axis_text_size Text size of axes.
+#' @param legend_text_size Text size of legend.
+#' @param theme_choice The bar chart has the same theme as the assumption plot
+#' @return A pie or bar chart showing the ratio of missing values for either a
+#' participant or the entire dataset.
+#' @import ggplot2
+#' @import dplyr
+#' @import tidyr
+#' @export
+#' @examples
+#' \dontrun{
+#' pie_bar_chart(
+#'   df,
+#'   participant,
+#'   type = "pie",             # or "bar"
+#'   plot_all = FALSE,
+#'   text_font = "sans",       # or "serif", "mono"
+#'   axis_text_size = 12,
+#'   legend_text_size = 12,
+#'   theme_choice = "classic"  # or "minimal", "bw", "void"
+#' )
+#' }
+pie_bar_chart <- function(df,
+                          participant,
+                          type = "pie",
+                          plot_all = FALSE,
+                          text_font = "sans",
+                          axis_text_size = 12,
+                          legend_text_size = 12,
+                          theme_choice = "classic") {
+
+  # Stop on an unknown chart type. Without this check no plot is created and
+  # the function ends with an unrelated "object 'p' not found" error.
+  if (length(type) != 1 || !type %in% c("pie", "bar")) {
+    stop("type must be either \"pie\" or \"bar\"")
+  }
+
+  # Subset data if we are not plotting everyone
+  if (!plot_all) {
+    df <- df %>%
+      filter(.data$id == !!participant)
+  }
+
+  # Count the missing and the included data points
+  plot_df <- df %>%
+    mutate(Status = ifelse(.data$missing, "Missing", "Included")) %>%
+    dplyr::count(Status)
+
+  # Both chart types use the same colors
+  status_colors <- scale_fill_manual(
+    values = c("Missing" = "red", "Included" = "blue")
+  )
+
+  # Create different types of plots
+  if (type == "pie") {
+    p <- ggplot(plot_df, aes(x = "", y = n, fill = Status)) +
+      geom_bar(stat = "identity", width = 1) +
+      coord_polar("y", start = 0) +
+      status_colors +
+      theme_void(base_family = text_font) +
+      theme(
+        legend.title = element_text(size = legend_text_size + 2),
+        legend.text = element_text(size = legend_text_size)
+      )
+  } else {
+    p <- ggplot(plot_df, aes(x = Status, y = n, fill = Status)) +
+      geom_col() +
+      xlab("Data points") +
+      ylab("Count") +
+      status_colors
+
+    # Change theme
+    p <- switch(theme_choice,
+      minimal = p + theme_minimal(base_family = text_font),
+      classic = p + theme_classic(base_family = text_font),
+      bw = p + theme_bw(base_family = text_font),
+      void = p + theme_void(base_family = text_font),
+      p
+    )
+
+    # Edit font size
+    p <- p +
+      theme(
+        text = element_text(size = axis_text_size),
+        axis.title = element_text(size = axis_text_size + 2),
+        axis.text = element_text(size = axis_text_size),
+        legend.title = element_text(size = legend_text_size + 2),
+        legend.text = element_text(size = legend_text_size)
+      )
+  }
+
+  p
+}
diff --git a/AssumptionPlotter/R/rank_participants.R b/AssumptionPlotter/R/rank_participants.R
new file mode 100644
index 0000000..dd1e978
--- /dev/null
+++ b/AssumptionPlotter/R/rank_participants.R
@@ -0,0 +1,58 @@
+#' Create table that ranks participants after number of missing values
+#'
+#' @details
+#' This function creates the table in the summary tab. It shows the participants
+#' with the most and least missing values and the sum and proportion of these
+#' missing values.
+#' @param df Data frame that has been cleaned with clean_df().
+#' @param show How many rows the table should have.
+#' @return Table showing the participants with most and least missing values.
+#' @import dplyr
+#' @export
+#' @examples
+#' \dontrun{
+#' rank_participants(menghini_2023, 5)
+#' }
+rank_participants <- function(df, show = 5) {
+
+  # Count the missing rows per participant, most missing values first
+  ranked <- df %>%
+    select(all_of(c("id", "missing"))) %>%
+    group_by(id) %>%
+    summarise(
+      missing_sum = sum(missing, na.rm = TRUE),
+      missing_prop = mean(missing, na.rm = TRUE)
+    ) %>%
+    arrange(desc(missing_sum))
+
+  if (show > nrow(ranked)) {
+    stop("Selected rows exceed number of participants")
+  }
+
+  most <- slice_head(ranked, n = show)
+
+  # The tail holds the fewest missing values; list the lowest one first
+  least <- ranked %>%
+    slice_tail(n = show) %>%
+    arrange(missing_sum)
+
+  # Percentages are rounded to whole numbers for display
+  as_percent <- function(prop) paste0(round(prop * 100), "%")
+
+  rank_df <- data.frame(
+    "ID_most" = as.character(most$id),
+    "sum_most" = most$missing_sum,
+    "prop_most" = as_percent(most$missing_prop),
+    "ID_least" = as.character(least$id),
+    "sum_least" = least$missing_sum,
+    "prop_least" = as_percent(least$missing_prop)
+  )
+
+  # Display names; "Sum" and "Percent" are used for both halves of the table
+  colnames(rank_df) <- c(
+    "ID most:", "Sum", "Percent",
+    "ID least:", "Sum", "Percent"
+  )
+
+  rank_df
+}
diff --git a/AssumptionPlotter/R/run_app.R b/AssumptionPlotter/R/run_plotter.R
similarity index 100%
rename from
AssumptionPlotter/R/run_app.R rename to AssumptionPlotter/R/run_plotter.R diff --git a/AssumptionPlotter/R/simulate_logistic.R b/AssumptionPlotter/R/simulate_logistic.R deleted file mode 100644 index 02f4f38..0000000 --- a/AssumptionPlotter/R/simulate_logistic.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Simulate data for logistic regression -#' -#' @param n Number of observations -#' @param beta0 Intercept -#' @param beta1 Slope -#' @return A data frame with simulated data -#' @export -#' @examples -#' simulate_logistic(n = 100, beta0 = 0, beta1 = 1) -simulate_logistic <- function(n = 100, beta0 = 0, beta1 = 1) { - x <- rnorm(n) - p <- 1 / (1 + exp(-(beta0 + beta1 * x))) - y <- rbinom(n, 1, p) - data.frame(x = x, y = y) -} diff --git a/AssumptionPlotter/data/contreras_2020.rda b/AssumptionPlotter/data/contreras_2020.rda new file mode 100644 index 0000000..33b2a9b Binary files /dev/null and b/AssumptionPlotter/data/contreras_2020.rda differ diff --git a/AssumptionPlotter/data/geschwind_2013.rda b/AssumptionPlotter/data/geschwind_2013.rda new file mode 100644 index 0000000..97cef1a Binary files /dev/null and b/AssumptionPlotter/data/geschwind_2013.rda differ diff --git a/AssumptionPlotter/data/menghini_2023.rda b/AssumptionPlotter/data/menghini_2023.rda index 1a51b5d..c7ee469 100644 Binary files a/AssumptionPlotter/data/menghini_2023.rda and b/AssumptionPlotter/data/menghini_2023.rda differ diff --git a/AssumptionPlotter/inst/app/app.R b/AssumptionPlotter/inst/app/app.R index 61467de..e238ac2 100644 --- a/AssumptionPlotter/inst/app/app.R +++ b/AssumptionPlotter/inst/app/app.R @@ -1,210 +1,396 @@ -# -# This is a Shiny web application. You can run the application by clicking -# the 'Run App' button above. 
-# -# Find out more about building applications with Shiny here: -# -# https://shiny.posit.co/ -# - -library(shiny) -library(AssumptionPlotter) -library(datasets) -library(bslib) -library(DT) -library(sass) - -# Define UI for application that draws a histogram -ui <- page_navbar( - # id = "main_page", - # title = HTML(" - # AssumptionPlotter - # "), - id = "main_page", +############################################################################# +# USER INTERFACE # +############################################################################# + +# Using page_navbar because it looks nicer +ui <- bslib::page_navbar( + id = "main_page", title = tags$span( - tags$img(src = "logo.png", height = "30px"), - tags$b("AssumptionPlotter") - ), - bg = "red", - inverse = TRUE, + tags$img(src = "logo.png", height = "30px"), # logo is CC0 + tags$b("AssumptionPlotter")), + bg = "#b22222", fillable = TRUE, - theme = bs_theme( - base_font = font_google("Merriweather") + + # Edit font style + theme = bslib::bs_theme( + base_font = bslib::font_google("Merriweather") ), - nav_panel(title = "Start", - p(HTML(" + + # First tab: Start page + bslib::nav_panel(title = "Start", + + # Welcome message + HTML("
Welcome to AssumptionPlotter!
-For now, this package aims to give you an intuition for whether - a selected set of statistical models can account for what you want - to observe in your intensive longitudinal data.
+For now, this package allows you to visually inspect your + intensive longitudinal data.
-Thus, this package is useful for anyone conducting EMA/ESM - research.
+Thus, this package is useful for anyone conducting + EMA/ESM research.
-The package includes the following steps:
+The package includes the following functions:
Unfortunately for now, you cannot use your own data.
- " +While the package aims to account for some errors in data + frame formatting, it is still not guaranteed that all data files + will work.
"), + + tags$p("The package was developed with datasets from", + tags$a("openESM", + href = "https://openesmdata.org/datasets/", + target = "_blank"), # Creates a new tab when + # you navigate. But when running app in RStudio, it + # also opens a blank window. Doesn't happen when ran + # from browser. + + "in mind."), + p("App is recommended to be used in full-screen mode as some + features otherwise might be hidden."), + # Button to easily transport you to the data page + actionButton( + inputId= "go_data", + label= "Choose your dataset") + + ), # End start page + + + + # Second tab: Data page + bslib::nav_panel(title = "Data", + # Choose what data you want to use + radioButtons( + inputId = "data_pick", + label = "Dataset (note that naviagting between + options will reset filled in information)", + choices = c( + "Built in datasets from openESM" = "builtin", + "Upload your own data" = "upload", + "Zenodo link of .tsv file" = "zenodo" + ), + selected = "builtin", + width = "100%" + ), + + # In server, UI will change depending on data_pick choice + uiOutput("data_select"), + + # Button to transport to plot tab + actionButton( + inputId = "go_plot", + label = "Plot your data"), + + # Show table of chosen dataset + DT::DTOutput("chosenData") # Defined in server + + ), # End data page + + + # Third tab: Plotting page + bslib::nav_panel(title = "Plot", + + # Create sidebar and main content field + bslib::layout_sidebar( + + # start sidebar (options) + sidebar = bslib::sidebar( + HTML("Options"), + + # Plotting options + ## Participant defined in server + uiOutput("participant_ui"), + + ## Variables defined in server + uiOutput("variable_ui"), + + ### Option to select all variables + actionButton( + inputId = "all_vars", + label = "Select all"), + + ### Option to deselect all variables + actionButton( + inputId = "clear_vars", + label = "Deselect all"), + + ## Trend line: create input for assumption_plot() + checkboxInput( + inputId = "add_trend", + label = "Show trend 
line", + value = FALSE), # default + + ### If trend line included, what kind + conditionalPanel( + condition = "input.add_trend == true", + selectInput( + inputId = "trend_type", + label = "Trend line type", + choices = c("lm", "loess"), + selected = "lm")), + + ## Only show imputation options if you want to impute + ### (This is made a bit clumsily. Might make sense to + ### change this in both the plotting function and here. + ### Currently an extra step needs to be taken in the server.) + checkboxInput( + inputId = "impute_toggle", + label = "Impute missing data", + value = FALSE), # default + + conditionalPanel( + condition = "input.impute_toggle == true", + + selectInput( + inputId = "impute_method", + label = "Imputation method", + choices = c("mean", "mode"), + selected = "mean")), + + hr(), # add horizontal line + + ## Day label and lines + ### Label + checkboxInput( + inputId = "day_label", + label = "Include day labels", + value = TRUE), # default + + ### Lines + checkboxInput( + inputId = "day_lines", + label = "Include day lines", + value = TRUE), # default + + ## Edit color palettes + checkboxInput( + inputId = "edit_palette", + label = "Edit color palette", + value = FALSE), + + conditionalPanel( + condition = "input.edit_palette == true", + + ### Choose which palette + selectInput( + inputId = "palette_option", + label = "Choose palette", + choices = grDevices::hcl.pals(), + selected = "Zissou 1")), + + ## Choose theme + selectInput( + inputId = "theme_choice", + label = "Theme", + choices = c("classic", "minimal", "bw", "dark"), + selected = "classic" ), + + ## Choose font + selectInput( + inputId = "text_font", + label = "Font family", + choices = c("sans", "serif", "mono"), + selected = "sans"), + + ## Edit font sizes + ### Axis + sliderInput( + inputId = "axis_size", + label = "Axis text size", + min = 1, + max = 30, + value = 14), + + ### Legend + sliderInput( + inputId = "legend_size", + label = "Legend text size", + min = 1, + max = 30, + 
value = 14) + + ), # End of options (sidebar) + + + # Start plot page main content + + HTML("Assumption Plot"), + plotOutput("assumption_plot") + + ), # end of layout_sidebar + + # Navigate back to data tab + actionButton( + inputId = "go_summary", + label = "See summary of missing values") + + ), # End plot page + + + + # Fourth tab: Summary of missing values tab + bslib::nav_panel(title = "Summary", + + # Create sidebar and main content field + bslib::layout_sidebar( + + # Start sidebar(options for summary page) + sidebar = bslib::sidebar( + HTML("Options"), + + # Plotting options: + ## Participant defined in server + ### Cannot use the same input because it kept on crashing + ### for me. However, after identifying the real bug, + ### (i thought the bug was that it couldn't use the same + ### input$participant for both plots, but in reality I'd + ### just forgotten to close an html tag properly), it + ### might be possible to just use participant_ui here too. + uiOutput("participant_ui_summary"), + + # ## Choose theme + # ### Not included for now + # uiOutput("theme_ui") + + ## Choose text font defined in server because I want the + ## font chosen in the plot page to automatically be + ## applied to the summary plots. 
+ ### same disclaimer about just using text_font as for + ### participant_ui_summary + uiOutput("font_ui"), + + ## Edit summary plots font sizes + ### Axis + sliderInput( + inputId = "axis_size_sum", + label = "Axis text size", + min = 1, + max = 30, + value = 14), - )), - actionButton( - inputId= "go_data", - label= "Choose your dataset" - ) + ### Legend + sliderInput( + inputId = "legend_size_sum", + label = "Legend text size", + min = 1, + max = 30, + value = 14) - ), - nav_panel(title = "Data", - p(""), - radioButtons( - "data_pick", - "Dataset", - - choices = c( - "Datasets from openESM" = "builtin", - "Upload your own data" = "upload" - ) - ), - - uiOutput("data_select"), - actionButton( - inputId= "go_plot", - label= "Plot your data" - ), - DT::DTOutput("chosenData") - ), - nav_panel(title = "Plot", - p("Data visualization"), - layout_sidebar( - sidebar = sidebar(HTML(" - Options -hello
- ")), - # uiOutput("summary_box"), - navset_card_tab( - nav_panel( - title= "Plot", - card( - card_header("Assumption Plot"), - plotOutput("test_plot") - ) - ), - nav_panel( - title = "Checks", - p(HTML(" -Your file should needs to meet at least the following criteria - for the app to run smoothly:
-Welcome to AssumptionPlotter!
+ +For now, this package aims to give you an intuition for whether + a selected set of statistical models can account for what you want + to observe in your intensive longitudinal data.
+ +Thus, this package is useful for anyone conducting + EMA/ESM research.
+ +The package includes the following steps:
+Unfortunately for now, you cannot use your own data.
+ " + + )), + + # Button to easily transport you to the data page + actionButton( + inputId= "go_data", + label= "Choose your dataset" + ) + + ), + + + # Second tab: Data page + bslib::nav_panel(title = "Data", + + # Choose what data you want to use + radioButtons( + inputId = "data_pick", + label = "Dataset", + choices = c( + "Datasets from openESM" = "builtin", + "Upload your own data" = "upload" + ), + selected = "builtin" + ), + + # In server, UI will change depending on data_pick choice + uiOutput("data_select"), + + # Button to transport to plot tab + actionButton( + inputId = "go_plot", + label = "Plot your data"), + + # Show table of chosen dataset + DT::DTOutput("chosenData") # Defined in server + ), + + + # Third tab: Plot tab + bslib::nav_panel(title = "Plot", + + # Create a layout where there is a sidebar and main content + bslib::layout_sidebar( + + # Edit the sidebar + sidebar = bslib::sidebar( + HTML("Options"), + + # Plotting options + + # ## Participant defined in server + # uiOutput("participant_ui"), + # + # ## Variables defined in server + # uiOutput("variable_ui"), + # + # ### Option to select all variables + # actionButton( + # inputId = "all_vars", + # label = "Select all"), + # + # ### Option to deselect all variables + # actionButton( + # inputId = "clear_vars", + # label = "Deselect all"), + # + # ## Trend line: create input for assumption_plot() + # checkboxInput( + # inputId = "add_trend", + # label = "Show trend line", + # value = FALSE), # default + + # ### If trend line included, what kind + # conditionalPanel( + # condition = "input.add_trend == true", + # selectInput( + # inputId = "trend_type", + # label = "Trend line type", + # choices = c("lm", "loess"), + # selected = "lm")), + # + # ## Only show imputation options if you want to impute + # ### (This is made a bit clumsily, might make sense to + # ### change this in both the plotting function and here. + # ### Currently an extra step needs to be taken in the server.) 
+ # checkboxInput( + # inputId = "impute_toggle", + # label = "Impute missing data", + # value = FALSE), # default + # + # conditionalPanel( + # condition = "input.impute_toggle == true", + # + # selectInput( + # inputId = "impute_method", + # label = "Imputation method", + # choices = c("mean", "mode"), + # selected = "mean")), + + hr(), # add horizontal line + + # Day label and lines + ## Label + checkboxInput( + inputId = "day_label", + label = "Include day labels", + value = TRUE), # default + + ## Lines + checkboxInput( + inputId = "day_lines", + label = "Include day lines", + value = TRUE), # default + + ## Edit colors? + checkboxInput( + inputId = "edit_palette", + label = "Edit plot colors", + value = FALSE), + + conditionalPanel( + condition = "input.edit_palette == true", + + # Choose which theme + selectInput( + inputId = "palette_option", + label = "Choose palette", + choices = grDevices::hcl.pals(), + selected = "Zissou 1")), + + ## Choose theme + selectInput( + inputId = "theme_choice", + label = "Theme", + choices = c("classic", "minimal", "bw", "void"), + selected = "classic" ), + + # Choose font + selectInput( + inputId = "text_font", + label = "Font family", + choices = c("sans", "serif", "mono"), + selected = "sans"), + + # Edit font sizes + ## Axis + sliderInput( + inputId = "axis_size", + label = "Axis text size", + min = 5, + max = 30, + value = 12), + + ## Legend + sliderInput( + inputId = "legend_size", + label = "Legend text size", + min = 5, + max = 30, + value = 12) + + ), # End of options + + + # Create a card with tabs next to sidebar + bslib::navset_card_tab( + + # Plot tab + bslib::nav_panel(title = "Assumption Plot", + # plotOutput("assumption_plot") + ), + + # Summary tab + bslib::nav_panel(title = "Summary", + HTML(" + Reminders of what to check for in 'Assumption Plot': +Your file should needs to meet at least the following criteria + for the app to run smoothly:
+
+ AssumptionPlotter
author: "Emma Akrong"
-date: "May 24, 2026"
+date: "May 31, 2026"
+format:
+ html:
+ theme: simplex
+ css: vignette-style.css
+ toc: true
+ toc-depth: 4
+ toc-location: left
vignette: >
%\VignetteIndexEntry{AssumptionPlotter}
%\VignetteEngine{quarto::html}
@@ -12,68 +21,204 @@ knitr:
comment: '#>'
---
-```{r}
+```{r, eval=FALSE}
#| label: setup
library(AssumptionPlotter)
```
-This package aims to give a an overview of whether the user's data matches the statistical model they intend to use.
+This package is written in `R`, and therefore requires you to have R installed. It was developed in **RStudio**, so the app is likely most straightforward to run from there. You can find the instructions to download RStudio here.
+
+For problems with the package, please contact emma.akrong@gmail.com
+
+Required packages:
+
+```{r, eval=FALSE}
+library("ggplot2") # For plots
+library("testthat") # To run unit tests
+library("usethis") # To edit package
+library("knitr") # To render/create quarto file
+library("quarto") # To render/create quarto
+library("shiny") # To create app
+library("bslib") # To format user interface of app
+library("DT") # To create interactive table in app's data page
+library("sass") # To edit font
+library("readr") # To read csv/tsv files into tibbles
+library("dplyr") # To edit data frames for plots/tables
+library("tidyr") # To edit data frames for plots/tables
+library("grDevices") # Access color palettes for assumption plot
+```
+
+
+## Welcome to AssumptionPlotter!
+
+At the moment, this package creates an app that allows for visual inspection of EMA/ESM data.
+
+The package was developed with the datasets from openESM in mind, and can currently plot built-in datasets, the user's own data, and data from openESM accessed through the link to download the dataset from its dedicated Zenodo page. However, the last option is not guaranteed to work.
+
+The purpose is to give EMA/ESM researchers a nice overview of what their data looks like, and possibly guide decisions on what statistical model to use. It plots according to the expected days and beeps of the study, and therefore requires all participants to have the same study length. You can currently only plot the variables of one participant at a time.
+
+While "*PlotESM*" currently might be a more fitting name, ideally some statistical tests will later be implemented.
+
+The main functions created by this package are:
+
+```{r, eval=FALSE}
+run_plotter() # Launches the shiny app
+clean_df() # Cleans data uploaded/linked to by the user
+assumption_plot() # Main plot of the app
+pie_bar_chart() # Plots summarizing how many missing values
+rank_participants() # Table showing who had the most and least missing values
+```
+
+You can get more information about them by e.g., running `?run_plotter()` in your console (accessing the function's help file), once you've installed the package.
+
+We will now take a tour of the package.
-While it tests whether the data matches meets the statistical assumptions of the models, it also aims to give an intuition of whether the statistical model will capture what the user wants see in the data.
-For example, in the case of a study with intensive longitudinal data, the user might expect to see change. This package aims to show how e.g., a VAR model would not be able to reflect this expectation.
+## Running AssumptionPlotter
+### Install Package
-# Running AssumptionPlotter
+To install the package, run this code in your console:
-The first step after installing the package is to run the function `run.plotter()`.
+```{r, eval=FALSE}
+remotes::install_github("Programming-The-Next-Step-2026/Assumption-Plotter",
+ ref = "week-4",
+ subdir = "AssumptionPlotter",
+ build_manual = TRUE,
+ build_vignettes = TRUE)
+```
+
+It might ask you to update your packages. You can choose not to, but it is not guaranteed that the app will run as intended.
+
+### Launch App
+
+The first step after installing the package is to load it from your library and run the function `run_plotter()`.
```{r, eval=F}
-run.plotter()
+library(AssumptionPlotter)
+
+run_plotter()
```
+This will launch the app and take you to the start page:
+
+
+
+It is recommended that you use the app in full-screen mode as some features (e.g., the table in the data page) otherwise might be hidden. Also note that if you're running the app in RStudio, blank screens might pop up when you press a hyperlink. This does not happen when you run it in your browser.
+
+Press the "*Choose your dataset*" button or navigate to the data tab in the menu bar. You cannot plot anything until a dataset has been chosen.
+
+Note that in the menu bar you can find links to the openESM webpage and the GitHub repository (see the green circle in the image).
+
+In all following images, pay attention to information outlined in the green or red circles.
+
+
+### Choose Data
+
+In the "Data" page you have a few options. You can use the built-in datasets, upload your own, or use the link to the tsv file in Zenodo.
+
+#### Built-in data
+
+To familiarise yourself with the app, you can choose one of the built-in datasets.
+
+
+
+These datasets were chosen at random and are by no means perfect. However, they can for example highlight how messy the data is (e.g., in `geschwind_2013` most participants have no data in the last days, although it's indicated that there should be 10 days).
+
+You can press the hyperlink to access the data's dedicated openESM page. To preview more of the data, just choose another option in "Show" at the top-left of the table.
+
+You can then press the "*Plot your data*" button.
+
+
+#### Upload data
+
+By choosing "*Upload your own data*", you will be instructed to follow a few steps:
+
+
+
+`clean_df()`, which cleans the data for the plotting functions, requires you to input the names of the id, day, and beep columns (the pre-filled names are "id", "day", and "beep"), as well as how many days and beeps per day each participant should have. You also need to manually fill in the names of all the variables you want to plot.
+
+In the app, it says that only csv files are accepted. This is true, however, you can edit this in the server in the "app.R" file (line 514; e.g.: `accept = c(".csv", ".tsv")`). You can also edit line 610 to use a different read function. The data is currently read using `readr::read_csv()`, which technically also could work for tsv files.
+
+When you're happy, you can move to the Plot page.
+
+
+#### Zenodo link
+
+The final option is to upload the data using the dataset's download file, which can be found in Zenodo. An example link is circled in green:
+
+
+
+To access the link you will have to take a few actions. Start in the data's dedicated openESM page and follow these steps:
+
+
+
+On this page you can find the expected days and beeps (circled on the right).
+
+If you scroll down a bit, you will also find the names of the variables (columns). Fill these out in the app. Note that some pages have the names of the id, day, and beep columns at the bottom.
+
+Fill in all required information in the app and then press the Zenodo DOI link.
+
+
+
+Pressing the Zenodo DOI link should show you the downloadable files. Make sure to not pick the raw or static files, but the one circled in red.
+
+
-# TO ADD
+Then access the link by right clicking, copy the link address, and paste the link to the app.
-This will all be added before Friday.
+
-::: {layout-ncol=3}
+Note that this doesn't work for all data. For example, during development, I could not get "0011_kuppens" to work.
-
+If the data previews successfully, you can move on to plotting.
-
+### Plot Data
-
+When you move to the plot tab, you should see all the variables plotted per day and beep in the Assumption Plot.
-
+
-
+A connected line indicates that the participant responded to consecutive beeps. If there is only a dot, it means that the beep was neither preceded nor followed by a response. There should be data at every beep. If a beep is empty, this indicates missing data.
-
+In the "*Options*" sidebar you will see different plotting options. Scroll down to see all options. Note that you can control its width. The plot is reactive, so it should change immediately when you change options (with some possible rendering delay). In `assumption_plot()` you can specify the following things, which can also be controlled in the app:
+1. What variables to show.
+1. Whether to include a trend line (either linear or loess).
+1. Whether to impute missing data (either the mean or mode of the variable).
+1. Whether to show the day labels.
+1. Whether to include lines separating the days.
+1. Whether to edit the plot's color palette (supports any palette from `grDevices::hcl.pals()`).
+1. What ggplot theme the plot should have (options can be edited in "app.R").
+1. What font the plot should have.
+1. Font size of the axes.
+1. Font size of the legend.
-
+In "app.R", all of these have preset values. These are not necessarily the same as those seen in "examples" in the help file.
-
+You can play around with the different options. Note that the plot will not render if no variables are chosen. It might also take a while for the plot to render, depending on how quickly the app can load the data.
-
+If you're curious about a summary of the missing values, you can press the "*See summary of missing values*" button at the bottom of the screen and move on to the "*Summary*" page.
-
-
+### Summary Page
-
+The "*Summary*" page starts with some tips for what to look for in the Assumption Plot in the "*Plot*" page. This could be next to the plot, but I wanted to make the plot as big as possible.
-
+
-
+Next, you will see a pie and bar chart showing the ratio of included to missing data for the selected participant. When navigating from the "*Plot*" page to the "*Summary*" page, it will remember what participant and font should be plotted. However, if you change the participant in the "*Summary*" page and then move back to the "*Plot*" page, you'll have to manually change it again for the Assumption Plot.
-
+You can also choose to plot the missing data in the entire dataset in the pie and bar chart. This way you can compare the participant's adherence to all other participants.
+Under the plots you find a table summarising which participants had the most and least missing values. This could e.g., be useful for knowing who to pick for the Assumption Plot when you're interested in trends in the data. You can edit the number of rows the table should show (note that this cannot exceed the sample size) and then, as seen in the bottom-right of the page when you hover over the table, you can expand the table.
-
+At the bottom of the "*Summary*" page there is a button to allow for easy navigation between the "*Summary*" and "*Plot*" pages.
-
+