modelbased/R/visualisation_recipe.R at a4abdbd6bbf4e2090134c139821e4aaee7d0e583 · easystats/modelbased · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#' @title Automated plotting for 'modelbased' objects
#' @name visualisation_recipe.estimate_predicted
#'
#' @description
#' Most **modelbased** objects can be visualized using either the `plot()`
#' function, which internally calls the `visualisation_recipe()` function and
#' relies on `{ggplot2}`. There is also a `tinyplot()` method, which uses the
#' `{tinyplot}` package and relies on the core R graphic system. See the
#' examples below for more information and examples on how to create and
#' customize plots.
#'
#' The plotting works by mapping any predictors from the `by` argument to the
#' x-axis, colors, alpha (transparency) and facets. Thus, the appearance of the
#' plot depends on the order of the variables that you specify in the `by`
#' argument. For instance, the plots corresponding to
#' `estimate_relation(model, by=c("Species", "Sepal.Length"))` and
#' `estimate_relation(model, by=c("Sepal.Length", "Species"))` will look
#' different.
#'
#' The automated plotting is primarily meant for convenient visual checks, but
#' for publication-ready figures, we recommend re-creating the figures using the
#' `{ggplot2}` package directly.
#'
#' @param x A modelbased object.
#' @param show_data Logical, if `TRUE`, display the "raw" data as a background
#' to the model-based estimation. For mixed models, you can additionally use the
#' `collapse_group` argument to "collapse" data points by random effects
#' grouping factors. Argument `show_data` will be ignored for plotting objects
#' returned by `estimate_slopes()` or `estimate_grouplevel()`.
#' @param join_dots Logical, if `TRUE` and for categorical focal terms in `by`,
#' dots (estimates) are connected by lines, i.e. plots will be a combination of
#' dots with error bars and connecting lines. If `FALSE` (default), only dots
#' and error bars are shown. It is possible to set a global default value using
#' `options()`, e.g. `options(modelbased_join_dots = TRUE)`.
#' @param numeric_as_discrete Maximum number of unique values in a numeric
#' predictor to treat that predictor as discrete. Defaults to `8`. Numeric
#' predictors are usually mapped to a continuous color scale, unless they have
#' only few unique values. In the latter case, numeric predictors are assumed to
#' represent "categories", e.g. when only the mean value and +/- 1 standard
#' deviation around the mean are chosen as representative values for that
#' predictor. Use `FALSE` to always use continuous color scales for numeric
#' predictors. It is possible to set a global default value using `options()`,
#' e.g. `options(modelbased_numeric_as_discrete = 10)`.
#' @param show_residuals Logical, if `TRUE`, display residuals of the model as a
#' background to the model-based estimation. Residuals will be computed for the
#' predictors in the data grid, using [`residualize_over_grid()`]. For mixed
#' models, you can additionally use the `collapse_group` argument to "collapse"
#' data points from residuals by random effects grouping factors.
#' @param collapse_group This argument only takes effect when either `show_data`
#' or `show_residuals` is `TRUE`. For mixed effects models, name of the grouping
#' variable of random effects. If `collapse_group = TRUE`, data points
#' "collapsed" by the first random effect groups are added to the plot. Else, if
#' `collapse_group` is a name of a group factor, data is collapsed by that
#' specific random effect. See [`collapse_by_group()`] for further details.
#' @param point,line,pointrange,ribbon,facet,grid Additional
#' aesthetics and parameters for the geoms (see customization example).
#' @param ... Arguments passed from `plot()` to `visualisation_recipe()`, or
#' to `tinyplot()` if you use that method.
#'
#' @details There are two options to remove the confidence bands or errors bars
#' from the plot. To remove error bars, simply set the `pointrange` geom to
#' `point`, e.g. `plot(..., pointrange = list(geom = "point"))`. To remove the
#' confidence bands from line geoms, use `ribbon = "none"`.
#'
#' @return An object of class `visualisation_recipe` that describes the layers
#' used to create a plot based on `{ggplot2}`. The related `plot()` method is in
#' the `{see}` package.
#'
#' @section Global Options to Customize Plots:
#' Some arguments for `plot()` can get global defaults using `options()`:
#'
#' - `modelbased_join_dots`: `options(modelbased_join_dots = <logical>)` will
#'   set a default value for the `join_dots`.
#'
#' - `modelbased_numeric_as_discrete`: `options(modelbased_numeric_as_discrete = <number>)`
#'   will set a default value for the `modelbased_numeric_as_discrete` argument.
#'   Can also be `FALSE`.
#'
#' - `modelbased_ribbon_alpha`: `options(modelbased_ribbon_alpha = <number>)`
#'   will set a default value for the `alpha` argument of the `ribbon` geom.
#'   Should be a number between `0` and `1`.
#'
#' - `modelbased_tinyplot_dodge`: `options(modelbased_tinyplot_dodge = <number>)`
#'   will set a default value for the `dodge` argument (spacing between geoms)
#'   when using `tinyplot::plt()`. Should be a number between `0` and `1`.
#'
#' @examplesIf all(insight::check_if_installed(c("marginaleffects", "see", "ggplot2", "lme4"), quietly = TRUE)) && getRversion() >= "4.1.0"
#' library(ggplot2)
#' library(see)
#' # ==============================================
#' # estimate_relation, estimate_expectation, ...
#' # ==============================================
#' # Simple Model ---------------
#' x <- estimate_relation(lm(mpg ~ wt, data = mtcars))
#' layers <- visualisation_recipe(x)
#' layers
#' plot(layers)
#'
#' # visualization_recipe() is called implicitly when you call plot()
#' plot(estimate_relation(lm(mpg ~ qsec, data = mtcars)))
#'
#' \dontrun{
#' # It can be used in a pipe workflow
#' lm(mpg ~ qsec, data = mtcars) |>
#'   estimate_relation(ci = c(0.5, 0.8, 0.9)) |>
#'   plot()
#'
#' # Customize aesthetics ----------
#'
#' plot(x,
#'   point = list(color = "red", alpha = 0.6, size = 3),
#'   line = list(color = "blue", size = 3),
#'   ribbon = list(fill = "green", alpha = 0.7)
#' ) +
#'   theme_minimal() +
#'   labs(title = "Relationship between MPG and WT")
#'
#' # Customize raw data -------------
#'
#' plot(x, point = list(geom = "density_2d_filled"), line = list(color = "white")) +
#'   scale_x_continuous(expand = c(0, 0)) +
#'   scale_y_continuous(expand = c(0, 0)) +
#'   theme(legend.position = "none")
#'
#' # Single predictors examples -----------
#'
#' plot(estimate_relation(lm(Sepal.Length ~ Species, data = iris)))
#'
#' # 2-ways interaction ------------
#'
#' # Numeric * numeric
#' x <- estimate_relation(lm(mpg ~ wt * qsec, data = mtcars))
#' plot(x)
#'
#' # Numeric * factor
#' x <- estimate_relation(lm(Sepal.Width ~ Sepal.Length * Species, data = iris))
#' plot(x)
#'
#' # ==============================================
#' # estimate_means
#' # ==============================================
#' # Simple Model ---------------
#' x <- estimate_means(lm(Sepal.Width ~ Species, data = iris), by = "Species")
#' layers <- visualisation_recipe(x)
#' layers
#' plot(layers)
#'
#' # Customize aesthetics
#' layers <- visualisation_recipe(x,
#'   point = list(width = 0.03, color = "red"),
#'   pointrange = list(size = 2, linewidth = 2),
#'   line = list(linetype = "dashed", color = "blue")
#' )
#' plot(layers)
#'
#' # Two levels ---------------
#' data <- mtcars
#' data$cyl <- as.factor(data$cyl)
#'
#' model <- lm(mpg ~ cyl * wt, data = data)
#'
#' x <- estimate_means(model, by = c("cyl", "wt"))
#' plot(x)
#'
#' # GLMs ---------------------
#' data <- data.frame(vs = mtcars$vs, cyl = as.factor(mtcars$cyl))
#' x <- estimate_means(glm(vs ~ cyl, data = data, family = "binomial"), by = c("cyl"))
#' plot(x)
#'
#' # ==============================================
#' # Adding original data to the plot
#' # ==============================================
#' data(efc, package = "modelbased")
#' efc$e15relat <- as.factor(efc$e15relat)
#' efc$c161sex <- as.factor(efc$c161sex)
#' levels(efc$c161sex) <- c("male", "female")
#' model <- lme4::lmer(neg_c_7 ~ c161sex + (1 | e15relat), data = efc)
#'
#' me <- estimate_means(model, "c161sex")
#' plot(me, show_data = TRUE)
#'
#' # data points: collapse by / average over random effects groups -------
#' plot(me, show_data = TRUE, collapse_group = "e15relat")
#' }
#' @export
visualisation_recipe.estimate_predicted <- function(
  x,
  show_data = FALSE,
  show_residuals = FALSE,
  collapse_group = NULL,
  point = NULL,
  line = NULL,
  pointrange = NULL,
  ribbon = NULL,
  facet = NULL,
  grid = NULL,
  join_dots = NULL,
  numeric_as_discrete = NULL,
  ...
) {
  # Process argument ---------------------------------------------------------
  # --------------------------------------------------------------------------

  # set defaults
  if (is.null(join_dots)) {
    join_dots <- getOption("modelbased_join_dots", FALSE)
  }
  if (is.null(numeric_as_discrete)) {
    numeric_as_discrete <- getOption("modelbased_numeric_as_discrete", 8)
  }

  .visualization_recipe(
    x,
    show_data = show_data,
    show_residuals = show_residuals,
    collapse_by = collapse_group,
    point = point,
    line = line,
    pointrange = pointrange,
    ribbon = ribbon,
    facet = facet,
    grid = grid,
    join_dots = join_dots,
    numeric_as_discrete = numeric_as_discrete,
    ...
  )
}


#' @export
visualisation_recipe.estimate_means <- visualisation_recipe.estimate_predicted


#' @rdname visualisation_recipe.estimate_predicted
#'
#' @examplesIf all(insight::check_if_installed(c("marginaleffects", "see", "ggplot2"), quietly = TRUE))
#' # ==============================================
#' # estimate_slopes
#' # ==============================================
#' model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris)
#' x <- estimate_slopes(model, trend = "Petal.Length", by = "Species")
#'
#' layers <- visualisation_recipe(x)
#' layers
#' plot(layers)
#'
#' \dontrun{
#' # Customize aesthetics and add horizontal line and theme
#' layers <- visualisation_recipe(x, pointrange = list(size = 2, linewidth = 2))
#' plot(layers) +
#'   geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
#'   theme_minimal() +
#'   labs(y = "Effect of Petal.Length", title = "Marginal Effects")
#'
#' model <- lm(Petal.Length ~ poly(Sepal.Width, 4), data = iris)
#' x <- estimate_slopes(model, trend = "Sepal.Width", by = "Sepal.Width", length = 20)
#' plot(visualisation_recipe(x))
#'
#' model <- lm(Petal.Length ~ Species * poly(Sepal.Width, 3), data = iris)
#' x <- estimate_slopes(model, trend = "Sepal.Width", by = c("Sepal.Width", "Species"))
#' plot(visualisation_recipe(x))
#' }
#' @export
visualisation_recipe.estimate_slopes <- function(
  x,
  line = NULL,
  pointrange = NULL,
  ribbon = NULL,
  facet = NULL,
  grid = NULL,
  ...
) {
  .visualization_recipe(
    x,
    show_data = FALSE,
    show_residuals = FALSE,
    line = line,
    pointrange = pointrange,
    ribbon = ribbon,
    facet = facet,
    grid = grid,
    ...
  )
}


#' @rdname visualisation_recipe.estimate_predicted
#'
#' @examplesIf all(insight::check_if_installed(c("ggplot2", "marginaleffects", "see", "lme4"), quietly = TRUE))
#' # ==============================================
#' # estimate_grouplevel
#' # ==============================================
#' \dontrun{
#' data <- lme4::sleepstudy
#' data <- rbind(data, data)
#' data$Newfactor <- rep(c("A", "B", "C", "D"))
#'
#' # 1 random intercept
#' model <- lme4::lmer(Reaction ~ Days + (1 | Subject), data = data)
#' x <- estimate_grouplevel(model)
#' layers <- visualisation_recipe(x)
#' layers
#' plot(layers)
#'
#' # 2 random intercepts
#' model <- lme4::lmer(Reaction ~ Days + (1 | Subject) + (1 | Newfactor), data = data)
#' x <- estimate_grouplevel(model)
#' plot(x) +
#'   geom_hline(yintercept = 0, linetype = "dashed") +
#'   theme_minimal()
#' # Note: we need to use hline instead of vline because the axes is flipped
#'
#' model <- lme4::lmer(Reaction ~ Days + (1 + Days | Subject) + (1 | Newfactor), data = data)
#' x <- estimate_grouplevel(model)
#' plot(x)
#' }
#' @export
visualisation_recipe.estimate_grouplevel <- function(
  x,
  line = NULL,
  pointrange = NULL,
  ribbon = NULL,
  facet = NULL,
  grid = NULL,
  ...
) {
  if (is.null(facet)) {
    facet <- list(scales = "free")
  } else {
    facet <- utils::modifyList(facet, list(scales = "free"))
  }

  .visualization_recipe(
    x,
    show_data = FALSE,
    show_residuals = FALSE,
    line = line,
    pointrange = pointrange,
    ribbon = ribbon,
    facet = facet,
    grid = grid,
    join_dots = FALSE,
    ...
  )
}