Skip to content

Commit 8671d5d

Browse files
munoztd0 authored and teunbrand committed
Allow choosing quantile definition in boxplots (#6820)
* update: geom_boxplot to add quantile type argument
* update: change quantile_type to quantile.type
* update: NEWS.md
* update: document()
* tiny doc polish
1 parent 480f388 commit 8671d5d

4 files changed

Lines changed: 42 additions & 2 deletions

File tree

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
* Adapt to changes in `rlang::warn_dots_used()` (@lionel-, #6830).
44
* Fixed bug where `guide_axis_theta()` didn't anticipate old-style text elements (#6803).
5+
* `geom_boxplot()`/`stat_boxplot()` gain a `quantile.type` parameter (default `7`)
6+
to control the percentile definition used for hinges and median; set `quantile.type = 2`
7+
to match SAS's default `PCTLDEF = 5`, giving parity with SAS boxplots.
8+
(@munoztd0, #6819)
59

610
# ggplot2 4.0.2
711

R/stat-boxplot.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,16 @@ StatBoxplot <- ggproto("StatBoxplot", Stat,
5353

5454
extra_params = c("na.rm", "orientation"),
5555

56-
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, flipped_aes = FALSE) {
56+
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, flipped_aes = FALSE, quantile.type = 7) {
5757
data <- flip_data(data, flipped_aes)
5858
qs <- c(0, 0.25, 0.5, 0.75, 1)
5959

6060
if (!is.null(data$weight)) {
6161
mod <- quantreg::rq(y ~ 1, weights = weight, data = data, tau = qs)
6262
stats <- as.numeric(stats::coef(mod))
6363
} else {
64-
stats <- as.numeric(stats::quantile(data$y, qs))
64+
# Follow base R default (type = 7) unless overridden by user
65+
stats <- as.numeric(stats::quantile(data$y, qs, type = quantile.type))
6566
}
6667
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
6768
iqr <- diff(stats[c(2, 4)])
@@ -99,6 +100,8 @@ StatBoxplot <- ggproto("StatBoxplot", Stat,
99100

100101
#' @rdname geom_boxplot
101102
#' @param coef Length of the whiskers as multiple of IQR. Defaults to 1.5.
103+
#' @param quantile.type An integer between 1 and 9 setting the quantile algorithm
104+
#' per the `type` argument of [stats::quantile()]. Defaults to `7`.
102105
#' @inheritParams stat_identity
103106
#' @export
104107
#' @eval rd_computed_vars(

man/geom_boxplot.Rd

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-geom-boxplot.R

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,32 @@ test_that("boxplot draws correctly", {
109109
)
110110
)
111111
})
112+
113+
test_that("quantile.type changes hinges for small samples (unweighted)", {
114+
df <- data_frame(x = 1, y = c(1, 2, 3, 4))
115+
116+
p_default <- ggplot(df, aes(x, y)) + stat_boxplot()
117+
d_default <- get_layer_data(p_default)
118+
119+
p_t2 <- ggplot(df, aes(x, y)) + stat_boxplot(quantile.type = 2)
120+
d_t2 <- get_layer_data(p_t2)
121+
122+
# Lower/upper hinges should differ under different quantile definitions
123+
expect_false(isTRUE(all.equal(d_default$lower, d_t2$lower)))
124+
expect_false(isTRUE(all.equal(d_default$upper, d_t2$upper)))
125+
})
126+
127+
test_that("quantile.type = 7 matches default behavior (backward compatible)", {
128+
set.seed(123)
129+
df <- data_frame(x = 1, y = rnorm(25))
130+
131+
p_default <- ggplot(df, aes(x, y)) + stat_boxplot()
132+
d_default <- get_layer_data(p_default)
133+
134+
p_t7 <- ggplot(df, aes(x, y)) + stat_boxplot(quantile.type = 7)
135+
d_t7 <- get_layer_data(p_t7)
136+
137+
expect_equal(d_default$lower, d_t7$lower)
138+
expect_equal(d_default$middle, d_t7$middle)
139+
expect_equal(d_default$upper, d_t7$upper)
140+
})

0 commit comments

Comments
 (0)