-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathplotting.R
More file actions
250 lines (195 loc) · 7.43 KB
/
plotting.R
File metadata and controls
250 lines (195 loc) · 7.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
library(dplyr)
# We will again be using the NPAS data set
# (data from https://www.kaggle.com/lucasgreenwell/nerdy-personality-attributes-scale-responses)
# Let's clean it up a bit: drop incomplete rows, score the scale, label gender,
# and keep a small random subset to plot.
df_NPAS <- read.csv("NPAS data.csv") |>
  # this is similar to na.omit() but for specific variables:
  tidyr::drop_na(Q1:Q26, gender, age) |>
  mutate(
    # `Nerdy` is each respondent's mean across the 26 items (Q1 to Q26)
    Nerdy = across(Q1:Q26, .fns = as.numeric) |> rowMeans(na.rm = TRUE),
    # turn the numeric codes into a labelled factor, with "woman" as the first
    # level; any code not listed in `levels` becomes NA
    gender = factor(
      gender,
      levels = c(2, 1, 3),
      labels = c("woman", "man", "other")
    )
  ) |>
  # the single items are no longer needed once the scale score exists
  select(-(Q1:Q26)) |>
  # keep only respondents with a reported age below 100
  filter(age < 100) |>
  # get just some of the data - this is a random draw, so the rows (and the
  # plots below) differ from run to run.
  # `slice_sample()` is the current replacement for the superseded `sample_n()`.
  slice_sample(n = 200)

glimpse(df_NPAS)
# *** ggplot2 *** ---------------------------------------------------------
library(ggplot2)
# When using `ggplot` there are two main concepts to understand:
# 1. Plots are built ("drawn") - layer by layer.
# 2. Data (variables) are mapped to visual features of the plot.
# Like saying "on the x axis we have 'age'",
# or "color represents 'gender'".
#
# This course will not expand much on using `ggplot`, as its use changes
# dramatically between users, data type and whole fields, but let's look at some
# of the basics.
# Basic steps - think:
# 1. What variables do we want to plot?
# 2. What data do we want to plot? Raw data (individual scores)? Summarized data
# (group means)?
# 3. What type of plot are we making? (a scatter plot? A bar plot? error bars?)
# 4. How do variables MAP onto the plot - in other words what visual feature
# represents different variables? Does color change by group? Does location on
# the X axis change according to age? Etc...
# Example 1 ---------------------------------------------------------------

?ggplot

# The main function is `ggplot()`, and it takes a data frame.
ggplot(df_NPAS)
# This drew nothing, because we did not tell it what to draw or how...

# `aes()` is the mapping function - it lets us MAP variables onto visual
# features. For example, I want the X-axis to represent `Nerdy`, and color to
# represent `gender`.
ggplot(df_NPAS, mapping = aes(x = Nerdy, color = gender))
# This still does nothing, because we didn't add any layers - we didn't tell
# ggplot what to draw!

# All the actual "drawing" is specified with the "geoms". Here we might want to
# draw a histogram. So let's add that - literally, with a `+`!
ggplot(df_NPAS, mapping = aes(x = Nerdy, color = gender)) +
  geom_histogram()
# Starting to get somewhere!

# Seems like we don't want the color to represent gender, but maybe the filling
# instead? Just MAP onto `fill`.
ggplot(df_NPAS, mapping = aes(x = Nerdy, fill = gender)) +
  geom_histogram()

# Or maybe I want a density plot instead?
ggplot(df_NPAS, mapping = aes(x = Nerdy, color = gender)) +
  geom_density()

ggplot(df_NPAS, mapping = aes(x = Nerdy, color = gender)) +
  geom_density(
    mapping = aes(linetype = gender),
    # line thickness is set with `linewidth`; using `size` for lines has been
    # deprecated since ggplot2 3.4.0
    linewidth = 1,
    fill = "yellow",
    alpha = 0.4
  )
# Note that `linetype` is in `aes()` - so it varies according to some variable,
# whereas linewidth, fill, and alpha (the opacity) are not inside `aes()` so
# they are FIXED to a constant.

# The same mapping can also be given once, in the top-level `aes()`:
# ggplot(df_NPAS, mapping = aes(x = Nerdy, color = gender, linetype = gender)) +
#   geom_density(
#     linewidth = 1,
#     fill = "yellow",
#     alpha = 0.4
#   )
# Example 2 ---------------------------------------------------------------

# Another example, with the same data and variables (Nerdy & gender).
# Every plot in this section uses the same mapping (gender on the x-axis, Nerdy
# on the y-axis), so we build that empty canvas once and keep adding layers to
# it.
nerdy_by_gender <- ggplot(
  data = df_NPAS,
  mapping = aes(x = gender, y = Nerdy)
)

# Just the canvas - axes, but no layers yet:
nerdy_by_gender

# The raw data, one point per respondent:
nerdy_by_gender +
  geom_point()

# Because the x-variable is categorical, a box plot might give us a better
# understanding of the data:
nerdy_by_gender +
  geom_boxplot()

nerdy_by_gender +
  geom_boxplot() +
  geom_violin()
# Can't really see the boxes! Order of geoms matters - later layers are drawn
# on top of earlier ones.

nerdy_by_gender +
  geom_violin() +
  geom_boxplot(aes(color = gender), fill = NA)

# Note that neither geom_violin nor geom_boxplot drew the raw data as is - both
# summarized the data first. In some cases, we might want to summarize the data
# ourselves in some way before plotting (with `group_by() |> summarize()`).
# Example 3 ---------------------------------------------------------------

# What will this draw?
ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(mapping = aes(color = gender))

# We can also map color on to a continuous variable:
ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(aes(color = TIPI2))

# Add a smoothed trend line. With fewer than 1,000 observations the default
# method is a loess curve - NOT a straight regression line:
ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(aes(color = gender)) +
  geom_smooth()

# For (linear) regression lines ask for `method = "lm"`. Mapping color to
# gender inside the smooth layer gives one line per gender:
ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(aes(color = gender)) +
  geom_smooth(aes(color = gender), method = "lm")

# we can split into subplots using facets:
ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(aes(color = gender)) +
  geom_smooth(aes(color = gender), method = "lm") +
  facet_grid(rows = vars(urban))

# We can "prettify" the plot with themes, change the "scales", and more...
ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(aes(color = gender), alpha = 0.7, shape = 3) +
  # line thickness is `linewidth` (using `size` for lines is deprecated since
  # ggplot2 3.4.0); `fill` colors the confidence band
  geom_smooth(
    aes(color = gender),
    method = "lm",
    linewidth = 1.5,
    fill = "gray"
  ) +
  facet_grid(
    cols = vars(urban),
    # We can change the facet labels:
    labeller = as_labeller(c(
      "0" = "N/A",
      "1" = "Rural",
      "2" = "Suburban",
      "3" = "Urban"
    ))
  ) +
  # scale_*() functions can be used to control the appearance of different
  # scales (x, y, color, fill, size...) - things that we've mapped.
  scale_color_manual(
    values = c(woman = "red4", man = "steelblue4", other = "purple1"),
    # Labels are named so that each one is matched to its level by name and
    # not by position - this keeps working even if one of the levels happens
    # to be missing from the random sample.
    labels = c(woman = "Woman", man = "Man", other = "Other")
  ) +
  # zoom the y-axis without dropping any data from the model fits
  coord_cartesian(ylim = c(0, 5)) +
  labs(x = "Age [years]", color = "Gender") +
  theme_light() +
  theme(legend.position = "bottom")
# Learn more about themes here:
# https://tidyverse.org/blog/2025/10/ggplot2-styling/
# and many many more...
# - The basics: https://ggplot2-book.org/
# - Explore options + cheat sheet here: https://ggplot2.tidyverse.org/
# - How to get the plot you want: https://www.r-graph-gallery.com/index.html
# - Learn how to better visualize your data: https://clauswilke.com/dataviz/
#
# ggplot is a powerful tool - with many other packages interfacing or expanding
# on it. We will see some of them later on.
# Take 5 minutes, look at the data - think what you'd like to plot.
# Hebrew / Arabic plots ---------------------------------------------------
# ggplot2 for the most part supports right-to-left (RTL) scripts:
# https://tidyverse.org/blog/2025/01/text-rendering-updates/
# Exporting plots ---------------------------------------------------------

# You "can" export images via the "Export" menu in the "Plots" tab,
# but this saves the images in very poor quality.

# For best results, make sure {ragg} is installed:
library(ragg)

p <- ggplot(df_NPAS, aes(x = age, y = Nerdy)) +
  geom_point(aes(color = gender))

# Raster images - first as tiff, then as png. Both use exactly the same
# settings; `ggsave()` picks the file type from the file extension.
for (raster_file in c("p2.tiff", "p2.png")) {
  ggsave(
    plot = p,
    filename = raster_file,
    # - Size -
    width = 480,
    height = 300,
    units = "mm",
    # - Resolution -
    dpi = 600,
    # `scaling` is not an argument of `ggsave()` itself - it is passed on to
    # the graphics device.
    scaling = 1 # play with this one to get it juuuust right
  )
}

# As pdf. Here we use `scale`, which is `ggsave()`'s own argument.
ggsave(
  plot = p,
  filename = "p2.pdf",
  # - Size -
  width = 480,
  height = 300,
  units = "mm",
  # - Resolution -
  dpi = 600,
  scale = 1 # play with this one to get it juuuust right
)