# penguins.R
# 1. Look at how the data are laid out
# str() prints a compact overview of the object: one line per column with
# its type and the first few values, plus the overall dimensions.
# Handy for telling factors from numerics and noticing NAs before analysis.
utils::str(object = penguins)
# 2. Summary statistics
# summary() dispatches to methods per class:
# for numerics it gives Min/Max/Median/Mean/Quartiles; for factors, counts.
# This gives a rapid univariate overview to guide deeper plots or tests.
# print() is called explicitly so the table is also shown when this file is
# run via source(), where top-level values are not auto-printed by default.
print(summary(penguins))
# 3. Distribution of flipper length
# A histogram groups the values into bins, which makes skew or several
# peaks easy to see. Asking for about 20 bins is a compromise between
# showing detail and amplifying noise.
hist(
  x = penguins[["flipper_len"]],
  breaks = 20,
  xlab = "Flipper length (mm)",
  ylab = "Count",
  main = "Flipper Length Distribution"
)
# 4. Body mass compared across species
# The formula `body_mass ~ species` draws one box per species, so medians,
# interquartile ranges, and possible outliers can be compared side by side.
boxplot(
  body_mass ~ species,
  data = penguins,
  xlab = "Species",
  ylab = "Body mass (g)",
  main = "Body Mass by Penguin Species"
)
# 5. Bill length against bill depth, one colour per species
# The integer codes behind the `species` factor are passed to `col=`, so
# every species is drawn in its own colour. The legend uses the same
# 1..k codes in level order, so its swatches match the points.
with(penguins, {
  species_names <- levels(species)
  plot(
    x = bill_len,
    y = bill_dep,
    col = as.numeric(species),
    pch = 16,
    main = "Bill Dimensions by Species",
    xlab = "Bill length (mm)",
    ylab = "Bill depth (mm)"
  )
  legend(
    x = "topright",
    legend = species_names,
    col = seq_along(species_names),
    pch = 16
  )
})
# 6. Fit a simple linear model: bill depth ~ bill length
# lm() estimates the intercept and slope; summary() reports R-squared and
# p-values, quantifying the strength and significance of the linear relation.
# `fit` is kept at top level because the regression-line plot below reuses it.
fit <- lm(bill_dep ~ bill_len, data = penguins)
# print() is called explicitly so the model summary is also shown when this
# file is run via source(), where top-level values are not auto-printed.
print(summary(fit))
# 7. Scatterplot with the fitted regression line on top
# The points are redrawn first, then abline() adds the line described by
# the model object `fit` (created in the model-fitting step above) to that
# plot, giving a visual check of how well the line follows the data.
with(penguins, {
  plot(
    x = bill_len,
    y = bill_dep,
    pch = 16,
    main = "Regression of Bill Depth on Length",
    xlab = "Bill length (mm)",
    ylab = "Bill depth (mm)"
  )
  abline(reg = fit) # best-fit line over the points
})