forked from rdpeng/RepData_PeerAssessment1
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.R
More file actions
87 lines (65 loc) · 2.57 KB
/
analysis.R
File metadata and controls
87 lines (65 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# setwd("~/Documents/github_repositories/reproducible_research/project_1")
library('lubridate')
library('ggplot2')
library('dplyr')
library('knitr')
library('markdown')
## Load the raw activity log; the `steps` and `date` columns are used in
## this section.
data <- read.csv('activity.csv')

## part 1: plot steps per day ...
## Convert the date column to POSIXct so it can be placed on the continuous
## datetime axis requested by scale_x_datetime().
data$date <- as.POSIXct(data$date)

## Bar chart of steps by date. geom_col() is the bar geom for values that are
## already bar heights; rows sharing a date are stacked, so each bar adds up
## that date's step counts. It replaces geom_histogram(stat = 'identity'),
## which draws the same bars but warns about ignored binning parameters.
plot1 <- ggplot(data, aes(x = date, y = steps)) +
  geom_col() +
  labs(title = "Steps Per Day", x = 'Date', y = 'Number of Steps') +
  scale_x_datetime()
plot1 ## plot

## rearrange data for calculations
## One row per date. sum() is called without na.rm, so a date containing any
## NA step count gets an NA total.
tot.steps.day <- data %>%
  group_by(date) %>%
  summarise(tot.steps = sum(steps))

## mean calculation (dates whose total is NA are skipped)
mean.steps.day <- mean(tot.steps.day$tot.steps, na.rm = TRUE)

## median calculation (dates whose total is NA are skipped)
median.steps.day <- median(tot.steps.day$tot.steps, na.rm = TRUE)
## part 2: mean steps per interval
## One row per interval: the mean of `steps` over all dates, ignoring NAs.
int.mean <- data %>%
  group_by(interval) %>%
  summarise(mean.steps = mean(steps, na.rm = TRUE))

## Line plot of the per-interval mean, coloured by its own value (legend
## hidden), with a horizontal reference line at the mean of the interval
## means and a text label for that line.
## The chain must end at labs(): a trailing `+` would pull the following
## `plot2` line into this expression and fail because plot2 does not exist
## yet. The reference line is a single constant, so it is passed as a plain
## `yintercept` argument rather than through aes().
plot2 <- ggplot(int.mean, aes(x = interval, y = mean.steps)) +
  theme(legend.position = 'none') +
  geom_line(aes(color = mean.steps), size = 1.4) +
  geom_hline(yintercept = mean(int.mean$mean.steps)) +
  annotate('text', x = 4, y = 45, label = 'mean') +
  labs(title = 'Mean Steps Per Interval')
plot2

## interval with max mean steps per day
## Keeps the row(s) of int.mean whose mean.steps equals the column maximum.
max.int.steps <- filter(int.mean, mean.steps == max(mean.steps))
## part 3
## Number of rows in the raw data that contain at least one missing value.
inc.cases <- sum(!complete.cases(data))

## Impute missing step counts: attach each interval's mean (int.mean, joined
## on `interval`) and use it wherever `steps` is NA. The joined `mean.steps`
## column stays in data3.
data3 <- data %>%
  left_join(int.mean, by = 'interval') %>%
  mutate(steps = ifelse(is.na(steps), mean.steps, steps))

## Same bar chart as part 1, drawn from the imputed data. geom_col() is the
## bar geom for pre-computed heights and stacks the rows sharing a date; it
## replaces geom_histogram(stat = 'identity'), which draws the same bars but
## warns about ignored binning parameters.
plot4 <- ggplot(data3, aes(x = date, y = steps)) +
  geom_col() +
  labs(title = "Steps Per Day", x = 'Date', y = 'Number of Steps') +
  scale_x_datetime()
plot4

## Total steps per date after imputation, then their mean and median.
tot.steps.day.data3 <- data3 %>%
  group_by(date) %>%
  summarise(tot.steps = sum(steps))
imp.mean <- mean(tot.steps.day.data3$tot.steps)
imp.median <- median(tot.steps.day.data3$tot.steps)

## Overall step totals before (NAs dropped) and after imputation.
data.steps.sum <- sum(data$steps, na.rm = TRUE)
data3.steps.sum <- sum(data3$steps)
## Minimal change after imputing NA values. This could be a result of the
## imputation method (interval means).
## part 4
## Compare the mean steps per interval on weekdays and on weekends, using the
## imputed data.
data4 <- data3

## Day-of-week number with an explicit Monday start: Monday = 1, ...,
## Saturday = 6, Sunday = 7. lubridate's default numbering starts on Sunday
## (Sunday = 1, Saturday = 7), under which a `> 5` test would pick Friday and
## Saturday as the weekend and count Sunday as a weekday.
data4$wday <- wday(data4$date, week_start = 1)

## Saturday (6) and Sunday (7) are 'weekend'; every other row is 'weekday'.
data4$week.cat <- factor(
  ifelse(data4$wday %in% c(6, 7), 'weekend', 'weekday')
)

## Mean steps for every (week.cat, interval) pair; ungrouped so no grouping
## is left on the result.
int.data4 <- data4 %>%
  group_by(week.cat, interval) %>%
  summarise(mean.steps = mean(steps)) %>%
  ungroup()

## One line panel per category, stacked vertically for comparison.
plot5 <- ggplot(int.data4, aes(x = interval, y = mean.steps, group = week.cat)) +
  geom_line() +
  facet_wrap(~week.cat, ncol = 1) +
  labs(title = 'Weekday vs weekend: mean steps per interval',
       x = 'Interval', y = 'Mean steps')
## Display the plot, as is done for the other plots in this script.
plot5