-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02_item_analysis.R
More file actions
106 lines (86 loc) · 3.61 KB
/
02_item_analysis.R
File metadata and controls
106 lines (86 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
### Item Analysis
### Descriptive item statistics, item-total correlations, item histograms,
### difficulty-discrimination plots
if (!exists("itemdata")) source("01_data_preparation.R")
library(ggplot2)
library(ggrepel)
library(scales)
### ── Item statistics ──────────────────────────────────────────────────────────
itemstatistics <- (itemdata
%>% left_join(iteminfo, by = "itemid")
%>% group_by(userID, dimid)
%>% mutate(dimvalue = sum(value))
%>% ungroup()
%>% group_by(itemid)
%>% summarise(
N = n(),
M = mean(value),
SD = sd(value),
MIN = min(value),
MAX = max(value),
TotCorr = cor(value, dimvalue - value),
.groups = "drop"
)
%>% mutate(P = M / MAX)
)
cat("Item statistics computed.\n")
print(as.data.frame(itemstatistics), row.names = FALSE)
### ── Item histograms ──────────────────────────────────────────────────────────
itemhistvalues <- (itemdata
%>% group_by(itemid, value)
%>% summarise(N = n(), .groups = "drop_last")
%>% mutate(rel = N / sum(N))
%>% ungroup()
)
for (currentitemid in unique(itemdata$itemid)) {
currentitemvalues <- itemhistvalues %>% dplyr::filter(itemid == currentitemid)
itemcontent <- iteminfo[iteminfo$itemid == currentitemid, ]$text
currentitemplot <- ggplot(data = currentitemvalues) +
geom_bar(stat = "identity", aes(x = value, y = rel), fill = "#161d9e") +
theme_bw() +
labs(x = "Response", y = "Relative Frequency") +
scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
scale_x_continuous(breaks = 1:5, labels = c(
"1\nDisagree", "2", "3\nNeutral", "4", "5\nAgree")) +
ggtitle(paste0(currentitemid, ": ", itemcontent))
currentitemplot %>% ggsave(
filename = file.path(outputpath, "itemstatistics", paste0("hist-", currentitemid, ".png")))
}
cat("Item histograms saved.\n")
### ── Difficulty-discrimination plots (per dimension) ──────────────────────────
### quadratic regression helper for P-TotCorr curve
quadY <- function(a, xvalues) {
a * xvalues^2 + (-a) * xvalues
}
quadReg <- function(x, y, amin, amax, h) {
possibleas <- seq(amin, amax, h)
errors <- sapply(possibleas, function(a) {
paraby <- a * x^2 + (-a) * x
mean((y - paraby)^2)
})
possibleas[which.min(errors)]
}
for (currentdim in dimensions) {
currentdimname <- unique(iteminfo$dimname[iteminfo$dimid == currentdim])
currentdimitemdata <- (itemstatistics
%>% left_join(iteminfo, by = "itemid")
%>% dplyr::filter(dimid == currentdim)
%>% select(itemid, P, TotCorr, dimname)
)
paraLine <- data.frame(x = seq(0, 1, 0.01))
paraLine$y <- quadY(
quadReg(currentdimitemdata$P, currentdimitemdata$TotCorr, -4, -0.01, 0.01),
paraLine$x)
currentdimplot <- ggplot(data = currentdimitemdata, aes(x = P, y = TotCorr)) +
geom_line(data = paraLine, aes(x = x, y = y), color = "green") +
geom_point(size = 2) +
theme_bw() +
labs(x = "Item Difficulty (P)", y = "Corrected Item-Total Correlation") +
ggtitle(paste0(currentdimname, " (", currentdim, ")")) +
geom_label_repel(aes(label = itemid), size = 3)
currentdimplot %>% ggsave(
filename = file.path(outputpath, "itemstatistics", paste0("totCorr-", currentdim, ".png")))
currentdimplot %>% ggsave(
filename = file.path(outputpath, "itemstatistics", paste0("totCorr-", currentdim, ".pdf")))
}
cat("Difficulty-discrimination plots saved.\n")