-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAverageTechReplicates.R
More file actions
218 lines (200 loc) · 7.95 KB
/
Copy pathAverageTechReplicates.R
File metadata and controls
218 lines (200 loc) · 7.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# adapted from 3-AverageTechReplicates.R
# load packages
library("argparse")
library("ggplot2")
library("gridExtra")
parser <- ArgumentParser(description = "AverageTechReplicates")
parser$add_argument("--init_filepath", dest = "init_file",
help = "File path for the init RData file", required = TRUE)
parser$add_argument("-n", "--nr_replicates", dest = "nr_replicates", type = "integer",
help = "Number of replicates", required = TRUE)
parser$add_argument("--run_name", dest = "run_name",
help = "The run name/analysis ID", required = TRUE)
parser$add_argument("--matrix", dest = "dims_matrix",
help = "The matrix used, e.g. Plasma, Research, ...")
parser$add_argument("--highest_mz_file", dest = "highest_mz_file",
help = "File path for the highest Mz RData file", required = TRUE)
parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
help = "File path for the breaks RData file", required = TRUE)
args <- parser$parse_args()
highest_mz <- get(load(args$highest_mz_file))
thresh2remove <- 1000000000
remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
# collect list of samples to remove from replication pattern
remove_from_group <- NULL
for (sample_nr in seq_along(repl_pattern)){
repl_pattern_1sample <- repl_pattern[[sample_nr]]
remove <- NULL
for (file_nr in seq_along(repl_pattern_1sample)) {
if (repl_pattern_1sample[file_nr] %in% bad_samples) {
remove <- c(remove, file_nr)
}
}
if (length(remove) == nr_replicates) {
remove_from_group <- c(remove_from_group, sample_nr)
}
if (!is.null(remove)) {
repl_pattern[[sample_nr]] <- repl_pattern[[sample_nr]][-remove]
}
}
if (length(remove_from_group) != 0) {
repl_pattern <- repl_pattern[-remove_from_group]
}
return(list("pattern" = repl_pattern))
}
# load init_file: contains repl_pattern
load(args$init_file)
# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(args$breaks_filepath)
# lower the threshold for non Plasma matrices
if (dims_matrix != "Plasma") {
thresh2remove <- 1000000000
}
# lower the threshold for DBS or high m/z specific
if (dims_matrix == "DBS") {
thresh2remove <- 500000000
}
if (highest_mz > 700) {
thresh2remove <- 1000000
}
# remove technical replicates which are below the threshold
remove_neg <- NULL
remove_pos <- NULL
cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
for (sample_nr in seq_along(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
tech_reps_array_pos <- NULL
tech_reps_array_neg <- NULL
sum_neg <- 0
sum_pos <- 0
nr_pos <- 0
nr_neg <- 0
for (file_nr in seq_along(tech_reps)) {
load(paste(tech_reps[file_nr], ".RData", sep = ""))
cat("\n\nParsing", tech_reps[file_nr])
# negative scanmode
cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
if (sum(peak_list$neg[, 1]) < thresh2remove) {
cat(" ... Removed")
remove_neg <- c(remove_neg, tech_reps[file_nr])
} else {
nr_neg <- nr_neg + 1
sum_neg <- sum_neg + peak_list$neg
}
tech_reps_array_neg <- cbind(tech_reps_array_neg, peak_list$neg)
# positive scanmode
cat("\n\tPositive peak_list sum", sum(peak_list$pos[, 1]))
if (sum(peak_list$pos[, 1]) < thresh2remove) {
cat(" ... Removed")
remove_pos <- c(remove_pos, tech_reps[file_nr])
} else {
nr_pos <- nr_pos + 1
sum_pos <- sum_pos + peak_list$pos
}
tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
}
# save to file
if (nr_neg != 0) {
sum_neg[, 1] <- sum_neg[, 1] / nr_neg
colnames(sum_neg) <- names(repl_pattern)[sample_nr]
save(sum_neg, file = paste0(names(repl_pattern)[sample_nr], "_neg_avg.RData"))
}
if (nr_pos != 0) {
sum_pos[, 1] <- sum_pos[, 1] / nr_pos
colnames(sum_pos) <- names(repl_pattern)[sample_nr]
save(sum_pos, file = paste0(names(repl_pattern)[sample_nr], "_pos_avg.RData"))
}
}
pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, args$nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
write.table(
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)
pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, args$nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
write.table(
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)
## generate TIC plots
# get all txt files
tic_files <- list.files("./", full.names = TRUE, pattern = "*TIC.txt")
all_samps <- sub("_TIC\\..*$", "", basename(tic_files))
# determine maximum intensity
highest_tic_max <- 0
for (file in tic_files) {
tic <- read.table(file)
this_tic_max <- max(tic$tic_intensity)
if (this_tic_max > highest_tic_max) {
highest_tic_max <- this_tic_max
max_sample <- sub("_TIC\\..*$", "", basename(file))
}
}
# create a list with information for all TIC plots
tic_plot_list <- list()
plot_nr <- 0
for (sample_nr in seq_along(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
sample_name <- names(repl_pattern)[sample_nr]
for (file_nr in seq_along(tech_reps)) {
plot_nr <- plot_nr + 1
# read file with retention time, intensity and dims_threshold values
repl1_nr <- read.table(paste0(tech_reps[file_nr], "_TIC.txt"))
# get threshold values per technical replicate
dims_thresh_pos <- repl1_nr[1, "threshold"]
dims_thresh_neg <- repl1_nr[nrow(repl1_nr), "threshold"]
# for replicates with bad TIC, determine what color the border of the plot should be
bad_color_pos <- tech_reps[file_nr] %in% remove_pos
bad_color_neg <- tech_reps[file_nr] %in% remove_neg
if (bad_color_neg && bad_color_pos) {
plot_color <- "#F8766D"
} else if (bad_color_pos) {
plot_color <- "#ED8141"
} else if (bad_color_neg) {
plot_color <- "#BF80FF"
} else {
plot_color <- "white"
}
tic_plot <- ggplot(repl1_nr, aes(retention_time, tic_intensity)) +
geom_line(linewidth = 0.3) +
geom_hline(yintercept = highest_tic_max, col = "grey", linetype = 2, linewidth = 0.3) +
geom_vline(xintercept = trim_left_pos, col = "red", linetype = 2, linewidth = 0.3) +
geom_vline(xintercept = trim_right_pos, col = "red", linetype = 2, linewidth = 0.3) +
geom_vline(xintercept = trim_left_neg, col = "red", linetype = 2, linewidth = 0.3) +
geom_vline(xintercept = trim_right_neg, col = "red", linetype = 2, linewidth = 0.3) +
geom_segment(x = trim_left_pos, y = dims_thresh_pos, xend = trim_right_pos, yend = dims_thresh_pos, colour = "green", lty = 2) +
geom_segment(x = trim_left_neg, y = dims_thresh_neg, xend = trim_right_neg, yend = dims_thresh_neg, colour = "blue", lty = 2) +
labs(x = "t (s)", y = "tic_intensity", title = paste0(tech_reps[file_nr], " || ", sample_name)) +
theme(plot.background = element_rect(fill = plot_color),
axis.text = element_text(size = 4),
axis.title = element_text(size = 4),
plot.title = element_text(size = 6))
tic_plot_list[[plot_nr]] <- tic_plot
}
}
# create a layout matrix dependent on number of replicates
layout <- matrix(1:(10 * args$nr_replicates), 10, args$nr_replicates, TRUE)
# put TIC plots in matrix
tic_plot_pdf <- marrangeGrob(
grobs = tic_plot_list,
nrow = 10, ncol = args$nr_replicates,
layout_matrix = layout,
top = quote(paste(
"TICs of run", args$run_name,
" \n colors: red = both modes misinjection, orange = pos mode misinjection, purple = neg mode misinjection \n ",
g, "/", npages
))
)
# save to file
ggsave(filename = paste0(args$run_name, "_TICplots.pdf"),
tic_plot_pdf, width = 21, height = 29.7, units = "cm")