Skip to content

Commit b943ee4

Browse files
rMATS import (incomplete)
1 parent 03f0de9 commit b943ee4

8 files changed

Lines changed: 43259 additions & 10 deletions

File tree

R/betASapp.R

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
betASapp_ui <- function(){
88
# :::: Variables ::::
99
# tools <- c("vast-tools", "MISO", "SUPPA", "Other")
10-
availabletools <- c("vast-tools")
10+
availabletools <- c("vast-tools", "rMATS")
1111
yAxisStats <- c("Pdiff (probability of differential splicing)", "F-statistic (median(|between|)/median(|within|))", "False discovery rate (FDR)")
1212
yAxisStats_multiple <- c("Pdiff (probability that |between| > |within|)", "F-statistic (median(|between|)/median(|within|))")
1313
eventTypes <- c("Exon skipping (ES)"="EX", "Intron retention (IR)"="IR", "Alternative splice site (Altss)"="Altss")
@@ -70,7 +70,7 @@ betASapp_ui <- function(){
7070
"<a href='", "https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-6814/",
7171
"'>Human RNA-seq time-series of the development of seven major organs</a></p>")),
7272

73-
helpText("(betAS currently supports inclusion level tables from: vast-tools)"),
73+
helpText("(betAS currently supports inclusion level tables from: vast-tools and rMATS (*.MATS.JC.txt tables))"),
7474
radioButtons("sourcetool", label = "Table source:", choices = availabletools),
7575

7676
h5("Filter events from loaded table:"),
@@ -344,6 +344,12 @@ betASapp_server <- function(){
344344

345345
testTable <- readRDS(file = "test/INCLUSION_LEVELS_FULL-hg19-98-v251.rds")
346346

347+
if(input$sourcetool == "rMATS"){
348+
349+
testTable <- read.delim(file = "test/SE.MATS.JC.txt")
350+
351+
}
352+
347353
return(testTable)
348354

349355
}
@@ -394,6 +400,17 @@ betASapp_server <- function(){
394400

395401
})
396402

403+
filterRMATSTable <- reactive({

  # Only rMATS tables are handled here; any other source tool yields NULL
  if (input$sourcetool != "rMATS") {
    return(NULL)
  }

  # Split the loaded rMATS table into PSI/Qual tables (see filterrMATS())
  filterrMATS(dataset())

})
413+
397414
selectAlternatives <- reactive({
398415

399416
alternativeList <- alternativeVastTools(req(filterVastToolsTable()), minPsi = input$psirange[1], maxPsi = input$psirange[2])
@@ -412,24 +429,94 @@ betASapp_server <- function(){
412429

413430
})
414431

432+
selectAlternativesRM <- reactive({

  # Keep only rMATS events whose PSIs lie within the range selected in the UI
  alternativeList <- alternativerMATS(req(filterRMATSTable()),
                                      minPsi = input$psirange[1],
                                      maxPsi = input$psirange[2])

  hasEvents <- nrow(alternativeList$PSI) > 0
  if (hasEvents) {
    return(alternativeList)
  }

  # Nothing left after filtering: warn the user and return NULL
  showNotification("There are no events with PSI values within such range.",
                   closeButton = TRUE,
                   duration = 5,
                   type = c("error"))
  NULL

})
449+
415450
# create a reactive expression
416451
psidataset <- reactive({

  # PSI table from the filter that matches the selected source tool
  # (NULL when the source tool is neither of the two handled here)
  sourceTool <- input$sourcetool

  if (sourceTool == "vast-tools") {
    filterVastToolsTable()$PSI
  } else if (sourceTool == "rMATS") {
    filterRMATSTable()$PSI
  }

})
419466

420467
qualdataset <- reactive({

  # Qual table from the filter that matches the selected source tool
  # (NULL when the source tool is neither of the two handled here)
  sourceTool <- input$sourcetool

  if (sourceTool == "vast-tools") {
    filterVastToolsTable()$Qual
  } else if (sourceTool == "rMATS") {
    filterRMATSTable()$Qual
  }

})
423482

424483
# create a reactive expression
425484
psifiltdataset <- reactive({

  # PSI table restricted to the selected PSI range, for the selected source tool;
  # req() halts silently while the corresponding selection is not available
  sourceTool <- input$sourcetool

  if (sourceTool == "vast-tools") {
    req(selectAlternatives())$PSI
  } else if (sourceTool == "rMATS") {
    req(selectAlternativesRM())$PSI
  }

})
429502

430503
qualfiltdataset <- reactive({

  # Qual table restricted to the selected PSI range, for the selected source tool;
  # req() halts silently while the corresponding selection is not available
  sourceTool <- input$sourcetool

  if (sourceTool == "vast-tools") {
    req(selectAlternatives())$Qual
  } else if (sourceTool == "rMATS") {
    req(selectAlternativesRM())$Qual
  }

})
434521

435522
eventNumber <- reactive({
@@ -438,7 +525,7 @@ betASapp_server <- function(){
438525

439526
output$textTotalNumberEvents <- renderText({
440527

441-
req(selectAlternatives())
528+
# req(selectAlternatives())
442529

443530
paste0("You have selected ", eventNumber(), " events")
444531

R/processVastTools.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ VT_all_minVLOW_tags <- function(quals){
2222

2323
}
2424

25-
2625
# Filter INCLUSION table (vast-tools) for quality and event type
2726
# Filter original table from vast-tools to remove events containing NAs in at least one sample and those that do not have minimal coverage based on VT_all_minVLOW_tags() and split PSI and Qual tables
2827
# @param incTable vast-tools' INCLUSION table

R/processrMATS.R

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Filter *.MATS.JC.txt table (rMATS) for quantified PSIs
# Filter original table from rMATS to remove events containing NAs and split PSI and Qual tables
# Normalised inc and exc reads are obtained from "IJC" and "SJC" columns dividing junction counts by IncFormLen and SkipFormLen
# The number of samples per group is inferred from the first row of SJC_SAMPLE_1 / SJC_SAMPLE_2; samples are named G1_S<i> and G2_S<i>
# The event type is inferred from the event-specific column names (each rMATS event type has its own file and set of columns)
# Stops with an error if the table has no events, is not a recognised rMATS event table, lacks a required column, or has rows with an inconsistent number of per-sample values
# @param incTable rMATS' *.MATS.JC.txt table (data.frame, e.g. as returned by read.delim())
#
# @return List with: 1) "PSI": filtered table PSI columns, 2) "Qual": filtered table "Qual" columns including inc and exc, 3) "EventsPerType": table with number of events per type and 4) "Samples": sample names
# @export
#
# @examples
filterrMATS <- function(incTable){

  colNames <- colnames(incTable)
  nEvents <- nrow(incTable)

  # Inspect rMATS colnames to identify type of event:
  # Each event type has its own separate file, and its own set of columns, as described in https://github.com/Xinglab/rmats-turbo/blob/v4.1.2/README.md
  if ("exonStart_0base" %in% colNames){
    eventType <- "EX"
    # The inclusion form includes the target exon (exonStart_0base, exonEnd)
    coordCols <- c("exonStart_0base", "exonEnd")

  } else if (any(c("X1stExonStart_0base", "1stExonStart_0base") %in% colNames)){
    eventType <- "MXE"
    # read.table()/read.delim() with check.names = TRUE prefix colnames starting with a digit with "X";
    # both the prefixed and the raw rMATS names are accepted
    mxePrefix <- if ("X1stExonStart_0base" %in% colNames) "X" else ""
    # If the strand is +, then the inclusion form includes the 1st exon (1stExonStart_0base, 1stExonEnd) and skips the 2nd exon
    # If the strand is -, then the inclusion form includes the 2nd exon (2ndExonStart_0base, 2ndExonEnd) and skips the 1st exon
    coordCols <- paste0(mxePrefix, c("1stExonStart_0base", "1stExonEnd", "2ndExonStart_0base", "2ndExonEnd"))

  } else if ("longExonStart_0base" %in% colNames){
    eventType <- "Altss"
    # The inclusion form includes the long exon (longExonStart_0base, longExonEnd) instead of the short exon (shortES shortEE)
    coordCols <- c("longExonStart_0base", "longExonEnd")

  } else if ("riExonStart_0base" %in% colNames){
    eventType <- "IR"
    # The inclusion form includes (retains) the intron (upstreamEE, downstreamES)
    coordCols <- c("upstreamEE", "downstreamES")

  } else {
    # Fail here with a clear message instead of printing and failing later on an undefined variable
    stop("The provided file is not supported", call. = FALSE)
  }

  requiredCols <- c("ID", "geneSymbol", "chr", coordCols,
                    "IJC_SAMPLE_1", "SJC_SAMPLE_1", "IJC_SAMPLE_2", "SJC_SAMPLE_2",
                    "IncFormLen", "SkipFormLen", "IncLevel1", "IncLevel2")
  missingCols <- setdiff(requiredCols, colNames)
  if (length(missingCols) > 0){
    stop("The provided rMATS table lacks required column(s): ", paste(missingCols, collapse = ", "), call. = FALSE)
  }

  # The number of samples is inferred from the first row, so an empty table cannot be processed
  if (nEvents == 0){
    stop("The provided rMATS table has no events", call. = FALSE)
  }

  if (eventType == "MXE"){
    eventCoordinates <- paste0(incTable[["chr"]],
                               ":(1st)", incTable[[coordCols[1]]], "-", incTable[[coordCols[2]]],
                               ";(2nd)", incTable[[coordCols[3]]], "-", incTable[[coordCols[4]]])
  } else {
    eventCoordinates <- paste0(incTable[["chr"]], ":", incTable[[coordCols[1]]], "-", incTable[[coordCols[2]]])
  }

  # Mimicking vast-tools INCLUSION table structure (6 columns for event ID) to facilitate the compatibility with other betAS functions
  # "GENE" | "EVENT" | "COORD" | "LENGTH" | "FullCO" | "COMPLEX"
  commonCols <- data.frame(
    # Remove "\"" from some gene symbols (probably not needed if read.table/read.delim is done with quote = "\"")
    GENE = gsub(pattern = "\"", replacement = "", x = incTable[["geneSymbol"]]),
    EVENT = incTable[["ID"]],
    COORD = eventCoordinates,
    LENGTH = rep(0, nEvents),
    FullCO = eventCoordinates,
    COMPLEX = rep(eventType, nEvents),
    stringsAsFactors = FALSE
  )

  # Number of comma-separated values in the first entry of a column.
  # as.character() covers single-sample groups, whose columns are read as numeric (no commas)
  countSamples <- function(column){
    length(strsplit(as.character(column[1]), ",", fixed = TRUE)[[1]])
  }

  # Split a comma-separated per-sample column into a numeric matrix (events x samples).
  # Built with matrix() directly so that one-row tables keep their matrix shape
  splitToMatrix <- function(column, nSamples, columnName){
    values <- strsplit(as.character(column), ",", fixed = TRUE)
    if (any(lengths(values) != nSamples)){
      stop("Column '", columnName, "' does not contain ", nSamples, " value(s) in every row", call. = FALSE)
    }
    matrix(as.numeric(unlist(values)), ncol = nSamples, byrow = TRUE)
  }

  # Infer number of samples from the first row in "SJC columns"
  Nsamples_Group1 <- countSamples(incTable[["SJC_SAMPLE_1"]])
  Nsamples_Group2 <- countSamples(incTable[["SJC_SAMPLE_2"]])

  # Name samples from groups 1 and 2
  sampleNames <- c(paste0("G1_S", seq_len(Nsamples_Group1)),
                   paste0("G2_S", seq_len(Nsamples_Group2)))

  # psiTable
  psiValues <- cbind(splitToMatrix(incTable[["IncLevel1"]], Nsamples_Group1, "IncLevel1"),
                     splitToMatrix(incTable[["IncLevel2"]], Nsamples_Group2, "IncLevel2"))
  colnames(psiValues) <- sampleNames
  psiRM <- data.frame(commonCols, psiValues, check.names = FALSE, stringsAsFactors = FALSE)
  colnames(psiRM) <- c("GENE","EVENT","COORD","LENGTH","FullCO","COMPLEX",sampleNames)

  # qualTable
  # Group 2 columns are split using the group 2 sample count (groups may differ in size).
  # Dividing an events x samples matrix by a per-event vector normalises each row by its own effective length
  inc <- cbind(splitToMatrix(incTable[["IJC_SAMPLE_1"]], Nsamples_Group1, "IJC_SAMPLE_1"),
               splitToMatrix(incTable[["IJC_SAMPLE_2"]], Nsamples_Group2, "IJC_SAMPLE_2")) / incTable[["IncFormLen"]]
  exc <- cbind(splitToMatrix(incTable[["SJC_SAMPLE_1"]], Nsamples_Group1, "SJC_SAMPLE_1"),
               splitToMatrix(incTable[["SJC_SAMPLE_2"]], Nsamples_Group2, "SJC_SAMPLE_2")) / incTable[["SkipFormLen"]]

  # Mimicking vast-tools INCLUSION table ".Q" columns to facilitate the compatibility with other betAS functions
  # paste0() flattens inc/exc column-wise; matrix() restores the events x samples layout
  qualValues <- matrix(paste0("A,A,0=0=0,A,", eventType, "@", inc, ",", exc), nrow = nEvents)
  colnames(qualValues) <- paste0(sampleNames, ".Q")
  qualRM <- data.frame(commonCols, qualValues, check.names = FALSE, stringsAsFactors = FALSE)
  colnames(qualRM) <- c("GENE","EVENT","COORD","LENGTH","FullCO","COMPLEX",paste0(sampleNames,".Q"))

  # Remove events containing at least one NA; the same row mask is applied to both tables so they stay aligned
  keepRows <- rowSums(is.na(psiRM)) == 0
  psiRM <- psiRM[keepRows, , drop = FALSE]
  qualRM <- qualRM[keepRows, , drop = FALSE]

  filterRM <- list(PSI = psiRM,
                   Qual = qualRM,
                   EventsPerType = table(psiRM$COMPLEX),
                   Samples = sampleNames)

  return(filterRM)

}
96+
97+
# Filter PSI table (rMATS) by alternativity
# Filter previously filtered PSI table from rMATS to consider only events with PSIs between (and including) minPsi and maxPSI.
# Rows are kept when both all_grteq_row(row, minPsi) and all_lweq_row(row, maxPsi) are TRUE for the sample PSI columns
# (both helpers are defined elsewhere in the package; presumably they test that every PSI in the row is >= / <= the threshold - TODO confirm)
# @param filteredRMList List containing PSI and Qual (rMATS) tables, obtained with filterrMATS()
# @param minPsi (numeric) Minimum PSI to consider
# @param maxPsi (numeric) Maximum PSI to consider
#
# @return List with: 1) "PSI": filtered table PSI columns, 2) "Qual": filtered table "Qual" columns, including inc and exc, 3) "EventsPerType": table with number of events per type and 4) "Samples": sample names
# @export
#
# @examples
alternativerMATS <- function(filteredRMList, minPsi, maxPsi){

  psiTable <- filteredRMList$PSI
  qualTable <- filteredRMList$Qual

  originalColN <- ncol(psiTable)

  # Sample PSI columns only (the first 6 columns identify the event).
  # Extracted once so that both tests run on exactly the same columns: appending a helper
  # column to psiTable before the second test would make that flag be tested against maxPsi.
  # drop = FALSE keeps a data.frame when there is a single sample column
  samplePsi <- psiTable[, -c(1:6), drop = FALSE]

  # Consider alternative events only
  if (nrow(samplePsi) > 0){
    allGreaterMin <- apply(samplePsi, 1, all_grteq_row, minPsi)
    allLowerMax <- apply(samplePsi, 1, all_lweq_row, maxPsi)
    # which() discards rows for which either test returned NA
    keepRows <- which(allGreaterMin == TRUE & allLowerMax == TRUE)
  } else {
    keepRows <- integer(0)
  }

  psiTable <- psiTable[keepRows, , drop = FALSE]

  # Qual rows are looked up by event ID, in the order of the retained PSI rows
  qualTable <- qualTable[match(psiTable$EVENT, qualTable$EVENT), seq_len(originalColN), drop = FALSE]

  alternativeRM <- list(PSI = psiTable,
                        Qual = qualTable,
                        EventsPerType = table(psiTable$COMPLEX),
                        Samples = colnames(psiTable)[-c(1:6)])

  return(alternativeRM)

}

0 commit comments

Comments
 (0)