|
| 1 | +# Filter *.MATS.JC.txt table (rMATS) for quantified PSIs |
| 2 | +# Filter original table from rMATS to remove events containing NAs in at least one sample and split PSI and Qual tables |
| 3 | +# Normalised inc and exc reads are obtained from "IJC" and "SJC" columns dividing junction counts by IncFormLen and SkipFormLen |
| 4 | +# @param incTable rMATS' *.MATS.JC.txt table |
| 5 | +# |
| 6 | +# @return List with: 1) filtered table PSI columns, 2) filtered table "Qual" columns including inc and exc, 3) table with number of events per type and 4) Samples |
| 7 | +# @export |
| 8 | +# |
| 9 | +# @examples |
| 10 | +filterrMATS <- function(incTable){ |
| 11 | + |
| 12 | + filterRM <- list() |
| 13 | + |
| 14 | + colnames <- colnames(incTable) |
| 15 | + |
| 16 | + # Inspect rMATS colnames to identify type of event: |
| 17 | + # Each event type has its own separate file, and its own set of columns, as described in https://github.com/Xinglab/rmats-turbo/blob/v4.1.2/README.md |
| 18 | + |
| 19 | + if ("exonStart_0base" %in% colnames){ |
| 20 | + eventType <- "EX" |
| 21 | + eventCols <- c("exonStart_0base", "exonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE") |
| 22 | + # The inclusion form includes the target exon (exonStart_0base, exonEnd) |
| 23 | + eventCoordinates <- paste0(incTable$chr,":",incTable$exonStart_0base,"-",incTable$exonEnd) |
| 24 | + |
| 25 | + # NOTE for "MXE" events: |
| 26 | + # reading tables using read.table(file, sep="\t", header=TRUE, quote=""), which is the case in the app, corrects colnames starting with numbers by adding an X |
| 27 | + } else if ("X1stExonStart_0base" %in% colnames){ |
| 28 | + eventType <- "MXE" |
| 29 | + eventCols <- c("X1stExonStart_0base", "X1stExonEnd", "X2ndExonStart_0base", "X2ndExonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE") |
| 30 | + # If the strand is +, then the inclusion form includes the 1st exon (1stExonStart_0base, 1stExonEnd) and skips the 2nd exon |
| 31 | + # If the strand is -, then the inclusion form includes the 2nd exon (2ndExonStart_0base, 2ndExonEnd) and skips the 1st exon |
| 32 | + eventCoordinates <- paste0(incTable$chr,":(1st)",incTable$X1stExonStart_0base,"-",incTable$X1stExonEnd,";(2nd)",incTable$X2ndExonStart_0base,"-",incTable$X2ndExonEnd) |
| 33 | + |
| 34 | + } else if ("longExonStart_0base" %in% colnames){ |
| 35 | + eventType <- "Altss" |
| 36 | + eventCols <- c("longExonStart_0base", "longExonEnd", "shortES", "shortEE", "flankingES", "flankingEE") |
| 37 | + # The inclusion form includes the long exon (longExonStart_0base, longExonEnd) instead of the short exon (shortES shortEE) |
| 38 | + eventCoordinates <- paste0(incTable$chr,":",incTable$longExonStart_0base,"-",incTable$longExonEnd) |
| 39 | + |
| 40 | + } else if ("riExonStart_0base" %in% colnames){ |
| 41 | + eventType <- "IR" |
| 42 | + eventCols <- c("riExonStart_0base", "riExonEnd", "upstreamES", "upstreamEE", "downstreamES", "downstreamEE") |
| 43 | + # The inclusion form includes (retains) the intron (upstreamEE, downstreamES) |
| 44 | + eventCoordinates <- paste0(incTable$chr,":",incTable$upstreamEE,"-",incTable$downstreamES) |
| 45 | + } else { |
| 46 | + print("The provided file is not supported") |
| 47 | + } |
| 48 | + |
| 49 | + # Mimicking vast-tools INCLUSION table structure (6 columns for event ID) to facilitate the compatibility with other betAS functions |
| 50 | + # "GENE" | "EVENT" | "COORD" | "LENGTH" | "FullCO" | "COMPLEX" |
| 51 | + commonCols <- cbind(incTable[,c("geneSymbol","ID")], eventCoordinates, rep(0,nrow(incTable)), eventCoordinates, rep(eventType,nrow(incTable))) |
| 52 | + |
| 53 | + # Remove "\"" from some gene symbols (probably not needed if read.table/read.delim is done with quote = "\"") |
| 54 | + commonCols$geneSymbol <- gsub(pattern = "\"", replacement = "", x = commonCols$geneSymbol) |
| 55 | + |
| 56 | + # Infer number of samples from the first row in "SJC columns" by summing 1 to the number of "," |
| 57 | + Nsamples_Group1 <- length(gregexpr(",", incTable$SJC_SAMPLE_1[1], fixed = TRUE)[[1]])+1 |
| 58 | + Nsamples_Group2 <- length(gregexpr(",", incTable$SJC_SAMPLE_2[1], fixed = TRUE)[[1]])+1 |
| 59 | + |
| 60 | + # Name samples from groups 1 and 2 |
| 61 | + Samples_Group1 <- paste0("G1_S",1:Nsamples_Group1) |
| 62 | + Samples_Group2 <- paste0("G2_S",1:Nsamples_Group2) |
| 63 | + |
| 64 | + # psiTable |
| 65 | + psiRM <- cbind(commonCols, |
| 66 | + apply(matrix(unlist(strsplit(incTable$IncLevel1, ",")),ncol=Nsamples_Group1,byrow=T), 2, as.numeric), |
| 67 | + apply(matrix(unlist(strsplit(incTable$IncLevel2, ",")),ncol=Nsamples_Group2,byrow=T), 2, as.numeric)) |
| 68 | + colnames(psiRM) <- c("GENE","EVENT","COORD","LENGTH","FullCO","COMPLEX",Samples_Group1,Samples_Group2) |
| 69 | + |
| 70 | + # qualTable |
| 71 | + inc <- cbind(apply(matrix(unlist(strsplit(incTable$IJC_SAMPLE_1, ",")),ncol=Nsamples_Group1,byrow=T), 2, as.numeric)/incTable$IncFormLen, |
| 72 | + apply(matrix(unlist(strsplit(incTable$IJC_SAMPLE_2, ",")),ncol=Nsamples_Group1,byrow=T), 2, as.numeric)/incTable$IncFormLen) |
| 73 | + exc <- cbind(apply(matrix(unlist(strsplit(incTable$SJC_SAMPLE_1, ",")),ncol=Nsamples_Group1,byrow=T), 2, as.numeric)/incTable$SkipFormLen, |
| 74 | + apply(matrix(unlist(strsplit(incTable$SJC_SAMPLE_2, ",")),ncol=Nsamples_Group1,byrow=T), 2, as.numeric)/incTable$SkipFormLen) |
| 75 | + |
| 76 | + qualRM <- cbind(commonCols, |
| 77 | + # Mimicking vast-tools INCLUSION table ".Q" columns to facilitate the compatibility with other betAS functions |
| 78 | + matrix(paste0("A,A,0=0=0,A,",rep(eventType,nrow(incTable)),"@", inc , ",", exc ), nrow = nrow(inc))) |
| 79 | + colnames(qualRM) <- c("GENE","EVENT","COORD","LENGTH","FullCO","COMPLEX",paste0(Samples_Group1,".Q"),paste0(Samples_Group2,".Q")) |
| 80 | + |
| 81 | + # Remove events containing at least one NA |
| 82 | + psiRM$AnyNA <- apply(psiRM, 1, anyNA) |
| 83 | + psiRM <- psiRM[which(psiRM$AnyNA == FALSE),] |
| 84 | + psiRM <- psiRM[,-c(ncol(psiRM))] |
| 85 | + qualRM <- qualRM[match(psiRM$EVENT, qualRM$EVENT),] |
| 86 | + |
| 87 | + filterRM[[1]] <- psiRM |
| 88 | + filterRM[[2]] <- qualRM |
| 89 | + filterRM[[3]] <- table(psiRM$COMPLEX) |
| 90 | + filterRM[[4]] <- colnames(psiRM)[-c(1:6)] |
| 91 | + |
| 92 | + names(filterRM) <- c("PSI", "Qual", "EventsPerType", "Samples") |
| 93 | + return(filterRM) |
| 94 | + |
| 95 | +} |
| 96 | + |
| 97 | +# Filter PSI table (rMATS) by alternativity |
| 98 | +# Filter previously filtered PSI table from rMATS to consider only events with PSIs between (and including) minPsi and maxPSI. |
| 99 | +# @param filteredRMList List containing PSI and Qual (rMATS) tables, obtained with filterrMATS() |
| 100 | +# @param minPsi (numeric) Minimum PSI to consider |
| 101 | +# @param maxPsi (numeric) Maximum PSI to consider |
| 102 | +# |
| 103 | +# @return List with: 1) filtered table PSI columns, 2) filtered table "Qual" columns, including inc and exc, 3) table with number of events per type and 4) Samples |
| 104 | +# @export |
| 105 | +# |
| 106 | +# @examples |
| 107 | +alternativerMATS <- function(filteredRMList, minPsi, maxPsi){ |
| 108 | + |
| 109 | + alternativeRM <- list() |
| 110 | + |
| 111 | + psiTable <- filteredRMList$PSI |
| 112 | + qualTable <- filteredRMList$Qual |
| 113 | + |
| 114 | + originalColN <- ncol(psiTable) |
| 115 | + |
| 116 | + # Consider alternative events only |
| 117 | + psiTable$AllGreaterMin <- apply(psiTable[,-c(1:6)], 1, all_grteq_row, minPsi) |
| 118 | + psiTable$AllLowerMax <- apply(psiTable[,-c(1:6)], 1, all_lweq_row, maxPsi) |
| 119 | + psiTable <- psiTable[which(psiTable$AllGreaterMin == TRUE & psiTable$AllLowerMax == TRUE),] |
| 120 | + qualTable <- qualTable[match(psiTable$EVENT, qualTable$EVENT),] |
| 121 | + |
| 122 | + # Remove columns added |
| 123 | + psiTable <- psiTable[,c(1:originalColN)] |
| 124 | + qualTable <- qualTable[,c(1:originalColN)] |
| 125 | + |
| 126 | + alternativeRM[[1]] <- psiTable |
| 127 | + alternativeRM[[2]] <- qualTable |
| 128 | + alternativeRM[[3]] <- table(psiTable$COMPLEX) |
| 129 | + alternativeRM[[4]] <- colnames(psiTable)[-c(1:6)] |
| 130 | + |
| 131 | + names(alternativeRM) <- c("PSI", "Qual", "EventsPerType", "Samples") |
| 132 | + |
| 133 | + return(alternativeRM) |
| 134 | + |
| 135 | +} |
0 commit comments