Skip to content

Commit 4f4ed2f

Browse files
jonathangoekeclaude
andcommitted
Merge branch 'devel_pre_v4' into devel_pre_v4_jg
Resolve conflicts across 4 files by taking the devel_pre_v4 refactor (isoreParameters -> opt.discovery/rcAssignmentParameters, demultiplexed -> extractBarcodeUMI; removed sampleNames/cleanReads/barcodesToFilter handling) and porting the applicable JG TODO review comments. Drop the stale setIsoreParameters Pass-2 header and comments that referred to code devel_pre_v4 deleted. Also fix a pre-existing brace mismatch in checkInputs() introduced by 2047190 that prevented the package from parsing. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2 parents 20e409c + 6dcb1d5 commit 4f4ed2f

11 files changed

Lines changed: 248 additions & 279 deletions

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(bambu)
4+
export(bambu.singlecell)
45
export(plotBambu)
56
export(prepareAnnotations)
67
export(readFromGTF)

R/bambu-assignDist.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
#' @inheritParams bambu
77
#' @import data.table
88
#' @noRd
9-
assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters,
10-
verbose, sampleMetadata, demultiplexed,
9+
assignReadClasstoTranscripts <- function(readClassList, annotations, rcAssignmentParameters,
10+
verbose, sampleMetadata, extractBarcodeUMI,
1111
returnDistTable = FALSE, trackReads = TRUE) {
1212
if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
13-
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
13+
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, rcAssignmentParameters, verbose, returnDistTable)
1414
readClassList <- splitReadClassFiles(readClassList)
1515
readClassDt <- genEquiRCs(metadata(readClassList)$readClassDist, annotations, verbose)
1616
readClassDt$eqClass.match = match(readClassDt$eqClassById,metadata(readClassList)$eqClassById)
@@ -21,7 +21,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
2121
mutate(aval = 1) %>%
2222
data.table()
2323
#return non-em counts
24-
ColData <- generateColData(readClassList, sampleMetadata, demultiplexed)
24+
ColData <- generateColData(readClassList, sampleMetadata, extractBarcodeUMI)
2525
quantData <- SummarizedExperiment(assays = SimpleList(
2626
counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
2727
rowRanges = annotations,

R/bambu-extendAnnotations-utilityExtend.R

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList
124124
} else if(is.null(NDR)) {
125125
NDR <- 0.5
126126
}
127-
filterSet <- (rowDataCombined$NDR <= NDR | rowDataCombined$readClassType == "equal:compatible")
127+
filterSet <- ((!is.na(rowDataCombined$NDR) & rowDataCombined$NDR <= NDR) | rowDataCombined$readClassType == "equal:compatible")
128128
lowConfidenceTranscripts <- combindRowDataWithRanges(
129129
rowDataCombined[!filterSet,],
130130
exonRangesCombined[!filterSet])
@@ -248,7 +248,7 @@ calculateNDROnTranscripts <- function(combinedTranscripts, useTxScore = FALSE){
248248
} else {
249249
combinedTranscripts$NDR <- calculateNDR(combinedTranscripts$maxTxScore, equal)
250250
}
251-
combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] <- 1
251+
combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] <- NA
252252
return(combinedTranscripts)
253253
}
254254

@@ -916,15 +916,14 @@ addGeneIdsToReadClassTable <- function(readClassTable, distTable,
916916
#' @description This function trains a model for use on other data
917917
#' @param extendedAnnotations A GRangesList object produced from bambu(quant = FALSE) or rowRanges(se)
918918
#' @param NDR The maximum NDR for novel transcripts to be in extendedAnnotations (0-1). If not provided a recommended NDR is calculated.
919-
#' @param includeRef A boolean which if TRUE will also filter out reference annotations based on their NDR
920919
#' @param prefix A string which determines which transcripts are considered novel by bambu and will be filtered (by default = 'Bambu')
921920
#' @param baselineFDR a value between 0-1. Bambu uses this FDR on the trained model to recommend an equivalent NDR threshold to be used for the sample. By default, a baseline FDR of 0.1 is used. This does not impact the analysis if an NDR is set.
922921
#' @param defaultModels a bambu trained model object that bambu will use when fitReadClassModel==FALSE or the data is not suitable for training, defaults to the pretrained model in the bambu package
923922
#' Output - returns a similar GRangesList object with entries swapped into or out of metadata(extendedAnnotations)$lowConfidenceTranscripts
924923
#' @details
925924
#' @return extendedAnnotations with a new NDR threshold
926925
#' @export
927-
setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){
926+
setNDR <- function(extendedAnnotations, NDR = NULL, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){
928927
#Check to see if the annotations/gtf are derived from Bambu
929928
if(is.null(mcols(extendedAnnotations)$NDR)){
930929
warning("Annotations were not extended by Bambu (or the wrong prefix was provided). NDR can not be set")
@@ -941,17 +940,10 @@ setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix =
941940
message("Recommending a novel discovery rate (NDR) of: ", NDR)
942941
}
943942

944-
#If reference annotations should be filtered too (note that reference annotations with no read support arn't filtered)
945-
if(includeRef){
946-
toRemove <- (!is.na(mcols(extendedAnnotations)$NDR) & mcols(extendedAnnotations)$NDR > NDR)
947-
toAdd <- !is.na(mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR) &
948-
mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR
949-
} else {
950-
toRemove <- (mcols(extendedAnnotations)$NDR > NDR &
951-
grepl(prefix, mcols(extendedAnnotations)$TXNAME))
952-
toAdd <- (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR &
953-
grepl(prefix, mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$TXNAME))
954-
}
943+
toRemove <- (mcols(extendedAnnotations)$NDR > NDR &
944+
grepl(prefix, mcols(extendedAnnotations)$TXNAME))
945+
toAdd <- (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR &
946+
grepl(prefix, mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$TXNAME))
955947

956948
# TODO: [POOR NAMING] temp stores annotations being swapped between low/high confidence sets; rename to swappedAnnotations or similar
957949
temp <- c(metadata(extendedAnnotations)$lowConfidenceTranscripts[!toAdd], extendedAnnotations[toRemove])
@@ -974,7 +966,7 @@ setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix =
974966
# Call count: 1 call, 1 file
975967
#' Extend annotations by clusters
976968
#' @noRd
977-
isore.extendAnnotations.clusters <- function(readClassList, annotations, clusters, NDR, isoreParameters, stranded, bpParameters, fusionMode, verbose = FALSE){
969+
isore.extendAnnotations.clusters <- function(readClassList, annotations, clusters, NDR, discoveryParameters, stranded, bpParameters, fusionMode, verbose = FALSE){
978970
message("--- Start extending annotations for clusters ---")
979971
#if clustering is a csv, create a list with the barcodes for each cluster
980972
#csv must have two cols with heading barcode, cluster
@@ -992,7 +984,7 @@ isore.extendAnnotations.clusters <- function(readClassList, annotations, cluster
992984
for(i in seq_along(clusters)){
993985
print(names(clusters)[i])
994986
###TODO need to account for the sample name here which is added to the barcode
995-
index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
987+
index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
996988
index <- index[!is.na(index)]
997989
print(length(index))
998990
if(length(index)<20) next
@@ -1006,21 +998,21 @@ isore.extendAnnotations.clusters <- function(readClassList, annotations, cluster
1006998
rowData(rcf.filt)$startSD <- 0
1007999
rowData(rcf.filt)$endSD <- 0
10081000
rowData(rcf.filt)$readCount.posStrand <- 0
1009-
thresholdIndex <- which(rowData(rcf.filt)$readCount>=isoreParameters$min.readCount)
1010-
model <- trainBambu(rcf.filt, verbose = verbose, min.readCount = isoreParameters$min.readCount)
1001+
thresholdIndex <- which(rowData(rcf.filt)$readCount>=discoveryParameters$min.readCount)
1002+
model <- trainBambu(rcf.filt, verbose = verbose, min.readCount = discoveryParameters$min.readCount)
10111003
txScore <- getTranscriptScore(rowData(rcf.filt)[thresholdIndex,], model,
10121004
defaultModels)
10131005
rowData(rcf.filt)$txScore <- rep(NA,nrow(rcf.filt))
10141006
rowData(rcf.filt)$txScore[thresholdIndex] <- txScore
10151007
#txScores = cbind(txScores, rowData(rcf.filt)$txScore)
10161008
rcfs.clusters[[names(clusters)[i]]] <- rcf.filt
10171009
annotations.clusters[[names(clusters)[i]]] <- bambu.extendAnnotations(list(rcf.filt), annotations, NDR,
1018-
isoreParameters, stranded, bpParameters, fusionMode, verbose)
1010+
discoveryParameters, stranded, bpParameters, fusionMode, verbose)
10191011
}
10201012
if(length(rcfs.clusters)>0){
10211013
print("--- Merging all individual clusters ---")
10221014
annotations.clusters[["merged"]] <- bambu.extendAnnotations(rcfs.clusters, annotations, NDR,
1023-
isoreParameters, stranded, bpParameters, fusionMode, verbose)
1015+
discoveryParameters, stranded, bpParameters, fusionMode, verbose)
10241016
}
10251017

10261018
return(annotations.clusters)

R/bambu-extendAnnotations.R

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
#' @inheritParams bambu
88
#' @noRd
99
bambu.extendAnnotations <- function(readClassList, annotations, NDR,
10-
isoreParameters, stranded, bpParameters, fusionMode = FALSE, verbose = FALSE) {
10+
discoveryParameters, stranded, bpParameters, fusionMode = FALSE, verbose = FALSE) {
1111
start.ptm_all <- proc.time()
1212
combinedTxCandidates <- isore.combineTranscriptCandidates(readClassList,
1313
stranded, ## stranded used for unspliced reduce
14-
min.readCount = isoreParameters[["min.readCount"]],
15-
min.readFractionByGene = isoreParameters[["min.readFractionByGene"]],
16-
min.txScore.multiExon = isoreParameters[["min.txScore.multiExon"]],
17-
min.txScore.singleExon = isoreParameters[["min.txScore.singleExon"]],
14+
min.readCount = discoveryParameters[["min.readCount"]],
15+
min.readFractionByGene = discoveryParameters[["min.readFractionByGene"]],
16+
min.txScore.multiExon = discoveryParameters[["min.txScore.multiExon"]],
17+
min.txScore.singleExon = discoveryParameters[["min.txScore.singleExon"]],
1818
bpParameters,
1919
verbose)
2020
end.ptm_all <- proc.time()
@@ -24,21 +24,21 @@ bambu.extendAnnotations <- function(readClassList, annotations, NDR,
2424
annotations <- isore.extendAnnotations(
2525
combinedTranscripts = combinedTxCandidates,
2626
annotationGrangesList = annotations,
27-
remove.subsetTx = isoreParameters[["remove.subsetTx"]],
28-
min.sampleNumber = isoreParameters[["min.sampleNumber"]],
27+
remove.subsetTx = discoveryParameters[["remove.subsetTx"]],
28+
min.sampleNumber = discoveryParameters[["min.sampleNumber"]],
2929
NDR = NDR,
30-
min.exonDistance = isoreParameters[["min.exonDistance"]],
31-
min.exonOverlap = isoreParameters[["min.exonOverlap"]],
30+
min.exonDistance = discoveryParameters[["min.exonDistance"]],
31+
min.exonOverlap = discoveryParameters[["min.exonOverlap"]],
3232
min.primarySecondaryDist =
33-
isoreParameters[['min.primarySecondaryDist']],
33+
discoveryParameters[['min.primarySecondaryDist']],
3434
min.primarySecondaryDistStartEnd =
35-
isoreParameters[['min.primarySecondaryDistStartEnd1']],
35+
discoveryParameters[['min.primarySecondaryDistStartEnd1']],
3636
min.readFractionByEqClass =
37-
isoreParameters[['min.readFractionByEqClass']],
37+
discoveryParameters[['min.readFractionByEqClass']],
3838
fusionMode = fusionMode,
39-
prefix = isoreParameters[["prefix"]],
40-
baselineFDR = isoreParameters[["baselineFDR"]],
41-
defaultModels = isoreParameters[["defaultModels"]],
39+
prefix = discoveryParameters[["prefix"]],
40+
baselineFDR = discoveryParameters[["baselineFDR"]],
41+
defaultModels = discoveryParameters[["defaultModels"]],
4242
verbose = verbose)
4343
end.ptm_all <- proc.time()
4444
if (verbose) message("extend annotations in ",

0 commit comments

Comments
 (0)