Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions R/bambu-assignDist.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#' @inheritParams bambu
#' @import data.table
#' @noRd
assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters,
verbose, sampleMetadata, demultiplexed,
assignReadClasstoTranscripts <- function(readClassList, annotations,
rcAssignmentParameters, verbose, sampleMetadata, demultiplexed,
returnDistTable = FALSE, trackReads = TRUE) {
if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, rcAssignmentParameters, verbose, returnDistTable)
readClassList <- splitReadClassFiles(readClassList)
readClassDt <- genEquiRCs(metadata(readClassList)$readClassDist, annotations, verbose)
readClassDt$eqClass.match = match(readClassDt$eqClassById,metadata(readClassList)$eqClassById)
Expand Down
10 changes: 5 additions & 5 deletions R/bambu-extendAnnotations-utilityExtend.R
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ setNDR <- function(extendedAnnotations, NDR = NULL, prefix = 'Bambu', baselineFD

#' Extend annotations by clusters
#' @noRd
isore.extendAnnotations.clusters <- function(readClassList, annotations, clusters, NDR, isoreParameters, stranded, bpParameters, fusionMode, verbose = FALSE){
isore.extendAnnotations.clusters <- function(readClassList, annotations, clusters, NDR, discoveryParameters, stranded, bpParameters, fusionMode, verbose = FALSE){
message("--- Start extending annotations for clusters ---")
#if clustering is a csv, create a list with the barcodes for each cluster
#csv must have two cols with heading barcode, cluster
Expand Down Expand Up @@ -904,21 +904,21 @@ isore.extendAnnotations.clusters <- function(readClassList, annotations, cluster
rowData(rcf.filt)$startSD <- 0
rowData(rcf.filt)$endSD <- 0
rowData(rcf.filt)$readCount.posStrand <- 0
thresholdIndex <- which(rowData(rcf.filt)$readCount>=isoreParameters$min.readCount)
model <- trainBambu(rcf.filt, verbose = verbose, min.readCount = isoreParameters$min.readCount)
thresholdIndex <- which(rowData(rcf.filt)$readCount>=discoveryParameters$min.readCount)
model <- trainBambu(rcf.filt, verbose = verbose, min.readCount = discoveryParameters$min.readCount)
txScore <- getTranscriptScore(rowData(rcf.filt)[thresholdIndex,], model,
defaultModels)
Comment on lines +907 to 910
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defaultModels is referenced when calling getTranscriptScore(...) but is not defined in this function scope after renaming isoreParameters to discoveryParameters. This will error at runtime. Use discoveryParameters$defaultModels (or assign a local defaultModels <- discoveryParameters[["defaultModels"]]) before calling getTranscriptScore.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This issue is in isore.extendAnnotations.clusters; I will fix it later.

rowData(rcf.filt)$txScore <- rep(NA,nrow(rcf.filt))
rowData(rcf.filt)$txScore[thresholdIndex] <- txScore
#txScores = cbind(txScores, rowData(rcf.filt)$txScore)
rcfs.clusters[[names(clusters)[i]]] <- rcf.filt
annotations.clusters[[names(clusters)[i]]] <- bambu.extendAnnotations(list(rcf.filt), annotations, NDR,
isoreParameters, stranded, bpParameters, fusionMode, verbose)
discoveryParameters, stranded, bpParameters, fusionMode, verbose)
}
if(length(rcfs.clusters)>0){
print("--- Merging all individual clusters ---")
annotations.clusters[["merged"]] <- bambu.extendAnnotations(rcfs.clusters, annotations, NDR,
isoreParameters, stranded, bpParameters, fusionMode, verbose)
discoveryParameters, stranded, bpParameters, fusionMode, verbose)
}

return(annotations.clusters)
Expand Down
30 changes: 15 additions & 15 deletions R/bambu-extendAnnotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
#' @inheritParams bambu
#' @noRd
bambu.extendAnnotations <- function(readClassList, annotations, NDR,
isoreParameters, stranded, bpParameters, fusionMode = FALSE, verbose = FALSE) {
discoveryParameters, stranded, bpParameters, fusionMode = FALSE, verbose = FALSE) {
start.ptm_all <- proc.time()
combinedTxCandidates <- isore.combineTranscriptCandidates(readClassList,
stranded, ## stranded used for unspliced reduce
min.readCount = isoreParameters[["min.readCount"]],
min.readFractionByGene = isoreParameters[["min.readFractionByGene"]],
min.txScore.multiExon = isoreParameters[["min.txScore.multiExon"]],
min.txScore.singleExon = isoreParameters[["min.txScore.singleExon"]],
min.readCount = discoveryParameters[["min.readCount"]],
min.readFractionByGene = discoveryParameters[["min.readFractionByGene"]],
min.txScore.multiExon = discoveryParameters[["min.txScore.multiExon"]],
min.txScore.singleExon = discoveryParameters[["min.txScore.singleExon"]],
bpParameters,
verbose)
end.ptm_all <- proc.time()
Expand All @@ -20,21 +20,21 @@ bambu.extendAnnotations <- function(readClassList, annotations, NDR,
annotations <- isore.extendAnnotations(
combinedTranscripts = combinedTxCandidates,
annotationGrangesList = annotations,
remove.subsetTx = isoreParameters[["remove.subsetTx"]],
min.sampleNumber = isoreParameters[["min.sampleNumber"]],
remove.subsetTx = discoveryParameters[["remove.subsetTx"]],
min.sampleNumber = discoveryParameters[["min.sampleNumber"]],
NDR = NDR,
min.exonDistance = isoreParameters[["min.exonDistance"]],
min.exonOverlap = isoreParameters[["min.exonOverlap"]],
min.exonDistance = discoveryParameters[["min.exonDistance"]],
min.exonOverlap = discoveryParameters[["min.exonOverlap"]],
min.primarySecondaryDist =
isoreParameters[['min.primarySecondaryDist']],
discoveryParameters[['min.primarySecondaryDist']],
min.primarySecondaryDistStartEnd =
isoreParameters[['min.primarySecondaryDistStartEnd1']],
discoveryParameters[['min.primarySecondaryDistStartEnd1']],
min.readFractionByEqClass =
isoreParameters[['min.readFractionByEqClass']],
discoveryParameters[['min.readFractionByEqClass']],
fusionMode = fusionMode,
prefix = isoreParameters[["prefix"]],
baselineFDR = isoreParameters[["baselineFDR"]],
defaultModels = isoreParameters[["defaultModels"]],
prefix = discoveryParameters[["prefix"]],
baselineFDR = discoveryParameters[["baselineFDR"]],
defaultModels = discoveryParameters[["defaultModels"]],
verbose = verbose)
end.ptm_all <- proc.time()
if (verbose) message("extend annotations in ",
Expand Down
12 changes: 6 additions & 6 deletions R/bambu-processReads.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @noRd
bambu.processReads <- function(reads, annotations, genomeSequence,
readClass.outputDir=NULL, yieldSize=1000000, bpParameters,
stranded=FALSE, verbose=FALSE, isoreParameters = setIsoreParameters(NULL),
stranded=FALSE, verbose=FALSE, discoveryParameters = setDiscoveryParameters(NULL),
processByChromosome = FALSE, processByBam = TRUE, trackReads = trackReads, fusionMode = fusionMode,
demultiplexed = FALSE, cleanReads = FALSE, dedupUMI = FALSE, sampleNames = NULL, barcodesToFilter = NULL) {
genomeSequence <- checkInputSequence(genomeSequence)
Expand Down Expand Up @@ -48,11 +48,11 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
names(reads)[seq_along(sampleNames)] <- sampleNames
}
}
min.readCount <- isoreParameters[["min.readCount"]]
fitReadClassModel <- isoreParameters[["fitReadClassModel"]]
defaultModels <- isoreParameters[["defaultModels"]]
returnModel <- isoreParameters[["returnModel"]]
min.exonOverlap <- isoreParameters[["min.exonOverlap"]]
min.readCount <- discoveryParameters[["min.readCount"]]
fitReadClassModel <- discoveryParameters[["fitReadClassModel"]]
defaultModels <- discoveryParameters[["defaultModels"]]
returnModel <- discoveryParameters[["returnModel"]]
min.exonOverlap <- discoveryParameters[["min.exonOverlap"]]

if(processByBam){
readClassList <- bplapply(seq_along(reads), function(i) {
Expand Down
4 changes: 2 additions & 2 deletions R/bambu-quantify.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#' @inheritParams bambu
#' @import data.table
#' @noRd
bambu.quantify <- function(readClassDt, countMatrix, incompatibleCountMatrix, txid.index, GENEIDs, emParameters,
bambu.quantify <- function(readClassDt, countMatrix, incompatibleCountMatrix, txid.index, GENEIDs, emParameters,
trackReads = FALSE, returnDistTable = FALSE,
verbose = FALSE, isoreParameters = setIsoreParameters(NULL)) {
verbose = FALSE) {
start.ptm <- proc.time()
readClassDt$nobs = countMatrix[readClassDt$eqClass.match]
readClassDt$nobs[is.na(readClassDt$nobs)] = 0
Expand Down
56 changes: 33 additions & 23 deletions R/bambu.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,8 @@
#' annotation to be assigned to the same gene id, defaults to 10bp}
#' \item{min.primarySecondaryDist}{specifying the minimum distance
#' threshold, defaults to 5bp}
#' \item{min.primarySecondaryDistStartEnd1}{specifying the minimum number
#' \item{min.primarySecondaryDistStartEnd1}{specifying the minimum number
#' of distance threshold, used for extending annotation, defaults to 5bp}
#' \item{min.primarySecondaryDistStartEnd2}{specifying the minimum number
#' of distance threshold, used for estimating distance to annotation,
#' defaults to 5bp}
#' \item{min.txScore.multiExon}{specifying the minimum transcript level
#' threshold for multi-exon transcripts during sample combining,
#' defaults to 0}
Expand All @@ -62,6 +59,17 @@
#' \item{prefix}{specifying prefix for new gene Ids (genePrefix.number),
#' defaults to "Bambu"}
#' }
#' @param opt.rcAssignment A list of controlling parameters for the read class
#' to transcript assignment process:
#' \describe{
#' \item{min.exonDistance}{specifying minimum distance to known transcript
#' to be considered a valid match, defaults to 35bp}
#' \item{min.primarySecondaryDist}{specifying the minimum distance
#' threshold between primary and secondary assignments, defaults to 5bp}
#' \item{min.primarySecondaryDistStartEnd2}{specifying the minimum
#' distance threshold for start/end positions used for read assignment,
#' defaults to 5bp}
#' }
Comment thread
SuiYue-2308 marked this conversation as resolved.
#' @param opt.em A list of controlling parameters for quantification
#' algorithm estimation process:
#' \describe{
Expand Down Expand Up @@ -141,8 +149,8 @@
#' genome = fa.file, discovery = TRUE, quant = TRUE)
#' @export
bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
Comment thread
SuiYue-2308 marked this conversation as resolved.
mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE,
assignDist = TRUE, quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL,
mode = NULL, opt.discovery = NULL, opt.rcAssignment = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE,
assignDist = TRUE, quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL,
trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL,
fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, quantData = NULL,
sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL,
Expand Down Expand Up @@ -176,6 +184,8 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
}
if(lowMemory)
message("lowMemory has been deprecated and split into processByChromosome and processByBam. Please see Documentation")
if("min.primarySecondaryDistStartEnd2" %in% names(opt.discovery))
message("min.primarySecondaryDistStartEnd2 has been moved to opt.rcAssignment. Please pass this parameter via opt.rcAssignment instead.")
if(is.null(annotations)){
annotations <- GRangesList()
} else {
Expand All @@ -184,11 +194,11 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
genomeSequence = genome, discovery = discovery,
sampleNames = sampleNames, sampleData = sampleData, quantData = quantData)
}
isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery)
opt.discovery <- setDiscoveryParameters(discoveryParameters = opt.discovery)
#below line is to be compatible with earlier version of running bambu
if(!is.null(isoreParameters$max.txNDR)) NDR = isoreParameters$max.txNDR

emParameters <- setEmParameters(emParameters = opt.em)
if(!is.null(opt.discovery$max.txNDR)) NDR = opt.discovery$max.txNDR
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New behavior/API surface is introduced (opt.rcAssignment, and potentially forwarding from opt.discovery for backward compatibility), but there are no tests covering (1) that opt.rcAssignment values actually change assignment outputs, and (2) that legacy opt.discovery settings continue to affect assignment if backward compatibility is intended. Please add/extend test coverage (e.g., in tests/testthat/test_bambu_arguments.R) to lock down these expectations.

Suggested change
if(!is.null(opt.discovery$max.txNDR)) NDR = opt.discovery$max.txNDR
if(!is.null(opt.discovery$max.txNDR)) NDR = opt.discovery$max.txNDR
default.rcAssignment <- setRcAssignmentParameters(rcAssignmentParameters = NULL)
if(is.null(opt.rcAssignment)) opt.rcAssignment <- list()
if(!is.null(opt.discovery)){
legacy.rcAssignment.names <- intersect(names(opt.discovery),
names(default.rcAssignment))
if(length(legacy.rcAssignment.names) > 0){
rcAssignment.names.to.fill <- legacy.rcAssignment.names[
!(legacy.rcAssignment.names %in% names(opt.rcAssignment))]
if(length(rcAssignment.names.to.fill) > 0){
opt.rcAssignment[rcAssignment.names.to.fill] <-
opt.discovery[rcAssignment.names.to.fill]
}
}
}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No tests cover the newly introduced parameter yet; I will add them later.

opt.rcAssignment <- setRcAssignmentParameters(rcAssignmentParameters = opt.rcAssignment)
opt.em <- setEmParameters(emParameters = opt.em)
Comment on lines +197 to +201
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change introduces opt.rcAssignment but opt.rcAssignment is always defaulted independently of opt.discovery. Previously, opt.discovery values (e.g., min.exonDistance, min.primarySecondaryDist, min.primarySecondaryDistStartEnd2) controlled both transcript discovery and read-class assignment; now user-supplied values in opt.discovery will no longer affect assignment unless duplicated in opt.rcAssignment (behavior/API change). To keep backward compatibility, consider seeding opt.rcAssignment from overlapping keys in opt.discovery when opt.rcAssignment is NULL (or when specific keys are missing), and optionally warn/deprecate the old pathway.

Copilot uses AI. Check for mistakes.
bpParameters <- setBiocParallelParameters(reads, ncore, verbose, demultiplexed)
xgb.set.config(nthread = 1)
# this step proceeds only when reads is not NULL; otherwise, it jumps to the quant step
Expand All @@ -214,7 +224,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
genomeSequence = genome,
readClass.outputDir = rcOutDir, yieldSize = yieldSize,
bpParameters = bpParameters, stranded = stranded, verbose = verbose,
isoreParameters = isoreParameters, trackReads = trackReads,
discoveryParameters = opt.discovery, trackReads = trackReads,
fusionMode = fusionMode,
processByChromosome = processByChromosome, processByBam = processByBam,
demultiplexed = demultiplexed,
Expand All @@ -227,14 +237,14 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
if (discovery) {
message("--- Start extending annotations ---")
extendedAnnotations <- bambu.extendAnnotations(readClassList, annotations, NDR,
isoreParameters, stranded, bpParameters, fusionMode, verbose)
opt.discovery, stranded, bpParameters, fusionMode, verbose)
metadata(extendedAnnotations)$warnings = warnings

#### cluster based transcript discovery
if(!is.null(clusters)){
annotations.clusters <- isore.extendAnnotations.clusters(readClassList,
annotations, clusters, NDR,
isoreParameters, stranded, bpParameters, fusionMode, verbose = FALSE)
annotations, clusters, NDR,
opt.discovery, stranded, bpParameters, fusionMode, verbose = FALSE)
metadata(extendedAnnotations)$clusters <- annotations.clusters
}
annotations <- extendedAnnotations
Expand All @@ -246,9 +256,9 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
quantData <- bplapply(seq_along(readClassList), function(i){
assignReadClasstoTranscripts(
readClassList = readClassList[[i]],
annotations = annotations,
isoreParameters = isoreParameters,
verbose = verbose,
annotations = annotations,
rcAssignmentParameters = opt.rcAssignment,
verbose = verbose,
# for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample
sampleMetadata = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
demultiplexed = demultiplexed,
Expand All @@ -263,10 +273,10 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
if (quant) {
message("--- Start isoform EM quantification ---")
if(!is.null(NDR) & !discovery)# this step is used when reset NDR is needed
annotations <- setNDR(annotations, NDR,
prefix = isoreParameters$prefix,
baselineFDR = isoreParameters[["baselineFDR"]],
defaultModels2 = isoreParameters[["defaultModels"]])
annotations <- setNDR(annotations, NDR,
prefix = opt.discovery$prefix,
baselineFDR = opt.discovery[["baselineFDR"]],
defaultModels2 = opt.discovery[["defaultModels"]])
if(length(annotations)==0) stop("No valid annotations, if running
de novo please try less stringent parameters")
if(is.null(quantData)) stop("quantData must be provided or assignDist = TRUE")
Expand Down Expand Up @@ -311,8 +321,8 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
}
return(bambu.quantify(readClassDt = metadata(quantData_i)$readClassDt, countMatrix = countMatrix,
incompatibleCountMatrix = data.table(GENEID.i = as.numeric(rownames(metadata(quantData_i)$incompatibleCountMatrix)), counts = incompatibleCountMatrix),
txid.index = mcols(annotations)$txid, GENEIDs = GENEIDs.i, isoreParameters = isoreParameters,
emParameters = emParameters, trackReads = trackReads,
txid.index = mcols(annotations)$txid, GENEIDs = GENEIDs.i,
emParameters = opt.em, trackReads = trackReads,
verbose = verbose))},
BPPARAM = bpParameters)
end.ptm <- proc.time()
Expand Down
Loading
Loading