Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
14f5547
tidy up code
lingminhao Feb 15, 2026
d63fdae
refactor generateColData to take sampleData as argument
lingminhao Feb 15, 2026
f86ce9c
refactor combineCountSes to inherit colData directly from quantData
lingminhao Feb 15, 2026
ed26b1f
update colData for pseudobulk single-cell
lingminhao Feb 15, 2026
f9554b5
add sampleData argument
lingminhao Feb 15, 2026
0307855
remove spatial argument from bambu
lingminhao Feb 19, 2026
8e25154
rename colData parameter combineCountSes to colDataList (avoid same …
lingminhao Feb 19, 2026
c29eab4
update bambu sampleData parameter description
lingminhao Feb 19, 2026
bfa131e
refine sampleData input check description
lingminhao Feb 19, 2026
9c2d8ba
tidy up spatial & sampleData argument
lingminhao Feb 20, 2026
fda300b
change sampleData to sampleMetadata in assignReadClasstoTranscripts f…
lingminhao Feb 20, 2026
e80cb39
fix bug: omit the check for NA elements in sampleData
lingminhao Feb 20, 2026
7d60435
allow .csv/.tsv/.txt file input type in sampleData
lingminhao Mar 2, 2026
8f7f506
refactor: store sampleData in readClassList for parsing
lingminhao Mar 27, 2026
547e034
update comment to describe CB/UMI parsing from bam
lingminhao Mar 27, 2026
586cdb3
change priority in CB & UMI name extraction from bam file
lingminhao Mar 27, 2026
4bd48bf
fix: standardize list access for all sample sizes
lingminhao Mar 27, 2026
807ad06
remove redundant code
lingminhao Mar 27, 2026
44e791d
correct order of extracted barcode to match devel_pre_v4
lingminhao Apr 9, 2026
446407a
tidy for cleaner code
lingminhao Apr 9, 2026
93b2e7d
add back row.names to prevent missing colnames in se_pseudobulk
lingminhao Apr 9, 2026
01f4ff8
correct order of extracted barcode to match devel_pre_v4
lingminhao Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions R/bambu-assignDist.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' @import data.table
#' @noRd
assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters,
verbose, demultiplexed, spatial,
verbose, sampleMetadata, demultiplexed,
returnDistTable = FALSE, trackReads = TRUE) {
Comment thread
lingminhao marked this conversation as resolved.
if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
Expand All @@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
mutate(aval = 1) %>%
data.table()
#return non-em counts
ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
ColData <- generateColData(readClassList, sampleMetadata, demultiplexed)
quantData <- SummarizedExperiment(assays = SimpleList(
counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
rowRanges = annotations,
Expand All @@ -32,7 +32,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
metadata(quantData)$readClassDt <- readClassDt
metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix
metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix
metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames
metadata(quantData)$sampleName <- metadata(readClassList)$sampleData$sampleName
if(returnDistTable)
metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld

Expand Down
39 changes: 21 additions & 18 deletions R/bambu-processReads.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
returnModel <- isoreParameters[["returnModel"]]
min.exonOverlap <- isoreParameters[["min.exonOverlap"]]

if(processByBam){ # bulk mode
if(processByBam){
readClassList <- bplapply(seq_along(reads), function(i) {
bambu.processReadsByFile(bam.file = reads[i],
genomeSequence = genomeSequence,annotations = annotations,
Expand All @@ -64,7 +64,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode,
demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)},
BPPARAM = bpParameters)
} else { # single cell mode
} else {
readGrgList <- bplapply(seq_along(reads), function(i) {
bambu.readsByFile(bam.file = reads[i],
genomeSequence = genomeSequence,annotations = annotations,
Expand Down Expand Up @@ -173,13 +173,6 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,

mcols(readGrgList)$id <- seq_along(readGrgList)

sampleName <- names(bam.file)[1]
if(!isFALSE(demultiplexed)){
mcols(readGrgList)$CB <- paste0(sampleName, '_', mcols(readGrgList)$CB)
} else{
mcols(readGrgList)$CB <- sampleName
}
mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
if(!isFALSE(demultiplexed)){
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
} else {
Expand Down Expand Up @@ -217,9 +210,20 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
fusionMode = fusionMode,
verbose = verbose)

metadata(se)$samples <- names(bam.file)[1]
metadata(se)$sampleNames <- names(bam.file)[1]
if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$CB)
if (demultiplexed) {
barcodes <- levels(mcols(readGrgList)$CB)
metadata(se)$sampleData <- tibble(
id = paste(names(bam.file)[1], barcodes, sep = '_'),
sampleName = names(bam.file)[1],
barcode = barcodes
)
} else{
metadata(se)$sampleData <- tibble(
id = names(bam.file)[1],
sampleName = names(bam.file)[1]
)
}

Comment thread
jonathangoeke marked this conversation as resolved.
return(se)
}

Expand Down Expand Up @@ -307,7 +311,6 @@ constructReadClasses <- function(readGrgList, genomeSequence, annotations,
stranded = FALSE, min.readCount = 2,
fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE,
verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE){
warnings <- c() ###TODO

if(processByChromosome){
# construct read classes for each chromosome seperately
Expand Down Expand Up @@ -403,12 +406,12 @@ splitReadClassFiles = function(readClassFile){
i = rep(seq_along(counts.table), lengths(counts.table)),
j = as.numeric(names(unlist(counts.table))),
x = unlist(counts.table),
dims = c(nrow(eqClasses), length(metadata(readClassFile)$samples)))
dims = c(nrow(eqClasses), nrow(metadata(readClassFile)$sampleData)))
#incompatible counts
distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible
if(nrow(distTable)==0) {
counts.incompatible <- sparseMatrix(i= 1, j = 1, x = 0,
dims = c(1, length(metadata(readClassFile)$samples)))
dims = c(1, length(metadata(readClassFile)$sampleData$id)))
rownames(counts.incompatible) <- "TODO"
} else{
distTable$sampleIDs <- rowData(readClassFile)$sampleIDs[match(distTable$readClassId, rownames(readClassFile))]
Expand All @@ -419,11 +422,11 @@ splitReadClassFiles = function(readClassFile){
i = rep(seq_along(counts.table), lengths(counts.table)),
j = as.numeric(names(unlist(counts.table))),
x = unlist(counts.table),
dims = c(nrow(distTable), length(metadata(readClassFile)$samples)))
colnames(counts.incompatible) <- metadata(readClassFile)$samples
dims = c(nrow(distTable), length(metadata(readClassFile)$sampleData$id)))
colnames(counts.incompatible) <- metadata(readClassFile)$sampleData$id
rownames(counts.incompatible) <- distTable$GENEID.i
}
colnames(counts) <- metadata(readClassFile)$samples
colnames(counts) <- metadata(readClassFile)$sampleData$id
metadata(readClassFile)$eqClassById <- eqClasses$eqClassById
#rownames(counts) = eqClasses$eqClassById
metadata(readClassFile)$countMatrix <- counts
Expand Down
22 changes: 13 additions & 9 deletions R/bambu-processReads_utilityConstructReadClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,28 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
Please report this")
start.ptm <- proc.time()
if(!is.null(uniqueJunctions)){
exonsByRC.spliced <- constructSplicedReadClasses(
uniqueJunctions = uniqueJunctions,
unlisted_junctions = unlisted_junctions,
readGrgList = readGrgList,
stranded = stranded)}
else{exonsByRC.spliced = GRangesList()}
exonsByRC.spliced <- constructSplicedReadClasses(
uniqueJunctions = uniqueJunctions,
unlisted_junctions = unlisted_junctions,
readGrgList = readGrgList,
stranded = stranded)
} else{
exonsByRC.spliced = GRangesList()
}
end.ptm <- proc.time()
rm(readGrgList, unlisted_junctions, uniqueJunctions)
if (verbose)
message("Finished creating transcript models (read classes) for reads with ",
"spliced junctions in ", round((end.ptm - start.ptm)[3] / 60, 1)," mins.")
if(length(reads.singleExon)==0) {
exonsByRC.unspliced <- NULL
} else {exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon,
annotations, exonsByRC.spliced, stranded, verbose)}
} else {
exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon,
annotations, exonsByRC.spliced, stranded, verbose)
}
exonsByRC <- c(exonsByRC.spliced, exonsByRC.unspliced)
colDataDf <- DataFrame(name = runName, row.names = runName)
#TODO later remove assays = SimpleList(counts = counts)

counts <- matrix(mcols(exonsByRC)$readCount,
dimnames = list(names(exonsByRC), runName))
se <- SummarizedExperiment(assays = SimpleList(counts = counts),
Expand Down
57 changes: 32 additions & 25 deletions R/bambu.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@
#' distTables. The output is a list with an entry for each sample.
#' @param lowMemory Read classes will be processed by chromosomes when lowMemory
#' is specified. This option provides an efficient way to process big samples.
#' @param sampleData A character vector of paths to metadata CSV files (or \code{NA} if
#' unavailable for specific samples); defaults to \code{NULL}. Files must contain a
#' "sampleName" column for bulk data or a "barcode" column for single-cell/spatial data.
#' For bulk data, one metadata CSV file for all samples is sufficient, whereas single-cell/spatial
#' data requires one metadata CSV file per sample.
#' @param fusionMode A logical variable indicating whether run in fusion mode
#' @param verbose A logical variable indicating whether processing messages will
#' be printed.
Expand Down Expand Up @@ -138,8 +143,8 @@
bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE,
assignDist = TRUE, quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL,
trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE,
fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL,
trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL,
fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, quantData = NULL,
sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL,
processByChromosome = FALSE, processByBam = TRUE) {
message(paste0("Running Bambu-v", "3.9.0"))
Expand Down Expand Up @@ -173,7 +178,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
annotations <- checkInputs(annotations, reads,
readClass.outputDir = rcOutDir,
genomeSequence = genome, discovery = discovery,
sampleNames = sampleNames, spatial = spatial,quantData = quantData)
sampleNames = sampleNames, sampleData = sampleData, quantData = quantData)
}
isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery)
#below line is to be compatible with earlier version of running bambu
Expand Down Expand Up @@ -234,16 +239,19 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
}
if(assignDist){
message("--- Start calculating equivilance classes ---")
quantData <- bplapply(readClassList,
FUN = assignReadClasstoTranscripts,
annotations = annotations,
isoreParameters = isoreParameters,
verbose = verbose,
demultiplexed = demultiplexed,
spatial = spatial,
returnDistTable = returnDistTable,
trackReads = trackReads,
BPPARAM = bpParameters)
quantData <- bplapply(seq_along(readClassList), function(i){
assignReadClasstoTranscripts(
readClassList = readClassList[[i]],
annotations = annotations,
isoreParameters = isoreParameters,
verbose = verbose,
# for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample
sampleMetadata = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
demultiplexed = demultiplexed,
Comment thread
lingminhao marked this conversation as resolved.
returnDistTable = returnDistTable,
trackReads = trackReads
)
}, BPPARAM = bpParameters)
if (!quant) return(quantData)
}
}
Expand All @@ -262,6 +270,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
start.ptm <- proc.time()
countsSeCompressed.all <- NULL
ColNames <- c()
colData.all <- list()
for(i in seq_along(quantData)){
quantData_i <- quantData[[i]]
#load in the barcode clustering from file if provided
Expand All @@ -285,11 +294,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
iter <- clustering

} else{ #if clusters is a list
if(length(quantData)>1){
iter <- clusters[[i]] #lowMemory mode
}else{
iter <- clusters#do.call(c,clusters)
}
iter <- clusters[[i]]
}
}
countsSeCompressed <- bplapply(iter, FUN = function(j){ # previous i changed to j to avoid duplicated assignment
Expand All @@ -310,25 +315,27 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
message("Total Time ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.")
if(!is.null(clusters)){
ColNames <- c(ColNames, names(iter))
colData.all[[i]] <- data.frame(
id = names(countsSeCompressed),
sampleName = names(countsSeCompressed),
row.names = names(countsSeCompressed)
)
} else{
ColNames <- c(ColNames, colnames(quantData_i))
colData.all[[i]] <- data.frame(colData(quantData_i))
}
countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed)
}
countsSeCompressed.all$colnames <- ColNames
countsSe <- combineCountSes(countsSeCompressed.all, annotations)
names(countsSeCompressed.all) <- ColNames

countsSe <- combineCountSes(countsSeCompressed.all, colData.all, annotations)
Comment thread
lingminhao marked this conversation as resolved.
if(returnDistTable){
distTables = list()
for(i in seq_along(quantData)){
distTables[[i]] <- metadata(quantData[[i]])$distTable
}
metadata(countsSe)$distTables <- distTables
}
#metadata(countsSe)$warnings = warnings

ColData <- generateColData(colnames(countsSe), clusters, demultiplexed, spatial)
colData(countsSe) <- ColData
colnames(countsSe) <- ColData[,1]
return(countsSe)
}
}
Loading
Loading