From 14f5547a8d632831e95af914388a9c1d4b97cee6 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Sun, 15 Feb 2026 21:33:47 +0800 Subject: [PATCH 01/22] tidy up code --- R/bambu-processReads.R | 1 - ...processReads_utilityConstructReadClasses.R | 22 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R index f144c036..29082ad6 100644 --- a/R/bambu-processReads.R +++ b/R/bambu-processReads.R @@ -307,7 +307,6 @@ constructReadClasses <- function(readGrgList, genomeSequence, annotations, stranded = FALSE, min.readCount = 2, fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE, verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE){ - warnings <- c() ###TODO if(processByChromosome){ # construct read classes for each chromosome seperately diff --git a/R/bambu-processReads_utilityConstructReadClasses.R b/R/bambu-processReads_utilityConstructReadClasses.R index fec9adbf..a9610045 100644 --- a/R/bambu-processReads_utilityConstructReadClasses.R +++ b/R/bambu-processReads_utilityConstructReadClasses.R @@ -25,12 +25,14 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions, Please report this") start.ptm <- proc.time() if(!is.null(uniqueJunctions)){ - exonsByRC.spliced <- constructSplicedReadClasses( - uniqueJunctions = uniqueJunctions, - unlisted_junctions = unlisted_junctions, - readGrgList = readGrgList, - stranded = stranded)} - else{exonsByRC.spliced = GRangesList()} + exonsByRC.spliced <- constructSplicedReadClasses( + uniqueJunctions = uniqueJunctions, + unlisted_junctions = unlisted_junctions, + readGrgList = readGrgList, + stranded = stranded) + } else{ + exonsByRC.spliced = GRangesList() + } end.ptm <- proc.time() rm(readGrgList, unlisted_junctions, uniqueJunctions) if (verbose) @@ -38,11 +40,13 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions, "spliced junctions in ", round((end.ptm - start.ptm)[3] / 60, 1)," mins.") if(length(reads.singleExon)==0) { exonsByRC.unspliced <- NULL - } else {exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon, - annotations, exonsByRC.spliced, stranded, verbose)} + } else { + exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon, + annotations, exonsByRC.spliced, stranded, verbose) + } exonsByRC <- c(exonsByRC.spliced, exonsByRC.unspliced) colDataDf <- DataFrame(name = runName, row.names = runName) - #TODO later remove assays = SimpleList(counts = counts) + counts <- matrix(mcols(exonsByRC)$readCount, dimnames = list(names(exonsByRC), runName)) se <- SummarizedExperiment(assays = SimpleList(counts = counts), From d63fdae91b762f33793b13f6db2c102969be9f56 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Sun, 15 Feb 2026 21:37:13 +0800 Subject: [PATCH 02/22] refactor generateColData to take sampleData as argument --- R/bambu_utilityFunctions.R | 70 ++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 5deac8ba..965f72e8 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -288,47 +288,37 @@ combineCountSes <- function(countsSe, annotations){ return(countsSe) } -#' Generate the coldata for se options using colnames, and other option inputs -#' @noRd -generateColData <- function(sampleNames, clusters, demultiplexed, spatial){ - ColData <- DataFrame(id = sampleNames) - if(!isFALSE(demultiplexed) & is.null(clusters)){ - ColData <- DataFrame(id = sampleNames, - sampleName = gsub("_[^_]+$","", sampleNames, perl = TRUE), - Barcode = gsub(".*_(?=[^_]*$)","", sampleNames, perl = TRUE)) - } - if(!is.null(spatial) & is.null(clusters)){ - ColData$x_coordinate <- NA - ColData$y_coordinate <- NA - if(length(spatial)==1){ - # the following line takes a regular delimited file as input - # it can either has header or without header - # it can also be compressed - bc_coords <- fread(spatial, - col.names = c("Barcode", "x_coordinate", "y_coordinate"), - data.table = FALSE) - # DataFrame(read.table(gzfile(spatial), - # col.names = c("Barcode", "x_coordinate", "y_coordinate"))) - bcMatch <- match(ColData$Barcode, bc_coords$Barcode) - ColData$x_coordinate <- bc_coords$x_coordinate[bcMatch] - ColData$y_coordinate <- bc_coords$y_coordinate[bcMatch] +#' Generate the colData using the external sampleData.csv provided by the user in the sampleData argument +#' @param readClassList A list object containingmetadata about read classes. +#' @param sampleData A path to a CSV file or NULL/NA if there is no metadata for the sample. +#' @param demultiplexed Logical; indicates if data is demultiplexed. +#' +#' @return A DataFrame containing colData for the sample. +#' @export +generateColData <- function(readClassList, sampleData, demultiplexed) { + sampleDataDf <- if (is.null(sampleData) || is.na(sampleData)) { + if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character()) + } else { + read.csv(sampleData) + } + samples <- metadata(readClassList)$samples + joinKey <- if (demultiplexed) "barcode" else "sampleName" + + colData <- tibble(id = samples) + if (demultiplexed) { + colData$sampleName = sub('_[^_]+$', '', samples) + colData$barcode <- sub('.*_', '', samples) } else{ - spatial.unique <- unique(spatial) - for(whitelist in spatial.unique){ - i <- which(spatial.unique==whitelist) - bc_coords <- fread(whitelist, - col.names = c("Barcode", "x_coordinate", "y_coordinate"), - data.table = FALSE) - # DataFrame(read.table(gzfile(whitelist), - # col.names = c("Barcode", "x_coordinate", "y_coordinate"))) - bcSampleIndex <- ColData$sampleName %in% sampleNames[i] - bcMatch <- match(ColData$Barcode[bcSampleIndex], bc_coords$Barcode) - ColData$x_coordinate[bcSampleIndex] <- bc_coords$x_coordinate[bcMatch] - ColData$y_coordinate[bcSampleIndex] <- bc_coords$y_coordinate[bcMatch] - } - } - } - return(ColData) + colData$sampleName <- samples + } + + colData <- colData %>% + left_join(sampleDataDf, by = joinKey) %>% + as.data.frame() + + rownames(colData) <- colData$id + + colData } # Quick wrapper function (https://stackoverflow.com/questions/13273833/merging-multiple-data-tables) From f86ce9c01dd2751c97d91d7ae6bd017c4c01e045 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Sun, 15 Feb 2026 21:43:08 +0800 Subject: [PATCH 03/22] refactor combineCountSes to inherit colData directly from quantData --- R/bambu.R | 10 +++------- R/bambu_utilityFunctions.R | 23 ++++++++++++----------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/R/bambu.R b/R/bambu.R index edd8a8cc..78a7df03 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -315,8 +315,9 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, } countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed) } - countsSeCompressed.all$colnames <- ColNames - countsSe <- combineCountSes(countsSeCompressed.all, annotations) + names(countsSeCompressed.all) <- ColNames + + countsSe <- combineCountSes(countsSeCompressed.all, colData.all, annotations) if(returnDistTable){ distTables = list() for(i in seq_along(quantData)){ @@ -324,11 +325,6 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, } metadata(countsSe)$distTables <- distTables } - #metadata(countsSe)$warnings = warnings - - ColData <- generateColData(colnames(countsSe), clusters, demultiplexed, spatial) - colData(countsSe) <- ColData - colnames(countsSe) <- ColData[,1] return(countsSe) } } \ No newline at end of file diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 965f72e8..b5960c00 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -255,13 +255,11 @@ calculateDistTable <- function(readClassList, annotations, isoreParameters, verb return(readClassDist) } -#' Combine count se object while preserving the metadata objects +#' Combine combined count se object from multiple samples, cells or spatial locations #' @noRd -combineCountSes <- function(countsSe, annotations){ - countsData <- c("counts", "CPM", "fullLengthCounts", - "uniqueCounts", "incompatibleCounts") - sampleNames <- countsSe$colnames - countsSe$colnames <- NULL +combineCountSes <- function(countsSe, colData, annotations){ + countsData <- c("counts", "CPM", "fullLengthCounts", "uniqueCounts", "incompatibleCounts") + sampleNames <- names(countsSe) countsDataMat <- lapply(countsData, FUN = function(k){ countsVecList <- lapply(countsSe, function(j){j[[k]]}) countsMat <- sparseMatrix(i = unlist(lapply(countsVecList, function(j) j@i)), @@ -279,13 +277,16 @@ combineCountSes <- function(countsSe, annotations){ return(countsMat) }) names(countsDataMat) <- countsData - countsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, + combinedCountsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, CPM = countsDataMat$CPM, fullLengthCounts = countsDataMat$fullLengthCounts, uniqueCounts = countsDataMat$uniqueCounts)) - metadata(countsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts - rowRanges(countsSe) <- annotations - return(countsSe) + metadata(combinedCountsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts + rowRanges(combinedCountsSe) <- annotations + + colData(combinedCountsSe) <- DataFrame(bind_rows(colData)) + + return(combinedCountsSe) } #' Generate the colData using the external sampleData.csv provided by the user in the sampleData argument @@ -308,7 +309,7 @@ generateColData <- function(readClassList, sampleData, demultiplexed) { if (demultiplexed) { colData$sampleName = sub('_[^_]+$', '', samples) colData$barcode <- sub('.*_', '', samples) - } else{ + } else{ colData$sampleName <- samples } From ed26b1ffdbe884780ccaa788d0a45f2dd0c24abb Mon Sep 17 00:00:00 2001 From: lingminhao Date: Sun, 15 Feb 2026 21:44:04 +0800 Subject: [PATCH 04/22] update colData for pseudobulk single-cell --- R/bambu.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/bambu.R b/R/bambu.R index 78a7df03..82ae0253 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -262,6 +262,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, start.ptm <- proc.time() countsSeCompressed.all <- NULL ColNames <- c() + colData.all <- list() for(i in seq_along(quantData)){ quantData_i <- quantData[[i]] #load in the barcode clustering from file if provided @@ -310,8 +311,14 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, message("Total Time ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.") if(!is.null(clusters)){ ColNames <- c(ColNames, names(iter)) + colData.all[[i]] <- data.frame( + id = names(countsSeCompressed), + sampleName = names(countsSeCompressed), + row.names = names(countsSeCompressed) + ) } else{ ColNames <- c(ColNames, colnames(quantData_i)) + colData.all[[i]] <- data.frame(colData(quantData_i)) } countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed) } From f9554b5096351e3c8a7d652c30adc7637e264f64 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Sun, 15 Feb 2026 21:44:37 +0800 Subject: [PATCH 05/22] add sampleData argument --- R/bambu-assignDist.R | 4 ++-- R/bambu.R | 26 +++++++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R index be73e0a8..d5c626ed 100644 --- a/R/bambu-assignDist.R +++ b/R/bambu-assignDist.R @@ -3,7 +3,7 @@ #' @import data.table #' @noRd assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, - verbose, demultiplexed, spatial, + verbose, sampleData, demultiplexed, spatial, returnDistTable = FALSE, trackReads = TRUE) { if (is.character(readClassList)) readClassList <- readRDS(file = readClassList) metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable) @@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame mutate(aval = 1) %>% data.table() #return non-em counts - ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial) + ColData <- generateColData(readClassList, sampleData, demultiplexed) quantData <- SummarizedExperiment(assays = SimpleList( counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)), rowRanges = annotations, diff --git a/R/bambu.R b/R/bambu.R index 82ae0253..c30b3553 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -138,7 +138,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, assignDist = TRUE, quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL, - trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, + trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL, fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL, sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL, processByChromosome = FALSE, processByBam = TRUE) { @@ -234,16 +234,20 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, } if(assignDist){ message("--- Start calculating equivilance classes ---") - quantData <- bplapply(readClassList, - FUN = assignReadClasstoTranscripts, - annotations = annotations, - isoreParameters = isoreParameters, - verbose = verbose, - demultiplexed = demultiplexed, - spatial = spatial, - returnDistTable = returnDistTable, - trackReads = trackReads, - BPPARAM = bpParameters) + quantData <- bplapply(seq_along(readClassList), function(i){ + assignReadClasstoTranscripts( + readClassList = readClassList[[i]], + annotations = annotations, + isoreParameters = isoreParameters, + verbose = verbose, + # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample + sampleData = if(length(sampleData) == 1) sampleData[1] else sampleData[i], + demultiplexed = demultiplexed, + spatial = spatial, + returnDistTable = returnDistTable, + trackReads = trackReads + ) + }, BPPARAM = bpParameters) if (!quant) return(quantData) } } From 03078553b0b6301cafd4bae5990ae96e006d19c8 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 19 Feb 2026 09:02:25 +0800 Subject: [PATCH 06/22] remove spatial argument from bambu --- R/bambu-assignDist.R | 2 +- R/bambu.R | 3 +-- R/bambu_utilityFunctions.R | 14 +++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R index d5c626ed..8e331223 100644 --- a/R/bambu-assignDist.R +++ b/R/bambu-assignDist.R @@ -3,7 +3,7 @@ #' @import data.table #' @noRd assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, - verbose, sampleData, demultiplexed, spatial, + verbose, sampleData, demultiplexed, returnDistTable = FALSE, trackReads = TRUE) { if (is.character(readClassList)) readClassList <- readRDS(file = readClassList) metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable) diff --git a/R/bambu.R b/R/bambu.R index c30b3553..dc3271ae 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -173,7 +173,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, annotations <- checkInputs(annotations, reads, readClass.outputDir = rcOutDir, genomeSequence = genome, discovery = discovery, - sampleNames = sampleNames, spatial = spatial,quantData = quantData) + sampleNames = sampleNames, sampleData = sampleData, quantData = quantData) } isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery) #below line is to be compatible with earlier version of running bambu @@ -243,7 +243,6 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample sampleData = if(length(sampleData) == 1) sampleData[1] else sampleData[i], demultiplexed = demultiplexed, - spatial = spatial, returnDistTable = returnDistTable, trackReads = trackReads ) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index b5960c00..8740cc52 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -73,7 +73,7 @@ updateParameters <- function(Parameters, Parameters.default) { #' @importFrom methods is #' @noRd checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, - discovery, sampleNames, spatial, quantData){ + discovery, sampleNames, quantData){ # ===# Check annotation inputs #===# if (!is.null(annotations)) { if (is(annotations, "CompressedGRangesList")) { @@ -156,12 +156,12 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, } } - if(!is.null(spatial)){ - #if(!all(grepl(".tsv^", spatial))){stop("Not all paths for spatial are .tsv files")} - if(length(spatial)==1 & length(reads)>1){ - warning("Using the same whitelist and coordinates for all input samples") - } else if(length(reads)!=length(spatial)){ - stop("There are not the same number spatial whitelist paths as input files to reads. ", + if(!is.null(sampleData)){ + if(!all(grepl(".csv^", sampleData))){stop("Not all paths for sample metadata files are .csv files")} + if(length(sampleData)==1 & length(reads)>1){ + warning("Using the same sample metadata file for all input samples") + } else if(length(reads)!=length(sampleData)){ + stop("There are not the same number sample metadata files paths as input files to reads. ", "Make sure these two arguments are vectors of the same length") } } From 8e25154d48e740020099334b11918cd37a40c1b4 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 19 Feb 2026 09:09:35 +0800 Subject: [PATCH 07/22] rename colData parameter combineCountSes to colDataList (avoid same name as function) --- R/bambu_utilityFunctions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 8740cc52..7b3c647f 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -257,7 +257,7 @@ calculateDistTable <- function(readClassList, annotations, isoreParameters, verb #' Combine combined count se object from multiple samples, cells or spatial locations #' @noRd -combineCountSes <- function(countsSe, colData, annotations){ +combineCountSes <- function(countsSe, colDataList, annotations){ countsData <- c("counts", "CPM", "fullLengthCounts", "uniqueCounts", "incompatibleCounts") sampleNames <- names(countsSe) countsDataMat <- lapply(countsData, FUN = function(k){ @@ -284,7 +284,7 @@ combineCountSes <- function(countsSe, colData, annotations){ metadata(combinedCountsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts rowRanges(combinedCountsSe) <- annotations - colData(combinedCountsSe) <- DataFrame(bind_rows(colData)) + colData(combinedCountsSe) <- DataFrame(bind_rows(colDataList)) return(combinedCountsSe) } From c29eab421ac9133e74e5b4a5f484fc9677939c39 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 19 Feb 2026 09:32:16 +0800 Subject: [PATCH 08/22] update bambu sampleData parameter description --- R/bambu.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/bambu.R b/R/bambu.R index dc3271ae..ef18fed3 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -93,6 +93,11 @@ #' distTables. The output is a list with an entry for each sample. #' @param lowMemory Read classes will be processed by chromosomes when lowMemory #' is specified. This option provides an efficient way to process big samples. +#' @param sampleData A character vector of paths to metadata CSV files (or \code{NA} if +#' unavailable for specific samples); defaults to \code{NULL}. Files must contain a +#' "sampleName" column for bulk data or a "barcode" column for single-cell/spatial data. +#' For bulk data, one metadata CSV file for all samples is sufficient, whereas single-cell/spatial +#' data requires one metadata CSV file per sample. #' @param fusionMode A logical variable indicating whether run in fusion mode #' @param verbose A logical variable indicating whether processing messages will #' be printed. From bfa131e583b93ca40704cab5fe395447f67a21bb Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 19 Feb 2026 09:46:54 +0800 Subject: [PATCH 09/22] refine sampleData input check description --- R/bambu_utilityFunctions.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 7b3c647f..1dec180a 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -158,11 +158,14 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, if(!is.null(sampleData)){ if(!all(grepl(".csv^", sampleData))){stop("Not all paths for sample metadata files are .csv files")} - if(length(sampleData)==1 & length(reads)>1){ - warning("Using the same sample metadata file for all input samples") - } else if(length(reads)!=length(sampleData)){ - stop("There are not the same number sample metadata files paths as input files to reads. ", - "Make sure these two arguments are vectors of the same length") + if(length(sampleData)==1 & length(reads)>1){ # normally used for bulk samples + message("Using the same sample metadata file for all input samples") + } else if(length(reads)!=length(sampleData)){ # normally used for single-cell/spatial samples + stop( + "The number of sample metadata files does not match the number of input read files. ", + "These two arguments (sampleData & reads) must be vectors of the same length. ", + "If a specific sample has no metadata, please use 'NA' as a placeholder in the sampleData vector." + ) } } return(annotations) From 9c2d8bab127646f26ebfff6837cf4beec5a15cbf Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 20 Feb 2026 14:00:53 +0800 Subject: [PATCH 10/22] tidy up spatial & sampleData argument --- R/bambu.R | 2 +- R/bambu_utilityFunctions.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bambu.R b/R/bambu.R index ef18fed3..b391a2c3 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -144,7 +144,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, assignDist = TRUE, quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL, trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL, - fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL, + fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, quantData = NULL, sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL, processByChromosome = FALSE, processByBam = TRUE) { message(paste0("Running Bambu-v", "3.9.0")) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 1dec180a..3424698a 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -73,7 +73,7 @@ updateParameters <- function(Parameters, Parameters.default) { #' @importFrom methods is #' @noRd checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, - discovery, sampleNames, quantData){ + discovery, sampleNames, sampleData, quantData){ # ===# Check annotation inputs #===# if (!is.null(annotations)) { if (is(annotations, "CompressedGRangesList")) { From fda300b1b9200188bf5d81e0e3b1bbf637c3d079 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 20 Feb 2026 14:03:42 +0800 Subject: [PATCH 11/22] change sampleData to sampleMetadata in assignReadClasstoTranscripts for variable clarity --- R/bambu-assignDist.R | 4 ++-- R/bambu.R | 2 +- R/bambu_utilityFunctions.R | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R index 8e331223..c5908ace 100644 --- a/R/bambu-assignDist.R +++ b/R/bambu-assignDist.R @@ -3,7 +3,7 @@ #' @import data.table #' @noRd assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, - verbose, sampleData, demultiplexed, + verbose, sampleMetadata, demultiplexed, returnDistTable = FALSE, trackReads = TRUE) { if (is.character(readClassList)) readClassList <- readRDS(file = readClassList) metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable) @@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame mutate(aval = 1) %>% data.table() #return non-em counts - ColData <- generateColData(readClassList, sampleData, demultiplexed) + ColData <- generateColData(readClassList, sampleMetadata, demultiplexed) quantData <- SummarizedExperiment(assays = SimpleList( counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)), rowRanges = annotations, diff --git a/R/bambu.R b/R/bambu.R index b391a2c3..0254ca4e 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -246,7 +246,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, isoreParameters = isoreParameters, verbose = verbose, # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample - sampleData = if(length(sampleData) == 1) sampleData[1] else sampleData[i], + sampleMetadata = if(length(sampleData) == 1) sampleData[1] else sampleData[i], demultiplexed = demultiplexed, returnDistTable = returnDistTable, trackReads = trackReads diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 3424698a..16c53f8e 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -292,18 +292,18 @@ combineCountSes <- function(countsSe, colDataList, annotations){ return(combinedCountsSe) } -#' Generate the colData using the external sampleData.csv provided by the user in the sampleData argument +#' Generate the colData using the external sampleMetadata.csv provided by the user in the sampleMetadata argument #' @param readClassList A list object containingmetadata about read classes. -#' @param sampleData A path to a CSV file or NULL/NA if there is no metadata for the sample. +#' @param sampleMetadata A path to a CSV file or NULL/NA if there is no metadata for the sample. #' @param demultiplexed Logical; indicates if data is demultiplexed. #' #' @return A DataFrame containing colData for the sample. #' @export -generateColData <- function(readClassList, sampleData, demultiplexed) { - sampleDataDf <- if (is.null(sampleData) || is.na(sampleData)) { +generateColData <- function(readClassList, sampleMetadata, demultiplexed) { + sampleMetadataDf <- if (is.null(sampleMetadata) || is.na(sampleMetadata)) { if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character()) } else { - read.csv(sampleData) + read.csv(sampleMetadata) } samples <- metadata(readClassList)$samples joinKey <- if (demultiplexed) "barcode" else "sampleName" @@ -317,7 +317,7 @@ generateColData <- function(readClassList, sampleData, demultiplexed) { } colData <- colData %>% - left_join(sampleDataDf, by = joinKey) %>% + left_join(sampleMetadataDf, by = joinKey) %>% as.data.frame() rownames(colData) <- colData$id From e80cb39ddce340fbd5d6e85ad067bfe0624a3959 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 20 Feb 2026 14:04:19 +0800 Subject: [PATCH 12/22] fix bug: omit the check for NA elements in sampleData --- R/bambu_utilityFunctions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 16c53f8e..0f641e52 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -157,7 +157,9 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, } if(!is.null(sampleData)){ - if(!all(grepl(".csv^", sampleData))){stop("Not all paths for sample metadata files are .csv files")} + if (!all(grepl("\\.csv$", na.omit(sampleData)))){ + stop("Not all paths for sample metadata files are .csv files") + } if(length(sampleData)==1 & length(reads)>1){ # normally used for bulk samples message("Using the same sample metadata file for all input samples") } else if(length(reads)!=length(sampleData)){ # normally used for single-cell/spatial samples From 7d60435ae944acbaff84ad7b18a4edc6b2fd70be Mon Sep 17 00:00:00 2001 From: lingminhao Date: Mon, 2 Mar 2026 15:11:00 +0800 Subject: [PATCH 13/22] allow . csv/.tsv/.txt file input type in sampleData --- R/bambu_utilityFunctions.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 0f641e52..0f88a8b0 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -157,12 +157,12 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, } if(!is.null(sampleData)){ - if (!all(grepl("\\.csv$", na.omit(sampleData)))){ - stop("Not all paths for sample metadata files are .csv files") + if (!all(grepl("\\.(csv|tsv|txt)$", na.omit(sampleData), ignore.case = TRUE))){ + stop("Not all paths for sample metadata files are .csv/.tsv/.txt files") } - if(length(sampleData)==1 & length(reads)>1){ # normally used for bulk samples + if(length(sampleData)==1 & length(reads)>1){ # one sample metadata for all samples message("Using the same sample metadata file for all input samples") - } else if(length(reads)!=length(sampleData)){ # normally used for single-cell/spatial samples + } else if(length(reads)!=length(sampleData)){ # multiple sample metadatas for multiple samples stop( "The number of sample metadata files does not match the number of input read files. ", "These two arguments (sampleData & reads) must be vectors of the same length. ", @@ -305,7 +305,7 @@ generateColData <- function(readClassList, sampleMetadata, demultiplexed) { sampleMetadataDf <- if (is.null(sampleMetadata) || is.na(sampleMetadata)) { if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character()) } else { - read.csv(sampleMetadata) + fread(sampleMetadata) } samples <- metadata(readClassList)$samples joinKey <- if (demultiplexed) "barcode" else "sampleName" From 8f7f506c89900c7985232d82977b53a844f33409 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 27 Mar 2026 10:02:07 +0800 Subject: [PATCH 14/22] refactor: store sampleData in readClassList for parsing --- R/bambu-assignDist.R | 2 +- R/bambu-processReads.R | 34 ++++++++++++++++++++-------------- R/bambu_utilityFunctions.R | 14 ++++++++------ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R index c5908ace..750c1e87 100644 --- a/R/bambu-assignDist.R +++ b/R/bambu-assignDist.R @@ -32,7 +32,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame metadata(quantData)$readClassDt <- readClassDt metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix - metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames + metadata(quantData)$sampleName <- metadata(readClassList)$sampleData$sampleName if(returnDistTable) metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R index 29082ad6..694cec03 100644 --- a/R/bambu-processReads.R +++ b/R/bambu-processReads.R @@ -174,13 +174,9 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, mcols(readGrgList)$id <- seq_along(readGrgList) sampleName <- names(bam.file)[1] - if(!isFALSE(demultiplexed)){ - mcols(readGrgList)$CB <- paste0(sampleName, '_', mcols(readGrgList)$CB) - } else{ - mcols(readGrgList)$CB <- sampleName - } - mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB) + if(!isFALSE(demultiplexed)){ + mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB) mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB) } else { mcols(readGrgList)$sampleID <- index @@ -217,9 +213,19 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, fusionMode = fusionMode, verbose = verbose) - metadata(se)$samples <- names(bam.file)[1] - metadata(se)$sampleNames <- names(bam.file)[1] - if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$CB) + if (demultiplexed) { + metadata(se)$sampleData <- tibble( + id = paste(names(bam.file)[1], levels(mcols(readGrgList)$CB), sep = '_'), + sampleName = names(bam.file)[1], + barcode = levels(mcols(readGrgList)$CB) + ) + } else{ + metadata(se)$sampleData <- tibble( + id = names(bam.file)[1], + sampleName = names(bam.file)[1] + ) + } + return(se) } @@ -402,12 +408,12 @@ splitReadClassFiles = function(readClassFile){ i = rep(seq_along(counts.table), lengths(counts.table)), j = as.numeric(names(unlist(counts.table))), x = unlist(counts.table), - dims = c(nrow(eqClasses), length(metadata(readClassFile)$samples))) + dims = c(nrow(eqClasses), length(metadata(readClassFile)$sampleData$id))) #incompatible counts distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible if(nrow(distTable)==0) { counts.incompatible <- sparseMatrix(i= 1, j = 1, x = 0, - dims = c(1, length(metadata(readClassFile)$samples))) + dims = c(1, length(metadata(readClassFile)$sampleData$id))) rownames(counts.incompatible) <- "TODO" } else{ distTable$sampleIDs <- rowData(readClassFile)$sampleIDs[match(distTable$readClassId, rownames(readClassFile))] @@ -418,11 +424,11 @@ splitReadClassFiles = function(readClassFile){ i = rep(seq_along(counts.table), lengths(counts.table)), j = as.numeric(names(unlist(counts.table))), x = unlist(counts.table), - dims = c(nrow(distTable), length(metadata(readClassFile)$samples))) - colnames(counts.incompatible) <- metadata(readClassFile)$samples + dims = c(nrow(distTable), length(metadata(readClassFile)$sampleData$id))) + colnames(counts.incompatible) <- metadata(readClassFile)$sampleData$id rownames(counts.incompatible) <- distTable$GENEID.i } - colnames(counts) <- metadata(readClassFile)$samples + colnames(counts) <- metadata(readClassFile)$sampleData$id metadata(readClassFile)$eqClassById <- eqClasses$eqClassById #rownames(counts) = eqClasses$eqClassById metadata(readClassFile)$countMatrix <- counts diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 0f88a8b0..a80c70a3 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -307,15 +307,17 @@ generateColData <- function(readClassList, sampleMetadata, demultiplexed) { } else { fread(sampleMetadata) } - samples <- metadata(readClassList)$samples + joinKey <- if (demultiplexed) "barcode" else "sampleName" - - colData <- tibble(id = samples) + if (demultiplexed) { - colData$sampleName = sub('_[^_]+$', '', samples) - colData$barcode <- sub('.*_', '', samples) + colData <- tibble( + id = paste(metadata(readClassList)$sampleData$sampleName, metadata(readClassList)$sampleData$barcode, sep = '_'), + sampleName = metadata(readClassList)$sampleData$sampleName, + barcode = metadata(readClassList)$sampleData$barcode + ) } else{ - colData$sampleName <- samples + colData <- tibble(id = metadata(readClassList)$sampleData$sampleName, sampleName = metadata(readClassList)$sampleData$sampleName) } colData <- colData %>% From 547e0348abaa635486c51b5c41c90595872bcb86 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 27 Mar 2026 10:16:50 +0800 Subject: [PATCH 15/22] update comment to describe CB/UMI parsing from bam --- R/prepareDataFromBam.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R index e9634a92..11ce950d 100755 --- a/R/prepareDataFromBam.R +++ b/R/prepareDataFromBam.R @@ -46,11 +46,13 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, if (!isFALSE(demultiplexed)){ # if demultiplexed is TRUE or a string path if(isTRUE(demultiplexed)){ # if demultiplexed is TRUE - mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether CB is contained in the name, with specific format CB_UMI#READNAME, + # a checkpoint to parse CB and UMI from the bam file, either from reads or CB/UMI tags. + # currently read name only accepts the format CB_UMI#READNAME (CB & UMI cannot have '_', otherwise parsing fails) + mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB, TRUE ~ NA) - mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format CB_UMI#READNAME, + mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB, TRUE ~ NA) } else{ # if demultiplexed is a string path From 586cdb36d76b4432dd127acdb4badd51d8afb7c6 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 27 Mar 2026 10:30:38 +0800 Subject: [PATCH 16/22] change priority in CB & UMI name extraction from bam file --- R/prepareDataFromBam.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R index 11ce950d..980bd312 100755 --- a/R/prepareDataFromBam.R +++ b/R/prepareDataFromBam.R @@ -48,13 +48,18 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, # a checkpoint to parse CB and UMI from the bam file, either from reads or CB/UMI tags. # currently read name only accepts the format CB_UMI#READNAME (CB & UMI cannot have '_', otherwise parsing fails) - mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), - !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB, - TRUE ~ NA) + mcols(readGrgList[[counter]])$CB <- case_when( + !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB, + grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), + TRUE ~ NA + ) - mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), - !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB, - TRUE ~ NA) + mcols(readGrgList[[counter]])$UMI <- case_when( + !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB, + grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), + TRUE ~ NA + ) + } else{ # if demultiplexed is a string path mcols(readGrgList[[counter]])$CB <- NA mcols(readGrgList[[counter]])$UMI <- NA From 4bd48bf163c45365626402ec6317cb3e6d0ec250 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 27 Mar 2026 16:19:46 +0800 Subject: [PATCH 17/22] fix: standardize list access for all sample sizes --- R/bambu.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/bambu.R b/R/bambu.R index 0254ca4e..ac14344f 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -294,11 +294,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, iter <- clustering } else{ #if clusters is a list - if(length(quantData)>1){ - iter <- clusters[[i]] #lowMemory mode - }else{ - iter <- clusters#do.call(c,clusters) - } + iter <- clusters[[i]] } } countsSeCompressed <- bplapply(iter, FUN = function(j){ # previous i changed to j to avoid duplicated assignment From 807ad06b96a87a60218b3b040b2bcbe2c011b974 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Fri, 27 Mar 2026 17:02:08 +0800 Subject: [PATCH 18/22] remove redundant code --- R/bambu.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/bambu.R b/R/bambu.R index ac14344f..ab310f5b 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -317,8 +317,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, ColNames <- c(ColNames, names(iter)) colData.all[[i]] <- data.frame( id = names(countsSeCompressed), - sampleName = names(countsSeCompressed), - row.names = names(countsSeCompressed) + sampleName = names(countsSeCompressed) ) } else{ ColNames <- c(ColNames, colnames(quantData_i)) From 44e791dc1b9ed81663712f53aec90dbd31789a20 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 9 Apr 2026 15:00:56 +0800 Subject: [PATCH 19/22] correct order of extracted barcode to match devel_pre_v4 --- R/bambu-processReads.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R index 694cec03..b1442d1d 100644 --- a/R/bambu-processReads.R +++ b/R/bambu-processReads.R @@ -214,10 +214,11 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, verbose = verbose) if (demultiplexed) { + barcodes <- levels(mcols(readGrgList)$CB) metadata(se)$sampleData <- tibble( - id = paste(names(bam.file)[1], levels(mcols(readGrgList)$CB), sep = '_'), + id = paste(names(bam.file)[1], barcodes, sep = '_'), sampleName = names(bam.file)[1], - barcode = levels(mcols(readGrgList)$CB) + barcode = barcodes ) } else{ metadata(se)$sampleData <- tibble( From 446407afbf82109a61cf8af4e87096f3f193e59f Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 9 Apr 2026 15:49:01 +0800 Subject: [PATCH 20/22] tidy for cleaner code --- R/bambu-processReads.R | 9 +++------ R/bambu_utilityFunctions.R | 14 +++++++------- R/prepareDataFromBam.R | 5 +++++ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R index b1442d1d..dd988584 100644 --- a/R/bambu-processReads.R +++ b/R/bambu-processReads.R @@ -54,7 +54,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence, returnModel <- isoreParameters[["returnModel"]] min.exonOverlap <- isoreParameters[["min.exonOverlap"]] - if(processByBam){ # bulk mode + if(processByBam){ readClassList <- bplapply(seq_along(reads), function(i) { bambu.processReadsByFile(bam.file = reads[i], genomeSequence = genomeSequence,annotations = annotations, @@ -64,7 +64,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence, processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)}, BPPARAM = bpParameters) - } else { # single cell mode + } else { readGrgList <- bplapply(seq_along(reads), function(i) { bambu.readsByFile(bam.file = reads[i], genomeSequence = genomeSequence,annotations = annotations, @@ -173,10 +173,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, mcols(readGrgList)$id <- seq_along(readGrgList) - sampleName <- names(bam.file)[1] - if(!isFALSE(demultiplexed)){ - mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB) mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB) } else { mcols(readGrgList)$sampleID <- index @@ -409,7 +406,7 @@ splitReadClassFiles = function(readClassFile){ i = rep(seq_along(counts.table), lengths(counts.table)), j = as.numeric(names(unlist(counts.table))), x = unlist(counts.table), - dims = c(nrow(eqClasses), length(metadata(readClassFile)$sampleData$id))) + dims = c(nrow(eqClasses), nrow(metadata(readClassFile)$sampleData))) #incompatible counts distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible if(nrow(distTable)==0) { diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index a80c70a3..44c2d747 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -310,14 +310,14 @@ generateColData <- function(readClassList, sampleMetadata, demultiplexed) { joinKey <- if (demultiplexed) "barcode" else "sampleName" + colData <- tibble( + id = metadata(readClassList)$sampleData$id, + sampleName = metadata(readClassList)$sampleData$sampleName + ) + if (demultiplexed) { - colData <- tibble( - id = paste(metadata(readClassList)$sampleData$sampleName, metadata(readClassList)$sampleData$barcode, sep = '_'), - sampleName = metadata(readClassList)$sampleData$sampleName, - barcode = metadata(readClassList)$sampleData$barcode - ) - } else{ - colData <- tibble(id = metadata(readClassList)$sampleData$sampleName, sampleName = metadata(readClassList)$sampleData$sampleName) + colData <- colData %>% + mutate(barcode = metadata(readClassList)$sampleData$barcode) } colData <- colData %>% diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R index 980bd312..2c4486f3 100755 --- a/R/prepareDataFromBam.R +++ b/R/prepareDataFromBam.R @@ -101,6 +101,11 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, } else { readGrgList <- readGrgList[[1]] } + + if (demultiplexed){ + mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB) + } + # remove microexons of width 1bp from list readGrgList <- readGrgList <- readGrgList[sum(width(readGrgList)) > 1] numNoCBs <- sum(is.na(mcols(readGrgList)$CB)) From 93b2e7d482155b79f1156eaf330b5e9fc4366371 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 9 Apr 2026 16:44:53 +0800 Subject: [PATCH 21/22] add back row.names to prevent missing colnames in se_pseudobulk --- R/bambu.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/bambu.R b/R/bambu.R index ab310f5b..cf61a555 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -317,7 +317,8 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, ColNames <- c(ColNames, names(iter)) colData.all[[i]] <- data.frame( id = names(countsSeCompressed), - sampleName = names(countsSeCompressed) + sampleName = names(countsSeCompressed), + row.names = names(countsSeCompressed) ) } else{ ColNames <- c(ColNames, colnames(quantData_i)) From 01f4ff8db7fab687c8097b3aecaa33a0eac25908 Mon Sep 17 00:00:00 2001 From: lingminhao Date: Thu, 9 Apr 2026 19:34:15 +0800 Subject: [PATCH 22/22] correct order of extracted barcode to match devel_pre_v4 --- R/prepareDataFromBam.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R index 2c4486f3..81beb4b2 100755 --- a/R/prepareDataFromBam.R +++ b/R/prepareDataFromBam.R @@ -68,10 +68,6 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, mcols(readGrgList[[counter]])$UMI <- readMap[,3][match(names(readGrgList[[counter]]),readMap[,1])] } } - cells <- unique(c(cells, mcols(readGrgList[[counter]])$CB)) - mcols(readGrgList[[counter]])$CB <- factor(mcols(readGrgList[[counter]])$CB, levels = cells) - umi <- unique(c(umi, mcols(readGrgList[[counter]])$UMI)) - mcols(readGrgList[[counter]])$UMI <- factor(mcols(readGrgList[[counter]])$UMI, levels = umi) } if(cleanReads){ softClip5Prime <- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '^(\\d*)[S].*', replace_pattern = '\\1') @@ -103,7 +99,7 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, } if (demultiplexed){ - mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB) + mcols(readGrgList)$CB <- factor(mcols(readGrgList)$CB, levels = sort(unique(mcols(readGrgList)$CB))) } # remove microexons of width 1bp from list