From 14f5547a8d632831e95af914388a9c1d4b97cee6 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Sun, 15 Feb 2026 21:33:47 +0800
Subject: [PATCH 01/22] tidy up code

---
 R/bambu-processReads.R                        |  1 -
 ...processReads_utilityConstructReadClasses.R | 22 +++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R
index f144c036..29082ad6 100644
--- a/R/bambu-processReads.R
+++ b/R/bambu-processReads.R
@@ -307,7 +307,6 @@ constructReadClasses <- function(readGrgList, genomeSequence, annotations,
     stranded = FALSE, min.readCount = 2, 
     fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE, 
     verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE){
-    warnings <- c() ###TODO
     
     if(processByChromosome){
         # construct read classes for each chromosome seperately 
diff --git a/R/bambu-processReads_utilityConstructReadClasses.R b/R/bambu-processReads_utilityConstructReadClasses.R
index fec9adbf..a9610045 100644
--- a/R/bambu-processReads_utilityConstructReadClasses.R
+++ b/R/bambu-processReads_utilityConstructReadClasses.R
@@ -25,12 +25,14 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
             Please report this")
     start.ptm <- proc.time()
     if(!is.null(uniqueJunctions)){
-        exonsByRC.spliced <- constructSplicedReadClasses(
-            uniqueJunctions = uniqueJunctions,
-            unlisted_junctions = unlisted_junctions,
-            readGrgList = readGrgList,
-            stranded = stranded)}
-    else{exonsByRC.spliced = GRangesList()}
+      exonsByRC.spliced <- constructSplicedReadClasses(
+        uniqueJunctions = uniqueJunctions,
+        unlisted_junctions = unlisted_junctions,
+        readGrgList = readGrgList,
+        stranded = stranded)
+    } else{
+      exonsByRC.spliced = GRangesList()
+    }
     end.ptm <- proc.time()
     rm(readGrgList, unlisted_junctions, uniqueJunctions)
     if (verbose) 
@@ -38,11 +40,13 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
     "spliced junctions in ", round((end.ptm - start.ptm)[3] / 60, 1)," mins.")
     if(length(reads.singleExon)==0) { 
         exonsByRC.unspliced <- NULL
-    } else {exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon, 
-        annotations, exonsByRC.spliced, stranded, verbose)}
+    } else {
+      exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon, 
+                             annotations, exonsByRC.spliced, stranded, verbose)
+    }
     exonsByRC <- c(exonsByRC.spliced, exonsByRC.unspliced)
     colDataDf <- DataFrame(name = runName, row.names = runName)
-    #TODO later remove assays = SimpleList(counts = counts)
+
     counts <- matrix(mcols(exonsByRC)$readCount,
                      dimnames = list(names(exonsByRC), runName))
     se <- SummarizedExperiment(assays = SimpleList(counts = counts),

From d63fdae91b762f33793b13f6db2c102969be9f56 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Sun, 15 Feb 2026 21:37:13 +0800
Subject: [PATCH 02/22] refactor generateColData to take sampleData as argument

---
 R/bambu_utilityFunctions.R | 70 ++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 5deac8ba..965f72e8 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -288,47 +288,37 @@ combineCountSes <- function(countsSe, annotations){
     return(countsSe)
 }
 
-#' Generate the coldata for se options using colnames, and other option inputs
-#' @noRd
-generateColData <- function(sampleNames, clusters, demultiplexed, spatial){
-    ColData <- DataFrame(id = sampleNames)
-    if(!isFALSE(demultiplexed) & is.null(clusters)){
-        ColData <- DataFrame(id = sampleNames, 
-                        sampleName = gsub("_[^_]+$","", sampleNames, perl = TRUE), 
-                        Barcode = gsub(".*_(?=[^_]*$)","", sampleNames, perl = TRUE))
-    }
-    if(!is.null(spatial) & is.null(clusters)){
-        ColData$x_coordinate <- NA
-        ColData$y_coordinate <- NA
-        if(length(spatial)==1){
-            # the following line takes a regular delimited file as input
-            # it can either has header or without header
-            # it can also be compressed 
-            bc_coords <- fread(spatial, 
-                col.names = c("Barcode", "x_coordinate", "y_coordinate"),
-                data.table = FALSE)
-                # DataFrame(read.table(gzfile(spatial),
-                # col.names = c("Barcode", "x_coordinate", "y_coordinate")))
-            bcMatch <- match(ColData$Barcode, bc_coords$Barcode)
-            ColData$x_coordinate <- bc_coords$x_coordinate[bcMatch]
-            ColData$y_coordinate <- bc_coords$y_coordinate[bcMatch]
+#' Generate the colData using the external sampleData.csv provided by the user in the sampleData argument
+#' @param readClassList A list object containingmetadata about read classes.
+#' @param sampleData A path to a CSV file or NULL/NA if there is no metadata for the sample.
+#' @param demultiplexed Logical; indicates if data is demultiplexed.
+#'
+#' @return A DataFrame containing colData for the sample.
+#' @export
+generateColData <- function(readClassList, sampleData, demultiplexed) {
+  sampleDataDf <- if (is.null(sampleData) || is.na(sampleData)) {
+    if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character())
+  } else {
+    read.csv(sampleData)
+  }
+  samples <- metadata(readClassList)$samples
+  joinKey <- if (demultiplexed) "barcode" else "sampleName"
+  
+  colData <- tibble(id = samples)
+  if (demultiplexed) {
+    colData$sampleName = sub('_[^_]+$', '', samples)
+    colData$barcode <- sub('.*_', '', samples)
         } else{
-            spatial.unique <- unique(spatial)
-            for(whitelist in spatial.unique){
-                i <- which(spatial.unique==whitelist)
-                bc_coords <- fread(whitelist, 
-                                   col.names = c("Barcode", "x_coordinate", "y_coordinate"),
-                                   data.table = FALSE)
-                    # DataFrame(read.table(gzfile(whitelist), 
-                    # col.names = c("Barcode", "x_coordinate", "y_coordinate")))
-                bcSampleIndex <- ColData$sampleName %in% sampleNames[i]
-                bcMatch <- match(ColData$Barcode[bcSampleIndex], bc_coords$Barcode)
-                ColData$x_coordinate[bcSampleIndex] <- bc_coords$x_coordinate[bcMatch]
-                ColData$y_coordinate[bcSampleIndex] <- bc_coords$y_coordinate[bcMatch]
-            }
-        }
-    }
-    return(ColData)
+    colData$sampleName <- samples
+  }
+  
+  colData <- colData %>%
+    left_join(sampleDataDf, by = joinKey) %>%
+    as.data.frame()
+  
+  rownames(colData) <- colData$id
+  
+  colData
 }
 
 # Quick wrapper function (https://stackoverflow.com/questions/13273833/merging-multiple-data-tables)

From f86ce9c01dd2751c97d91d7ae6bd017c4c01e045 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Sun, 15 Feb 2026 21:43:08 +0800
Subject: [PATCH 03/22] refactor combineCountSes to inherit colData directly
 from quantData

---
 R/bambu.R                  | 10 +++-------
 R/bambu_utilityFunctions.R | 23 ++++++++++++-----------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/R/bambu.R b/R/bambu.R
index edd8a8cc..78a7df03 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -315,8 +315,9 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
             }
             countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed)
         }
-        countsSeCompressed.all$colnames <- ColNames            
-        countsSe <- combineCountSes(countsSeCompressed.all, annotations)
+        names(countsSeCompressed.all) <- ColNames   
+        
+        countsSe <- combineCountSes(countsSeCompressed.all, colData.all, annotations)
         if(returnDistTable){
             distTables = list()
             for(i in seq_along(quantData)){
@@ -324,11 +325,6 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
             }
             metadata(countsSe)$distTables <- distTables
         }
-        #metadata(countsSe)$warnings = warnings
-
-        ColData <- generateColData(colnames(countsSe), clusters, demultiplexed, spatial)
-        colData(countsSe) <- ColData
-        colnames(countsSe) <- ColData[,1]
         return(countsSe)
     }
   }
\ No newline at end of file
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 965f72e8..b5960c00 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -255,13 +255,11 @@ calculateDistTable <- function(readClassList, annotations, isoreParameters, verb
         return(readClassDist)
 }
 
-#' Combine count se object while preserving the metadata objects
+#' Combine combined count se object from multiple samples, cells or spatial locations
 #' @noRd
-combineCountSes <- function(countsSe, annotations){
-    countsData <- c("counts", "CPM", "fullLengthCounts", 
-                    "uniqueCounts", "incompatibleCounts")
-    sampleNames <- countsSe$colnames
-    countsSe$colnames <- NULL
+combineCountSes <- function(countsSe, colData, annotations){
+    countsData <- c("counts", "CPM", "fullLengthCounts", "uniqueCounts", "incompatibleCounts")
+    sampleNames <- names(countsSe)
     countsDataMat <- lapply(countsData, FUN = function(k){
         countsVecList <- lapply(countsSe, function(j){j[[k]]})
         countsMat <- sparseMatrix(i = unlist(lapply(countsVecList, function(j) j@i)),
@@ -279,13 +277,16 @@ combineCountSes <- function(countsSe, annotations){
         return(countsMat)
     })
     names(countsDataMat) <- countsData
-    countsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, 
+    combinedCountsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, 
                                                         CPM = countsDataMat$CPM, 
                                                         fullLengthCounts = countsDataMat$fullLengthCounts, 
                                                         uniqueCounts = countsDataMat$uniqueCounts))
-    metadata(countsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts
-    rowRanges(countsSe) <- annotations
-    return(countsSe)
+    metadata(combinedCountsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts
+    rowRanges(combinedCountsSe) <- annotations
+
+    colData(combinedCountsSe) <- DataFrame(bind_rows(colData))
+    
+    return(combinedCountsSe)
 }
 
 #' Generate the colData using the external sampleData.csv provided by the user in the sampleData argument
@@ -308,7 +309,7 @@ generateColData <- function(readClassList, sampleData, demultiplexed) {
   if (demultiplexed) {
     colData$sampleName = sub('_[^_]+$', '', samples)
     colData$barcode <- sub('.*_', '', samples)
-        } else{
+  } else{
     colData$sampleName <- samples
   }
   

From ed26b1ffdbe884780ccaa788d0a45f2dd0c24abb Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Sun, 15 Feb 2026 21:44:04 +0800
Subject: [PATCH 04/22] update colData for pseudobulk single-cell

---
 R/bambu.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/R/bambu.R b/R/bambu.R
index 78a7df03..82ae0253 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -262,6 +262,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
         start.ptm <- proc.time()
         countsSeCompressed.all <- NULL
         ColNames <- c()
+        colData.all <- list()
         for(i in seq_along(quantData)){
             quantData_i <- quantData[[i]]
             #load in the barcode clustering from file if provided
@@ -310,8 +311,14 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
             message("Total Time ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.")
             if(!is.null(clusters)){
                 ColNames <- c(ColNames, names(iter))
+                colData.all[[i]] <- data.frame(
+                  id = names(countsSeCompressed), 
+                  sampleName = names(countsSeCompressed), 
+                  row.names = names(countsSeCompressed)
+                )
             } else{
                 ColNames <- c(ColNames, colnames(quantData_i)) 
+                colData.all[[i]] <- data.frame(colData(quantData_i))
             }
             countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed)
         }

From f9554b5096351e3c8a7d652c30adc7637e264f64 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Sun, 15 Feb 2026 21:44:37 +0800
Subject: [PATCH 05/22] add sampleData argument

---
 R/bambu-assignDist.R |  4 ++--
 R/bambu.R            | 26 +++++++++++++++-----------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R
index be73e0a8..d5c626ed 100644
--- a/R/bambu-assignDist.R
+++ b/R/bambu-assignDist.R
@@ -3,7 +3,7 @@
 #' @import data.table
 #' @noRd
 assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, 
-                                        verbose, demultiplexed, spatial, 
+                                        verbose, sampleData, demultiplexed, spatial, 
                                         returnDistTable = FALSE, trackReads = TRUE) {
     if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
     metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
@@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
         mutate(aval = 1) %>%
         data.table()
     #return non-em counts
-    ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
+    ColData <- generateColData(readClassList, sampleData, demultiplexed)
     quantData <- SummarizedExperiment(assays = SimpleList(
         counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
         rowRanges = annotations,
diff --git a/R/bambu.R b/R/bambu.R
index 82ae0253..c30b3553 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -138,7 +138,7 @@
 bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
     mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, 
     assignDist = TRUE, quant = TRUE, stranded = FALSE,  ncore = 1, yieldSize = NULL,  
-    trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE,
+    trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL,
     fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL,
     sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL,
     processByChromosome = FALSE, processByBam = TRUE) {
@@ -234,16 +234,20 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
         }
         if(assignDist){
             message("--- Start calculating equivilance classes ---")
-            quantData <- bplapply(readClassList, 
-                                  FUN = assignReadClasstoTranscripts, 
-                                  annotations = annotations, 
-                                  isoreParameters = isoreParameters, 
-                                  verbose = verbose, 
-                                  demultiplexed = demultiplexed, 
-                                  spatial = spatial, 
-                                  returnDistTable = returnDistTable,
-                                  trackReads = trackReads,
-                                  BPPARAM = bpParameters)
+            quantData <- bplapply(seq_along(readClassList), function(i){
+              assignReadClasstoTranscripts(
+                readClassList = readClassList[[i]],
+                annotations = annotations, 
+                isoreParameters = isoreParameters, 
+                verbose = verbose, 
+                # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample
+                sampleData = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
+                demultiplexed = demultiplexed, 
+                spatial = spatial, 
+                returnDistTable = returnDistTable,
+                trackReads = trackReads
+              )
+            }, BPPARAM = bpParameters)
             if (!quant) return(quantData)
         }
     }

From 03078553b0b6301cafd4bae5990ae96e006d19c8 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 19 Feb 2026 09:02:25 +0800
Subject: [PATCH 06/22] remove spatial argument from bambu

---
 R/bambu-assignDist.R       |  2 +-
 R/bambu.R                  |  3 +--
 R/bambu_utilityFunctions.R | 14 +++++++-------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R
index d5c626ed..8e331223 100644
--- a/R/bambu-assignDist.R
+++ b/R/bambu-assignDist.R
@@ -3,7 +3,7 @@
 #' @import data.table
 #' @noRd
 assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, 
-                                        verbose, sampleData, demultiplexed, spatial, 
+                                        verbose, sampleData, demultiplexed,
                                         returnDistTable = FALSE, trackReads = TRUE) {
     if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
     metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
diff --git a/R/bambu.R b/R/bambu.R
index c30b3553..dc3271ae 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -173,7 +173,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
         annotations <- checkInputs(annotations, reads,
             readClass.outputDir = rcOutDir, 
             genomeSequence = genome, discovery = discovery, 
-            sampleNames = sampleNames, spatial = spatial,quantData = quantData)
+            sampleNames = sampleNames, sampleData = sampleData, quantData = quantData)
     }
     isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery)
     #below line is to be compatible with earlier version of running bambu
@@ -243,7 +243,6 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
                 # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample
                 sampleData = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
                 demultiplexed = demultiplexed, 
-                spatial = spatial, 
                 returnDistTable = returnDistTable,
                 trackReads = trackReads
               )
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index b5960c00..8740cc52 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -73,7 +73,7 @@ updateParameters <- function(Parameters, Parameters.default) {
 #' @importFrom methods is
 #' @noRd
 checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, 
-                        discovery, sampleNames, spatial, quantData){
+                        discovery, sampleNames, quantData){
     # ===# Check annotation inputs #===#
     if (!is.null(annotations)) {
         if (is(annotations, "CompressedGRangesList")) {
@@ -156,12 +156,12 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence,
         }
     }
 
-    if(!is.null(spatial)){
-        #if(!all(grepl(".tsv^", spatial))){stop("Not all paths for spatial are .tsv files")}
-        if(length(spatial)==1 & length(reads)>1){
-            warning("Using the same whitelist and coordinates for all input samples")
-        } else if(length(reads)!=length(spatial)){
-            stop("There are not the same number spatial whitelist paths as input files to reads. ",
+    if(!is.null(sampleData)){
+        if(!all(grepl(".csv^", sampleData))){stop("Not all paths for sample metadata files are .csv files")}
+        if(length(sampleData)==1 & length(reads)>1){
+            warning("Using the same sample metadata file for all input samples")
+        } else if(length(reads)!=length(sampleData)){
+            stop("There are not the same number sample metadata files paths as input files to reads. ",
             "Make sure these two arguments are vectors of the same length")
         }
     }

From 8e25154d48e740020099334b11918cd37a40c1b4 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 19 Feb 2026 09:09:35 +0800
Subject: [PATCH 07/22] rename colData parameter of combineCountSes to
 colDataList (avoid same name as function)

---
 R/bambu_utilityFunctions.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 8740cc52..7b3c647f 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -257,7 +257,7 @@ calculateDistTable <- function(readClassList, annotations, isoreParameters, verb
 
 #' Combine combined count se object from multiple samples, cells or spatial locations
 #' @noRd
-combineCountSes <- function(countsSe, colData, annotations){
+combineCountSes <- function(countsSe, colDataList, annotations){
     countsData <- c("counts", "CPM", "fullLengthCounts", "uniqueCounts", "incompatibleCounts")
     sampleNames <- names(countsSe)
     countsDataMat <- lapply(countsData, FUN = function(k){
@@ -284,7 +284,7 @@ combineCountSes <- function(countsSe, colData, annotations){
     metadata(combinedCountsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts
     rowRanges(combinedCountsSe) <- annotations
 
-    colData(combinedCountsSe) <- DataFrame(bind_rows(colData))
+    colData(combinedCountsSe) <- DataFrame(bind_rows(colDataList))
     
     return(combinedCountsSe)
 }

From c29eab421ac9133e74e5b4a5f484fc9677939c39 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 19 Feb 2026 09:32:16 +0800
Subject: [PATCH 08/22] update bambu sampleData parameter description

---
 R/bambu.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/R/bambu.R b/R/bambu.R
index dc3271ae..ef18fed3 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -93,6 +93,11 @@
 #' distTables. The output is a list with an entry for each sample.
 #' @param lowMemory Read classes will be processed by chromosomes when lowMemory 
 #' is specified. This option provides an efficient way to process big samples.
+#' @param sampleData A character vector of paths to metadata CSV files (or \code{NA} if 
+#' unavailable for specific samples); defaults to \code{NULL}. Files must contain a 
+#' "sampleName" column for bulk data or a "barcode" column for single-cell/spatial data. 
+#' For bulk data, one metadata CSV file for all samples is sufficient, whereas single-cell/spatial 
+#' data requires one metadata CSV file per sample.
 #' @param fusionMode A logical variable indicating whether run in fusion mode
 #' @param verbose A logical variable indicating whether processing messages will
 #' be printed.

From bfa131e583b93ca40704cab5fe395447f67a21bb Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 19 Feb 2026 09:46:54 +0800
Subject: [PATCH 09/22] refine sampleData input check description

---
 R/bambu_utilityFunctions.R | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 7b3c647f..1dec180a 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -158,11 +158,14 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence,
 
     if(!is.null(sampleData)){
         if(!all(grepl(".csv^", sampleData))){stop("Not all paths for sample metadata files are .csv files")}
-        if(length(sampleData)==1 & length(reads)>1){
-            warning("Using the same sample metadata file for all input samples")
-        } else if(length(reads)!=length(sampleData)){
-            stop("There are not the same number sample metadata files paths as input files to reads. ",
-            "Make sure these two arguments are vectors of the same length")
+        if(length(sampleData)==1 & length(reads)>1){ # normally used for bulk samples
+            message("Using the same sample metadata file for all input samples")
+        } else if(length(reads)!=length(sampleData)){ # normally used for single-cell/spatial samples
+            stop(
+                "The number of sample metadata files does not match the number of input read files. ",
+                "These two arguments (sampleData & reads) must be vectors of the same length. ",
+                "If a specific sample has no metadata, please use 'NA' as a placeholder in the sampleData vector."
+            )
         }
     }
     return(annotations)

From 9c2d8bab127646f26ebfff6837cf4beec5a15cbf Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 20 Feb 2026 14:00:53 +0800
Subject: [PATCH 10/22] tidy up spatial & sampleData argument

---
 R/bambu.R                  | 2 +-
 R/bambu_utilityFunctions.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/bambu.R b/R/bambu.R
index ef18fed3..b391a2c3 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -144,7 +144,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
     mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, 
     assignDist = TRUE, quant = TRUE, stranded = FALSE,  ncore = 1, yieldSize = NULL,  
     trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL,
-    fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL,
+    fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, quantData = NULL,
     sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL,
     processByChromosome = FALSE, processByBam = TRUE) {
     message(paste0("Running Bambu-v", "3.9.0"))
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 1dec180a..3424698a 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -73,7 +73,7 @@ updateParameters <- function(Parameters, Parameters.default) {
 #' @importFrom methods is
 #' @noRd
 checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, 
-                        discovery, sampleNames, quantData){
+                        discovery, sampleNames, sampleData, quantData){
     # ===# Check annotation inputs #===#
     if (!is.null(annotations)) {
         if (is(annotations, "CompressedGRangesList")) {

From fda300b1b9200188bf5d81e0e3b1bbf637c3d079 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 20 Feb 2026 14:03:42 +0800
Subject: [PATCH 11/22] change sampleData to sampleMetadata in
 assignReadClasstoTranscripts for variable clarity

---
 R/bambu-assignDist.R       |  4 ++--
 R/bambu.R                  |  2 +-
 R/bambu_utilityFunctions.R | 12 ++++++------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R
index 8e331223..c5908ace 100644
--- a/R/bambu-assignDist.R
+++ b/R/bambu-assignDist.R
@@ -3,7 +3,7 @@
 #' @import data.table
 #' @noRd
 assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, 
-                                        verbose, sampleData, demultiplexed,
+                                        verbose, sampleMetadata, demultiplexed,
                                         returnDistTable = FALSE, trackReads = TRUE) {
     if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
     metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
@@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
         mutate(aval = 1) %>%
         data.table()
     #return non-em counts
-    ColData <- generateColData(readClassList, sampleData, demultiplexed)
+    ColData <- generateColData(readClassList, sampleMetadata, demultiplexed)
     quantData <- SummarizedExperiment(assays = SimpleList(
         counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
         rowRanges = annotations,
diff --git a/R/bambu.R b/R/bambu.R
index b391a2c3..0254ca4e 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -246,7 +246,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
                 isoreParameters = isoreParameters, 
                 verbose = verbose, 
                 # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample
-                sampleData = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
+                sampleMetadata = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
                 demultiplexed = demultiplexed, 
                 returnDistTable = returnDistTable,
                 trackReads = trackReads
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 3424698a..16c53f8e 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -292,18 +292,18 @@ combineCountSes <- function(countsSe, colDataList, annotations){
     return(combinedCountsSe)
 }
 
-#' Generate the colData using the external sampleData.csv provided by the user in the sampleData argument
+#' Generate the colData using the external sampleMetadata.csv provided by the user in the sampleMetadata argument
 #' @param readClassList A list object containingmetadata about read classes.
-#' @param sampleData A path to a CSV file or NULL/NA if there is no metadata for the sample.
+#' @param sampleMetadata A path to a CSV file or NULL/NA if there is no metadata for the sample.
 #' @param demultiplexed Logical; indicates if data is demultiplexed.
 #'
 #' @return A DataFrame containing colData for the sample.
 #' @export
-generateColData <- function(readClassList, sampleData, demultiplexed) {
-  sampleDataDf <- if (is.null(sampleData) || is.na(sampleData)) {
+generateColData <- function(readClassList, sampleMetadata, demultiplexed) {
+  sampleMetadataDf <- if (is.null(sampleMetadata) || is.na(sampleMetadata)) {
     if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character())
   } else {
-    read.csv(sampleData)
+    read.csv(sampleMetadata)
   }
   samples <- metadata(readClassList)$samples
   joinKey <- if (demultiplexed) "barcode" else "sampleName"
@@ -317,7 +317,7 @@ generateColData <- function(readClassList, sampleData, demultiplexed) {
   }
   
   colData <- colData %>%
-    left_join(sampleDataDf, by = joinKey) %>%
+    left_join(sampleMetadataDf, by = joinKey) %>%
     as.data.frame()
   
   rownames(colData) <- colData$id

From e80cb39ddce340fbd5d6e85ad067bfe0624a3959 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 20 Feb 2026 14:04:19 +0800
Subject: [PATCH 12/22] fix sampleData check: correct .csv regex, skip NAs

---
 R/bambu_utilityFunctions.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 16c53f8e..0f641e52 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -157,7 +157,9 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence,
     }
 
     if(!is.null(sampleData)){
-        if(!all(grepl(".csv^", sampleData))){stop("Not all paths for sample metadata files are .csv files")}
+        if (!all(grepl("\\.csv$", na.omit(sampleData)))){
+            stop("Not all paths for sample metadata files are .csv files")
+        }
         if(length(sampleData)==1 & length(reads)>1){ # normally used for bulk samples
             message("Using the same sample metadata file for all input samples")
         } else if(length(reads)!=length(sampleData)){ # normally used for single-cell/spatial samples

From 7d60435ae944acbaff84ad7b18a4edc6b2fd70be Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Mon, 2 Mar 2026 15:11:00 +0800
Subject: [PATCH 13/22] allow .csv/.tsv/.txt file input types in sampleData

---
 R/bambu_utilityFunctions.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 0f641e52..0f88a8b0 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -157,12 +157,12 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence,
     }
 
     if(!is.null(sampleData)){
-        if (!all(grepl("\\.csv$", na.omit(sampleData)))){
-            stop("Not all paths for sample metadata files are .csv files")
+        if (!all(grepl("\\.(csv|tsv|txt)$", na.omit(sampleData), ignore.case = TRUE))){
+            stop("Not all paths for sample metadata files are .csv/.tsv/.txt files")
         }
-        if(length(sampleData)==1 & length(reads)>1){ # normally used for bulk samples
+        if(length(sampleData)==1 & length(reads)>1){ # one sample metadata for all samples
             message("Using the same sample metadata file for all input samples")
-        } else if(length(reads)!=length(sampleData)){ # normally used for single-cell/spatial samples
+        } else if(length(reads)!=length(sampleData)){ # multiple sample metadatas for multiple samples
             stop(
                 "The number of sample metadata files does not match the number of input read files. ",
                 "These two arguments (sampleData & reads) must be vectors of the same length. ",
@@ -305,7 +305,7 @@ generateColData <- function(readClassList, sampleMetadata, demultiplexed) {
   sampleMetadataDf <- if (is.null(sampleMetadata) || is.na(sampleMetadata)) {
     if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character())
   } else {
-    read.csv(sampleMetadata)
+    fread(sampleMetadata)
   }
   samples <- metadata(readClassList)$samples
   joinKey <- if (demultiplexed) "barcode" else "sampleName"

From 8f7f506c89900c7985232d82977b53a844f33409 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 27 Mar 2026 10:02:07 +0800
Subject: [PATCH 14/22] refactor: store sampleData in readClassList for parsing

---
 R/bambu-assignDist.R       |  2 +-
 R/bambu-processReads.R     | 34 ++++++++++++++++++++--------------
 R/bambu_utilityFunctions.R | 14 ++++++++------
 3 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R
index c5908ace..750c1e87 100644
--- a/R/bambu-assignDist.R
+++ b/R/bambu-assignDist.R
@@ -32,7 +32,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
     metadata(quantData)$readClassDt <- readClassDt
     metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix
     metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix 
-    metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames 
+    metadata(quantData)$sampleName <- metadata(readClassList)$sampleData$sampleName 
     if(returnDistTable)
         metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld
 
diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R
index 29082ad6..694cec03 100644
--- a/R/bambu-processReads.R
+++ b/R/bambu-processReads.R
@@ -174,13 +174,9 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
     mcols(readGrgList)$id <- seq_along(readGrgList) 
 
     sampleName <- names(bam.file)[1]
-    if(!isFALSE(demultiplexed)){
-        mcols(readGrgList)$CB <- paste0(sampleName, '_', mcols(readGrgList)$CB)
-    } else{
-        mcols(readGrgList)$CB <- sampleName
-        }
-    mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
+
     if(!isFALSE(demultiplexed)){ 
+        mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
         mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
     } else {
         mcols(readGrgList)$sampleID <- index
@@ -217,9 +213,19 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
                              fusionMode = fusionMode,
                              verbose = verbose)
 
-    metadata(se)$samples <- names(bam.file)[1]
-    metadata(se)$sampleNames <- names(bam.file)[1]
-    if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$CB)                         
+    if (demultiplexed) {
+        metadata(se)$sampleData <- tibble(
+          id = paste(names(bam.file)[1], levels(mcols(readGrgList)$CB), sep = '_'),
+          sampleName = names(bam.file)[1],
+          barcode = levels(mcols(readGrgList)$CB)
+        )
+    } else{
+        metadata(se)$sampleData <- tibble(
+          id = names(bam.file)[1],
+          sampleName = names(bam.file)[1]
+        )
+    }
+
     return(se)
 }
 
@@ -402,12 +408,12 @@ splitReadClassFiles = function(readClassFile){
         i = rep(seq_along(counts.table), lengths(counts.table)),
         j = as.numeric(names(unlist(counts.table))),
         x = unlist(counts.table),
-        dims = c(nrow(eqClasses), length(metadata(readClassFile)$samples)))
+        dims = c(nrow(eqClasses), length(metadata(readClassFile)$sampleData$id)))
     #incompatible counts
     distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible
     if(nrow(distTable)==0) {
         counts.incompatible <- sparseMatrix(i= 1, j = 1, x = 0,
-        dims = c(1, length(metadata(readClassFile)$samples)))
+        dims = c(1, length(metadata(readClassFile)$sampleData$id)))
         rownames(counts.incompatible) <- "TODO"
     } else{
         distTable$sampleIDs <- rowData(readClassFile)$sampleIDs[match(distTable$readClassId, rownames(readClassFile))]
@@ -418,11 +424,11 @@ splitReadClassFiles = function(readClassFile){
             i = rep(seq_along(counts.table), lengths(counts.table)),
             j = as.numeric(names(unlist(counts.table))),
             x = unlist(counts.table),
-            dims = c(nrow(distTable), length(metadata(readClassFile)$samples)))
-        colnames(counts.incompatible) <- metadata(readClassFile)$samples
+            dims = c(nrow(distTable), length(metadata(readClassFile)$sampleData$id)))
+        colnames(counts.incompatible) <- metadata(readClassFile)$sampleData$id
         rownames(counts.incompatible) <- distTable$GENEID.i 
     }
-    colnames(counts) <- metadata(readClassFile)$samples
+    colnames(counts) <- metadata(readClassFile)$sampleData$id
     metadata(readClassFile)$eqClassById <- eqClasses$eqClassById
     #rownames(counts) = eqClasses$eqClassById
     metadata(readClassFile)$countMatrix <- counts
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 0f88a8b0..a80c70a3 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -307,15 +307,17 @@ generateColData <- function(readClassList, sampleMetadata, demultiplexed) {
   } else {
     fread(sampleMetadata)
   }
-  samples <- metadata(readClassList)$samples
+
   joinKey <- if (demultiplexed) "barcode" else "sampleName"
-  
-  colData <- tibble(id = samples)
+
   if (demultiplexed) {
-    colData$sampleName = sub('_[^_]+$', '', samples)
-    colData$barcode <- sub('.*_', '', samples)
+    colData <- tibble(
+        id = paste(metadata(readClassList)$sampleData$sampleName, metadata(readClassList)$sampleData$barcode, sep = '_'),
+        sampleName = metadata(readClassList)$sampleData$sampleName,
+        barcode = metadata(readClassList)$sampleData$barcode
+    )
   } else{
-    colData$sampleName <- samples
+    colData <- tibble(id = metadata(readClassList)$sampleData$sampleName, sampleName = metadata(readClassList)$sampleData$sampleName)
   }
   
   colData <- colData %>%

From 547e0348abaa635486c51b5c41c90595872bcb86 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 27 Mar 2026 10:16:50 +0800
Subject: [PATCH 15/22] update comment to describe CB/UMI parsing from bam

---
 R/prepareDataFromBam.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R
index e9634a92..11ce950d 100755
--- a/R/prepareDataFromBam.R
+++ b/R/prepareDataFromBam.R
@@ -46,11 +46,13 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
         if (!isFALSE(demultiplexed)){ # if demultiplexed is TRUE or a string path 
             if(isTRUE(demultiplexed)){ # if demultiplexed is TRUE
       
-                mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether CB is contained in the name, with specific format CB_UMI#READNAME, 
+                # a checkpoint to parse CB and UMI from the bam file, either from reads or CB/UMI tags.
+                # currently read name only accepts the format CB_UMI#READNAME (CB & UMI cannot have '_', otherwise parsing fails) 
+                mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), 
                                                               !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB, 
                                                               TRUE ~ NA) 
 
-                mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format CB_UMI#READNAME, 
+                mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), 
                                                                !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB, 
                                                                TRUE ~ NA) 
             } else{ # if demultiplexed is a string path

From 586cdb36d76b4432dd127acdb4badd51d8afb7c6 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 27 Mar 2026 10:30:38 +0800
Subject: [PATCH 16/22] change priority in CB & UMI name extraction from bam
 file

---
 R/prepareDataFromBam.R | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R
index 11ce950d..980bd312 100755
--- a/R/prepareDataFromBam.R
+++ b/R/prepareDataFromBam.R
@@ -48,13 +48,18 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
       
                 # a checkpoint to parse CB and UMI from the bam file, either from reads or CB/UMI tags.
                 # currently read name only accepts the format CB_UMI#READNAME (CB & UMI cannot have '_', otherwise parsing fails) 
-                mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), 
-                                                              !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB, 
-                                                              TRUE ~ NA) 
+                mcols(readGrgList[[counter]])$CB <- case_when(
+                    !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB,
+                    grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])),
+                    TRUE ~ NA
+                ) 
 
-                mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), 
-                                                               !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB, 
-                                                               TRUE ~ NA) 
+                mcols(readGrgList[[counter]])$UMI <- case_when(
+                    !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB,
+                    grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), 
+                    TRUE ~ NA
+                )
+                
             } else{ # if demultiplexed is a string path
                 mcols(readGrgList[[counter]])$CB <- NA
                 mcols(readGrgList[[counter]])$UMI <- NA

From 4bd48bf163c45365626402ec6317cb3e6d0ec250 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 27 Mar 2026 16:19:46 +0800
Subject: [PATCH 17/22] fix: standardize list access for all sample sizes

---
 R/bambu.R | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/R/bambu.R b/R/bambu.R
index 0254ca4e..ac14344f 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -294,11 +294,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
                 iter <- clustering
                 
               } else{ #if clusters is a list
-                if(length(quantData)>1){
-                  iter <- clusters[[i]] #lowMemory mode
-                }else{
-                  iter <- clusters#do.call(c,clusters)
-                }
+                iter <- clusters[[i]] 
               }
             }
             countsSeCompressed <- bplapply(iter, FUN = function(j){ # previous i changed to j to avoid duplicated assignment 

From 807ad06b96a87a60218b3b040b2bcbe2c011b974 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Fri, 27 Mar 2026 17:02:08 +0800
Subject: [PATCH 18/22] remove redundant code

---
 R/bambu.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/bambu.R b/R/bambu.R
index ac14344f..ab310f5b 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -317,8 +317,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
                 ColNames <- c(ColNames, names(iter))
                 colData.all[[i]] <- data.frame(
                   id = names(countsSeCompressed), 
-                  sampleName = names(countsSeCompressed), 
-                  row.names = names(countsSeCompressed)
+                  sampleName = names(countsSeCompressed)
                 )
             } else{
                 ColNames <- c(ColNames, colnames(quantData_i)) 

From 44e791dc1b9ed81663712f53aec90dbd31789a20 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 9 Apr 2026 15:00:56 +0800
Subject: [PATCH 19/22] correct order of extracted barcode to match
 devel_pre_v4

---
 R/bambu-processReads.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R
index 694cec03..b1442d1d 100644
--- a/R/bambu-processReads.R
+++ b/R/bambu-processReads.R
@@ -214,10 +214,11 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
                              verbose = verbose)
 
     if (demultiplexed) {
+        barcodes <- levels(mcols(readGrgList)$CB)
         metadata(se)$sampleData <- tibble(
-          id = paste(names(bam.file)[1], levels(mcols(readGrgList)$CB), sep = '_'),
+          id = paste(names(bam.file)[1], barcodes, sep = '_'),
           sampleName = names(bam.file)[1],
-          barcode = levels(mcols(readGrgList)$CB)
+          barcode = barcodes
         )
     } else{
         metadata(se)$sampleData <- tibble(

From 446407afbf82109a61cf8af4e87096f3f193e59f Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 9 Apr 2026 15:49:01 +0800
Subject: [PATCH 20/22] tidy for cleaner code

---
 R/bambu-processReads.R     |  9 +++------
 R/bambu_utilityFunctions.R | 14 +++++++-------
 R/prepareDataFromBam.R     |  5 +++++
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R
index b1442d1d..dd988584 100644
--- a/R/bambu-processReads.R
+++ b/R/bambu-processReads.R
@@ -54,7 +54,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
     returnModel <- isoreParameters[["returnModel"]]
     min.exonOverlap <- isoreParameters[["min.exonOverlap"]]
 
-    if(processByBam){ # bulk mode
+    if(processByBam){
         readClassList <- bplapply(seq_along(reads), function(i) {
             bambu.processReadsByFile(bam.file = reads[i],
             genomeSequence = genomeSequence,annotations = annotations,
@@ -64,7 +64,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
             processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode, 
             demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)},
             BPPARAM = bpParameters)
-    } else { # single cell mode 
+    } else {
         readGrgList <- bplapply(seq_along(reads), function(i) {
             bambu.readsByFile(bam.file = reads[i],
             genomeSequence = genomeSequence,annotations = annotations,
@@ -173,10 +173,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
 
     mcols(readGrgList)$id <- seq_along(readGrgList) 
 
-    sampleName <- names(bam.file)[1]
-
     if(!isFALSE(demultiplexed)){ 
-        mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
         mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
     } else {
         mcols(readGrgList)$sampleID <- index
@@ -409,7 +406,7 @@ splitReadClassFiles = function(readClassFile){
         i = rep(seq_along(counts.table), lengths(counts.table)),
         j = as.numeric(names(unlist(counts.table))),
         x = unlist(counts.table),
-        dims = c(nrow(eqClasses), length(metadata(readClassFile)$sampleData$id)))
+        dims = c(nrow(eqClasses), nrow(metadata(readClassFile)$sampleData)))
     #incompatible counts
     distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible
     if(nrow(distTable)==0) {
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index a80c70a3..44c2d747 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -310,14 +310,14 @@ generateColData <- function(readClassList, sampleMetadata, demultiplexed) {
 
   joinKey <- if (demultiplexed) "barcode" else "sampleName"
 
+  colData <- tibble(
+      id = metadata(readClassList)$sampleData$id, 
+      sampleName = metadata(readClassList)$sampleData$sampleName
+  ) 
+
   if (demultiplexed) {
-    colData <- tibble(
-        id = paste(metadata(readClassList)$sampleData$sampleName, metadata(readClassList)$sampleData$barcode, sep = '_'),
-        sampleName = metadata(readClassList)$sampleData$sampleName,
-        barcode = metadata(readClassList)$sampleData$barcode
-    )
-  } else{
-    colData <- tibble(id = metadata(readClassList)$sampleData$sampleName, sampleName = metadata(readClassList)$sampleData$sampleName)
+      colData <- colData %>%
+        mutate(barcode = metadata(readClassList)$sampleData$barcode)
   }
   
   colData <- colData %>%
diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R
index 980bd312..2c4486f3 100755
--- a/R/prepareDataFromBam.R
+++ b/R/prepareDataFromBam.R
@@ -101,6 +101,11 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
     } else {
         readGrgList <- readGrgList[[1]]
     }
+
+    if (demultiplexed){
+        mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
+    }
+
     # remove microexons of width 1bp from list
     readGrgList <- readGrgList <- readGrgList[sum(width(readGrgList)) > 1]
     numNoCBs <- sum(is.na(mcols(readGrgList)$CB))

From 93b2e7d482155b79f1156eaf330b5e9fc4366371 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 9 Apr 2026 16:44:53 +0800
Subject: [PATCH 21/22] add back row.names to prevent missing colnames in
 se_pseudobulk

---
 R/bambu.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/bambu.R b/R/bambu.R
index ab310f5b..cf61a555 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -317,7 +317,8 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
                 ColNames <- c(ColNames, names(iter))
                 colData.all[[i]] <- data.frame(
                   id = names(countsSeCompressed), 
-                  sampleName = names(countsSeCompressed)
+                  sampleName = names(countsSeCompressed),
+                  row.names = names(countsSeCompressed)
                 )
             } else{
                 ColNames <- c(ColNames, colnames(quantData_i)) 

From 01f4ff8db7fab687c8097b3aecaa33a0eac25908 Mon Sep 17 00:00:00 2001
From: lingminhao <lingminhao31@gmail.com>
Date: Thu, 9 Apr 2026 19:34:15 +0800
Subject: [PATCH 22/22] correct order of extracted barcode to match
 devel_pre_v4

---
 R/prepareDataFromBam.R | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R
index 2c4486f3..81beb4b2 100755
--- a/R/prepareDataFromBam.R
+++ b/R/prepareDataFromBam.R
@@ -68,10 +68,6 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
                     mcols(readGrgList[[counter]])$UMI <- readMap[,3][match(names(readGrgList[[counter]]),readMap[,1])]
                 }
             }
-            cells <- unique(c(cells, mcols(readGrgList[[counter]])$CB))
-            mcols(readGrgList[[counter]])$CB <- factor(mcols(readGrgList[[counter]])$CB, levels = cells)
-            umi <- unique(c(umi, mcols(readGrgList[[counter]])$UMI))
-            mcols(readGrgList[[counter]])$UMI <- factor(mcols(readGrgList[[counter]])$UMI, levels = umi)
         }
         if(cleanReads){
             softClip5Prime <- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '^(\\d*)[S].*', replace_pattern = '\\1')
@@ -103,7 +99,7 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
     }
 
     if (demultiplexed){
-        mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
+        mcols(readGrgList)$CB <- factor(mcols(readGrgList)$CB, levels = sort(unique(mcols(readGrgList)$CB)))
     }
 
     # remove microexons of width 1bp from list