diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R
index be73e0a8..750c1e87 100644
--- a/R/bambu-assignDist.R
+++ b/R/bambu-assignDist.R
@@ -3,7 +3,7 @@
 #' @import data.table
 #' @noRd
 assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, 
-                                        verbose, demultiplexed, spatial, 
+                                        verbose, sampleMetadata, demultiplexed,
                                         returnDistTable = FALSE, trackReads = TRUE) {
     if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
     metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
@@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
         mutate(aval = 1) %>%
         data.table()
     #return non-em counts
-    ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
+    ColData <- generateColData(readClassList, sampleMetadata, demultiplexed)
     quantData <- SummarizedExperiment(assays = SimpleList(
         counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
         rowRanges = annotations,
@@ -32,7 +32,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
     metadata(quantData)$readClassDt <- readClassDt
     metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix
     metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix 
-    metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames 
+    metadata(quantData)$sampleName <- metadata(readClassList)$sampleData$sampleName 
     if(returnDistTable)
         metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld
 
diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R
index f144c036..dd988584 100644
--- a/R/bambu-processReads.R
+++ b/R/bambu-processReads.R
@@ -54,7 +54,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
     returnModel <- isoreParameters[["returnModel"]]
     min.exonOverlap <- isoreParameters[["min.exonOverlap"]]
 
-    if(processByBam){ # bulk mode
+    if(processByBam){
         readClassList <- bplapply(seq_along(reads), function(i) {
             bambu.processReadsByFile(bam.file = reads[i],
             genomeSequence = genomeSequence,annotations = annotations,
@@ -64,7 +64,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
             processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode, 
             demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)},
             BPPARAM = bpParameters)
-    } else { # single cell mode 
+    } else {
         readGrgList <- bplapply(seq_along(reads), function(i) {
             bambu.readsByFile(bam.file = reads[i],
             genomeSequence = genomeSequence,annotations = annotations,
@@ -173,13 +173,6 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
 
     mcols(readGrgList)$id <- seq_along(readGrgList) 
 
-    sampleName <- names(bam.file)[1]
-    if(!isFALSE(demultiplexed)){
-        mcols(readGrgList)$CB <- paste0(sampleName, '_', mcols(readGrgList)$CB)
-    } else{
-        mcols(readGrgList)$CB <- sampleName
-        }
-    mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
     if(!isFALSE(demultiplexed)){ 
         mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
     } else {
@@ -217,9 +210,20 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
                              fusionMode = fusionMode,
                              verbose = verbose)
 
-    metadata(se)$samples <- names(bam.file)[1]
-    metadata(se)$sampleNames <- names(bam.file)[1]
-    if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$CB)                         
+    if (demultiplexed) {
+        barcodes <- levels(mcols(readGrgList)$CB)
+        metadata(se)$sampleData <- tibble(
+          id = paste(names(bam.file)[1], barcodes, sep = '_'),
+          sampleName = names(bam.file)[1],
+          barcode = barcodes
+        )
+    } else{
+        metadata(se)$sampleData <- tibble(
+          id = names(bam.file)[1],
+          sampleName = names(bam.file)[1]
+        )
+    }
+
     return(se)
 }
 
@@ -307,7 +311,6 @@ constructReadClasses <- function(readGrgList, genomeSequence, annotations,
     stranded = FALSE, min.readCount = 2, 
     fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE, 
     verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE){
-    warnings <- c() ###TODO
     
     if(processByChromosome){
         # construct read classes for each chromosome seperately 
@@ -403,12 +406,12 @@ splitReadClassFiles = function(readClassFile){
         i = rep(seq_along(counts.table), lengths(counts.table)),
         j = as.numeric(names(unlist(counts.table))),
         x = unlist(counts.table),
-        dims = c(nrow(eqClasses), length(metadata(readClassFile)$samples)))
+        dims = c(nrow(eqClasses), nrow(metadata(readClassFile)$sampleData)))
     #incompatible counts
     distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible
     if(nrow(distTable)==0) {
         counts.incompatible <- sparseMatrix(i= 1, j = 1, x = 0,
-        dims = c(1, length(metadata(readClassFile)$samples)))
+        dims = c(1, length(metadata(readClassFile)$sampleData$id)))
         rownames(counts.incompatible) <- "TODO"
     } else{
         distTable$sampleIDs <- rowData(readClassFile)$sampleIDs[match(distTable$readClassId, rownames(readClassFile))]
@@ -419,11 +422,11 @@ splitReadClassFiles = function(readClassFile){
             i = rep(seq_along(counts.table), lengths(counts.table)),
             j = as.numeric(names(unlist(counts.table))),
             x = unlist(counts.table),
-            dims = c(nrow(distTable), length(metadata(readClassFile)$samples)))
-        colnames(counts.incompatible) <- metadata(readClassFile)$samples
+            dims = c(nrow(distTable), length(metadata(readClassFile)$sampleData$id)))
+        colnames(counts.incompatible) <- metadata(readClassFile)$sampleData$id
         rownames(counts.incompatible) <- distTable$GENEID.i 
     }
-    colnames(counts) <- metadata(readClassFile)$samples
+    colnames(counts) <- metadata(readClassFile)$sampleData$id
     metadata(readClassFile)$eqClassById <- eqClasses$eqClassById
     #rownames(counts) = eqClasses$eqClassById
     metadata(readClassFile)$countMatrix <- counts
diff --git a/R/bambu-processReads_utilityConstructReadClasses.R b/R/bambu-processReads_utilityConstructReadClasses.R
index fec9adbf..a9610045 100644
--- a/R/bambu-processReads_utilityConstructReadClasses.R
+++ b/R/bambu-processReads_utilityConstructReadClasses.R
@@ -25,12 +25,14 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
             Please report this")
     start.ptm <- proc.time()
     if(!is.null(uniqueJunctions)){
-        exonsByRC.spliced <- constructSplicedReadClasses(
-            uniqueJunctions = uniqueJunctions,
-            unlisted_junctions = unlisted_junctions,
-            readGrgList = readGrgList,
-            stranded = stranded)}
-    else{exonsByRC.spliced = GRangesList()}
+      exonsByRC.spliced <- constructSplicedReadClasses(
+        uniqueJunctions = uniqueJunctions,
+        unlisted_junctions = unlisted_junctions,
+        readGrgList = readGrgList,
+        stranded = stranded)
+    } else{
+      exonsByRC.spliced = GRangesList()
+    }
     end.ptm <- proc.time()
     rm(readGrgList, unlisted_junctions, uniqueJunctions)
     if (verbose) 
@@ -38,11 +40,13 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
     "spliced junctions in ", round((end.ptm - start.ptm)[3] / 60, 1)," mins.")
     if(length(reads.singleExon)==0) { 
         exonsByRC.unspliced <- NULL
-    } else {exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon, 
-        annotations, exonsByRC.spliced, stranded, verbose)}
+    } else {
+      exonsByRC.unspliced <- constructUnsplicedReadClasses(reads.singleExon, 
+                             annotations, exonsByRC.spliced, stranded, verbose)
+    }
     exonsByRC <- c(exonsByRC.spliced, exonsByRC.unspliced)
     colDataDf <- DataFrame(name = runName, row.names = runName)
-    #TODO later remove assays = SimpleList(counts = counts)
+
     counts <- matrix(mcols(exonsByRC)$readCount,
                      dimnames = list(names(exonsByRC), runName))
     se <- SummarizedExperiment(assays = SimpleList(counts = counts),
diff --git a/R/bambu.R b/R/bambu.R
index edd8a8cc..cf61a555 100644
--- a/R/bambu.R
+++ b/R/bambu.R
@@ -93,6 +93,11 @@
 #' distTables. The output is a list with an entry for each sample.
 #' @param lowMemory Read classes will be processed by chromosomes when lowMemory 
 #' is specified. This option provides an efficient way to process big samples.
+#' @param sampleData A character vector of paths to metadata CSV files (or \code{NA} if 
+#' unavailable for specific samples); defaults to \code{NULL}. Files must contain a 
+#' "sampleName" column for bulk data or a "barcode" column for single-cell/spatial data. 
+#' For bulk data, one metadata CSV file for all samples is sufficient, whereas single-cell/spatial 
+#' data requires one metadata CSV file per sample.
 #' @param fusionMode A logical variable indicating whether run in fusion mode
 #' @param verbose A logical variable indicating whether processing messages will
 #' be printed.
@@ -138,8 +143,8 @@
 bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
     mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, 
     assignDist = TRUE, quant = TRUE, stranded = FALSE,  ncore = 1, yieldSize = NULL,  
-    trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE,
-    fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL,
+    trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, sampleData = NULL,
+    fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, quantData = NULL,
     sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL,
     processByChromosome = FALSE, processByBam = TRUE) {
     message(paste0("Running Bambu-v", "3.9.0"))
@@ -173,7 +178,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
         annotations <- checkInputs(annotations, reads,
             readClass.outputDir = rcOutDir, 
             genomeSequence = genome, discovery = discovery, 
-            sampleNames = sampleNames, spatial = spatial,quantData = quantData)
+            sampleNames = sampleNames, sampleData = sampleData, quantData = quantData)
     }
     isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery)
     #below line is to be compatible with earlier version of running bambu
@@ -234,16 +239,19 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
         }
         if(assignDist){
             message("--- Start calculating equivilance classes ---")
-            quantData <- bplapply(readClassList, 
-                                  FUN = assignReadClasstoTranscripts, 
-                                  annotations = annotations, 
-                                  isoreParameters = isoreParameters, 
-                                  verbose = verbose, 
-                                  demultiplexed = demultiplexed, 
-                                  spatial = spatial, 
-                                  returnDistTable = returnDistTable,
-                                  trackReads = trackReads,
-                                  BPPARAM = bpParameters)
+            quantData <- bplapply(seq_along(readClassList), function(i){
+              assignReadClasstoTranscripts(
+                readClassList = readClassList[[i]],
+                annotations = annotations, 
+                isoreParameters = isoreParameters, 
+                verbose = verbose, 
+                # for bulk data, there is one sampleData (keep sampleData[1]), for single-cell, there is one per sample
+                sampleMetadata = if(length(sampleData) == 1) sampleData[1] else sampleData[i],
+                demultiplexed = demultiplexed, 
+                returnDistTable = returnDistTable,
+                trackReads = trackReads
+              )
+            }, BPPARAM = bpParameters)
             if (!quant) return(quantData)
         }
     }
@@ -262,6 +270,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
         start.ptm <- proc.time()
         countsSeCompressed.all <- NULL
         ColNames <- c()
+        colData.all <- list()
         for(i in seq_along(quantData)){
             quantData_i <- quantData[[i]]
             #load in the barcode clustering from file if provided
@@ -285,11 +294,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
                 iter <- clustering
                 
               } else{ #if clusters is a list
-                if(length(quantData)>1){
-                  iter <- clusters[[i]] #lowMemory mode
-                }else{
-                  iter <- clusters#do.call(c,clusters)
-                }
+                iter <- clusters[[i]] 
               }
             }
             countsSeCompressed <- bplapply(iter, FUN = function(j){ # previous i changed to j to avoid duplicated assignment 
@@ -310,13 +315,20 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
             message("Total Time ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.")
             if(!is.null(clusters)){
                 ColNames <- c(ColNames, names(iter))
+                colData.all[[i]] <- data.frame(
+                  id = names(countsSeCompressed), 
+                  sampleName = names(countsSeCompressed),
+                  row.names = names(countsSeCompressed)
+                )
             } else{
                 ColNames <- c(ColNames, colnames(quantData_i)) 
+                colData.all[[i]] <- data.frame(colData(quantData_i))
             }
             countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed)
         }
-        countsSeCompressed.all$colnames <- ColNames            
-        countsSe <- combineCountSes(countsSeCompressed.all, annotations)
+        names(countsSeCompressed.all) <- ColNames   
+        
+        countsSe <- combineCountSes(countsSeCompressed.all, colData.all, annotations)
         if(returnDistTable){
             distTables = list()
             for(i in seq_along(quantData)){
@@ -324,11 +336,6 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
             }
             metadata(countsSe)$distTables <- distTables
         }
-        #metadata(countsSe)$warnings = warnings
-
-        ColData <- generateColData(colnames(countsSe), clusters, demultiplexed, spatial)
-        colData(countsSe) <- ColData
-        colnames(countsSe) <- ColData[,1]
         return(countsSe)
     }
   }
\ No newline at end of file
diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R
index 5deac8ba..44c2d747 100644
--- a/R/bambu_utilityFunctions.R
+++ b/R/bambu_utilityFunctions.R
@@ -73,7 +73,7 @@ updateParameters <- function(Parameters, Parameters.default) {
 #' @importFrom methods is
 #' @noRd
 checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, 
-                        discovery, sampleNames, spatial, quantData){
+                        discovery, sampleNames, sampleData, quantData){
     # ===# Check annotation inputs #===#
     if (!is.null(annotations)) {
         if (is(annotations, "CompressedGRangesList")) {
@@ -156,13 +156,18 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence,
         }
     }
 
-    if(!is.null(spatial)){
-        #if(!all(grepl(".tsv^", spatial))){stop("Not all paths for spatial are .tsv files")}
-        if(length(spatial)==1 & length(reads)>1){
-            warning("Using the same whitelist and coordinates for all input samples")
-        } else if(length(reads)!=length(spatial)){
-            stop("There are not the same number spatial whitelist paths as input files to reads. ",
-            "Make sure these two arguments are vectors of the same length")
+    if(!is.null(sampleData)){
+        if (!all(grepl("\\.(csv|tsv|txt)$", na.omit(sampleData), ignore.case = TRUE))){
+            stop("Not all paths for sample metadata files are .csv/.tsv/.txt files")
+        }
+        if(length(sampleData)==1 & length(reads)>1){ # one sample metadata for all samples
+            message("Using the same sample metadata file for all input samples")
+        } else if(length(reads)!=length(sampleData)){ # multiple sample metadatas for multiple samples
+            stop(
+                "The number of sample metadata files does not match the number of input read files. ",
+                "These two arguments (sampleData & reads) must be vectors of the same length. ",
+                "If a specific sample has no metadata, please use 'NA' as a placeholder in the sampleData vector."
+            )
         }
     }
     return(annotations)
@@ -255,13 +260,11 @@ calculateDistTable <- function(readClassList, annotations, isoreParameters, verb
         return(readClassDist)
 }
 
-#' Combine count se object while preserving the metadata objects
+#' Combine combined count se object from multiple samples, cells or spatial locations
 #' @noRd
-combineCountSes <- function(countsSe, annotations){
-    countsData <- c("counts", "CPM", "fullLengthCounts", 
-                    "uniqueCounts", "incompatibleCounts")
-    sampleNames <- countsSe$colnames
-    countsSe$colnames <- NULL
+combineCountSes <- function(countsSe, colDataList, annotations){
+    countsData <- c("counts", "CPM", "fullLengthCounts", "uniqueCounts", "incompatibleCounts")
+    sampleNames <- names(countsSe)
     countsDataMat <- lapply(countsData, FUN = function(k){
         countsVecList <- lapply(countsSe, function(j){j[[k]]})
         countsMat <- sparseMatrix(i = unlist(lapply(countsVecList, function(j) j@i)),
@@ -279,56 +282,51 @@ combineCountSes <- function(countsSe, annotations){
         return(countsMat)
     })
     names(countsDataMat) <- countsData
-    countsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, 
+    combinedCountsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, 
                                                         CPM = countsDataMat$CPM, 
                                                         fullLengthCounts = countsDataMat$fullLengthCounts, 
                                                         uniqueCounts = countsDataMat$uniqueCounts))
-    metadata(countsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts
-    rowRanges(countsSe) <- annotations
-    return(countsSe)
+    metadata(combinedCountsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts
+    rowRanges(combinedCountsSe) <- annotations
+
+    colData(combinedCountsSe) <- DataFrame(bind_rows(colDataList))
+    
+    return(combinedCountsSe)
 }
 
-#' Generate the coldata for se options using colnames, and other option inputs
-#' @noRd
-generateColData <- function(sampleNames, clusters, demultiplexed, spatial){
-    ColData <- DataFrame(id = sampleNames)
-    if(!isFALSE(demultiplexed) & is.null(clusters)){
-        ColData <- DataFrame(id = sampleNames, 
-                        sampleName = gsub("_[^_]+$","", sampleNames, perl = TRUE), 
-                        Barcode = gsub(".*_(?=[^_]*$)","", sampleNames, perl = TRUE))
-    }
-    if(!is.null(spatial) & is.null(clusters)){
-        ColData$x_coordinate <- NA
-        ColData$y_coordinate <- NA
-        if(length(spatial)==1){
-            # the following line takes a regular delimited file as input
-            # it can either has header or without header
-            # it can also be compressed 
-            bc_coords <- fread(spatial, 
-                col.names = c("Barcode", "x_coordinate", "y_coordinate"),
-                data.table = FALSE)
-                # DataFrame(read.table(gzfile(spatial),
-                # col.names = c("Barcode", "x_coordinate", "y_coordinate")))
-            bcMatch <- match(ColData$Barcode, bc_coords$Barcode)
-            ColData$x_coordinate <- bc_coords$x_coordinate[bcMatch]
-            ColData$y_coordinate <- bc_coords$y_coordinate[bcMatch]
-        } else{
-            spatial.unique <- unique(spatial)
-            for(whitelist in spatial.unique){
-                i <- which(spatial.unique==whitelist)
-                bc_coords <- fread(whitelist, 
-                                   col.names = c("Barcode", "x_coordinate", "y_coordinate"),
-                                   data.table = FALSE)
-                    # DataFrame(read.table(gzfile(whitelist), 
-                    # col.names = c("Barcode", "x_coordinate", "y_coordinate")))
-                bcSampleIndex <- ColData$sampleName %in% sampleNames[i]
-                bcMatch <- match(ColData$Barcode[bcSampleIndex], bc_coords$Barcode)
-                ColData$x_coordinate[bcSampleIndex] <- bc_coords$x_coordinate[bcMatch]
-                ColData$y_coordinate[bcSampleIndex] <- bc_coords$y_coordinate[bcMatch]
-            }
-        }
-    }
-    return(ColData)
+#' Generate the colData using the external sampleMetadata.csv provided by the user in the sampleMetadata argument
+#' @param readClassList A list object containingmetadata about read classes.
+#' @param sampleMetadata A path to a CSV file or NULL/NA if there is no metadata for the sample.
+#' @param demultiplexed Logical; indicates if data is demultiplexed.
+#'
+#' @return A DataFrame containing colData for the sample.
+#' @export
+generateColData <- function(readClassList, sampleMetadata, demultiplexed) {
+  sampleMetadataDf <- if (is.null(sampleMetadata) || is.na(sampleMetadata)) {
+    if (demultiplexed) tibble(barcode = character()) else tibble(sampleName = character())
+  } else {
+    fread(sampleMetadata)
+  }
+
+  joinKey <- if (demultiplexed) "barcode" else "sampleName"
+
+  colData <- tibble(
+      id = metadata(readClassList)$sampleData$id, 
+      sampleName = metadata(readClassList)$sampleData$sampleName
+  ) 
+
+  if (demultiplexed) {
+      colData <- colData %>%
+        mutate(barcode = metadata(readClassList)$sampleData$barcode)
+  }
+  
+  colData <- colData %>%
+    left_join(sampleMetadataDf, by = joinKey) %>%
+    as.data.frame()
+  
+  rownames(colData) <- colData$id
+  
+  colData
 }
 
 # Quick wrapper function (https://stackoverflow.com/questions/13273833/merging-multiple-data-tables)
diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R
index e9634a92..81beb4b2 100755
--- a/R/prepareDataFromBam.R
+++ b/R/prepareDataFromBam.R
@@ -46,13 +46,20 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
         if (!isFALSE(demultiplexed)){ # if demultiplexed is TRUE or a string path 
             if(isTRUE(demultiplexed)){ # if demultiplexed is TRUE
       
-                mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether CB is contained in the name, with specific format CB_UMI#READNAME, 
-                                                              !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB, 
-                                                              TRUE ~ NA) 
+                # a checkpoint to parse CB and UMI from the bam file, either from reads or CB/UMI tags.
+                # currently read name only accepts the format CB_UMI#READNAME (CB & UMI cannot have '_', otherwise parsing fails) 
+                mcols(readGrgList[[counter]])$CB <- case_when(
+                    !is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB,
+                    grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])),
+                    TRUE ~ NA
+                ) 
 
-                mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format CB_UMI#READNAME, 
-                                                               !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB, 
-                                                               TRUE ~ NA) 
+                mcols(readGrgList[[counter]])$UMI <- case_when(
+                    !is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB,
+                    grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), 
+                    TRUE ~ NA
+                )
+                
             } else{ # if demultiplexed is a string path
                 mcols(readGrgList[[counter]])$CB <- NA
                 mcols(readGrgList[[counter]])$UMI <- NA
@@ -61,10 +68,6 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
                     mcols(readGrgList[[counter]])$UMI <- readMap[,3][match(names(readGrgList[[counter]]),readMap[,1])]
                 }
             }
-            cells <- unique(c(cells, mcols(readGrgList[[counter]])$CB))
-            mcols(readGrgList[[counter]])$CB <- factor(mcols(readGrgList[[counter]])$CB, levels = cells)
-            umi <- unique(c(umi, mcols(readGrgList[[counter]])$UMI))
-            mcols(readGrgList[[counter]])$UMI <- factor(mcols(readGrgList[[counter]])$UMI, levels = umi)
         }
         if(cleanReads){
             softClip5Prime <- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '^(\\d*)[S].*', replace_pattern = '\\1')
@@ -94,6 +97,11 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
     } else {
         readGrgList <- readGrgList[[1]]
     }
+
+    if (demultiplexed){
+        mcols(readGrgList)$CB <- factor(mcols(readGrgList)$CB, levels = sort(unique(mcols(readGrgList)$CB)))
+    }
+
     # remove microexons of width 1bp from list
     readGrgList <- readGrgList <- readGrgList[sum(width(readGrgList)) > 1]
     numNoCBs <- sum(is.na(mcols(readGrgList)$CB))