9393# ' distTables. The output is a list with an entry for each sample.
9494# ' @param lowMemory Read classes will be processed chromosome by chromosome when
9595# ' lowMemory is TRUE. This option provides an efficient way to process big samples.
96+ # ' @param sampleData A character vector of paths to metadata CSV files (or \code{NA} if
97+ # ' unavailable for specific samples); defaults to \code{NULL}. Files must contain a
98+ # ' "sampleName" column for bulk data or a "barcode" column for single-cell/spatial data.
99+ # ' For bulk data, one metadata CSV file for all samples is sufficient, whereas single-cell/spatial
100+ # ' data requires one metadata CSV file per sample.
96101# ' @param fusionMode A logical variable indicating whether to run in fusion mode
97102# ' @param verbose A logical variable indicating whether processing messages will
98103# ' be printed.
138143bambu <- function (reads , annotations = NULL , genome = NULL , NDR = NULL ,
139144 mode = NULL , opt.discovery = NULL , opt.em = NULL , rcOutDir = NULL , discovery = TRUE ,
140145 assignDist = TRUE , quant = TRUE , stranded = FALSE , ncore = 1 , yieldSize = NULL ,
141- trackReads = FALSE , returnDistTable = FALSE , lowMemory = FALSE ,
142- fusionMode = FALSE , verbose = FALSE , demultiplexed = FALSE , spatial = NULL , quantData = NULL ,
146+ trackReads = FALSE , returnDistTable = FALSE , lowMemory = FALSE , sampleData = NULL ,
147+ fusionMode = FALSE , verbose = FALSE , demultiplexed = FALSE , quantData = NULL ,
143148 sampleNames = NULL , cleanReads = FALSE , dedupUMI = FALSE , barcodesToFilter = NULL , clusters = NULL ,
144149 processByChromosome = FALSE , processByBam = TRUE ) {
145150 message(paste0(" Running Bambu-v" , " 3.9.0" ))
@@ -173,7 +178,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
173178 annotations <- checkInputs(annotations , reads ,
174179 readClass.outputDir = rcOutDir ,
175180 genomeSequence = genome , discovery = discovery ,
176- sampleNames = sampleNames , spatial = spatial , quantData = quantData )
181+ sampleNames = sampleNames , sampleData = sampleData , quantData = quantData )
177182 }
178183 isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery )
179184 # below line is to be compatible with earlier version of running bambu
@@ -234,16 +239,19 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
234239 }
235240 if (assignDist ){
236241 message(" --- Start calculating equivilance classes ---" )
237- quantData <- bplapply(readClassList ,
238- FUN = assignReadClasstoTranscripts ,
239- annotations = annotations ,
240- isoreParameters = isoreParameters ,
241- verbose = verbose ,
242- demultiplexed = demultiplexed ,
243- spatial = spatial ,
244- returnDistTable = returnDistTable ,
245- trackReads = trackReads ,
246- BPPARAM = bpParameters )
242+ quantData <- bplapply(seq_along(readClassList ), function (i ){
243+ assignReadClasstoTranscripts(
244+ readClassList = readClassList [[i ]],
245+ annotations = annotations ,
246+ isoreParameters = isoreParameters ,
247+ verbose = verbose ,
248+ # bulk data has a single sampleData entry (use sampleData[1]); single-cell data has one entry per sample
249+ sampleMetadata = if (length(sampleData ) == 1 ) sampleData [1 ] else sampleData [i ],
250+ demultiplexed = demultiplexed ,
251+ returnDistTable = returnDistTable ,
252+ trackReads = trackReads
253+ )
254+ }, BPPARAM = bpParameters )
247255 if (! quant ) return (quantData )
248256 }
249257 }
@@ -262,6 +270,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
262270 start.ptm <- proc.time()
263271 countsSeCompressed.all <- NULL
264272 ColNames <- c()
273+ colData.all <- list ()
265274 for (i in seq_along(quantData )){
266275 quantData_i <- quantData [[i ]]
267276 # load in the barcode clustering from file if provided
@@ -285,11 +294,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
285294 iter <- clustering
286295
287296 } else { # if clusters is a list
288- if (length(quantData )> 1 ){
289- iter <- clusters [[i ]] # lowMemory mode
290- }else {
291- iter <- clusters # do.call(c,clusters)
292- }
297+ iter <- clusters [[i ]]
293298 }
294299 }
295300 countsSeCompressed <- bplapply(iter , FUN = function (j ){ # previous i changed to j to avoid duplicated assignment
@@ -310,25 +315,27 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
310315 message(" Total Time " , round((end.ptm - start.ptm )[3 ] / 60 , 3 ), " mins." )
311316 if (! is.null(clusters )){
312317 ColNames <- c(ColNames , names(iter ))
318+ colData.all [[i ]] <- data.frame (
319+ id = names(countsSeCompressed ),
320+ sampleName = names(countsSeCompressed ),
321+ row.names = names(countsSeCompressed )
322+ )
313323 } else {
314324 ColNames <- c(ColNames , colnames(quantData_i ))
325+ colData.all [[i ]] <- data.frame (colData(quantData_i ))
315326 }
316327 countsSeCompressed.all <- c(countsSeCompressed.all , countsSeCompressed )
317328 }
318- countsSeCompressed.all $ colnames <- ColNames
319- countsSe <- combineCountSes(countsSeCompressed.all , annotations )
329+ names(countsSeCompressed.all ) <- ColNames
330+
331+ countsSe <- combineCountSes(countsSeCompressed.all , colData.all , annotations )
320332 if (returnDistTable ){
321333 distTables = list ()
322334 for (i in seq_along(quantData )){
323335 distTables [[i ]] <- metadata(quantData [[i ]])$ distTable
324336 }
325337 metadata(countsSe )$ distTables <- distTables
326338 }
327- # metadata(countsSe)$warnings = warnings
328-
329- ColData <- generateColData(colnames(countsSe ), clusters , demultiplexed , spatial )
330- colData(countsSe ) <- ColData
331- colnames(countsSe ) <- ColData [,1 ]
332339 return (countsSe )
333340 }
334341 }
0 commit comments