Skip to content

Commit 14081c6

Browse files
author
Chen Ying
authored
Merge pull request #496 from GoekeLab/devel
update multiplex_major_patch to keep track of the changes made to main branch
2 parents 9572dc0 + c5e923d commit 14081c6

8 files changed

+52
-67
lines changed

.github/workflows/check-bioc.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ jobs:
5454
fail-fast: false
5555
matrix:
5656
config:
57-
- { os: ubuntu-latest, r: '4.4', bioc: '3.19', cont: "bioconductor/bioconductor_docker:RELEASE_3_19", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
58-
- { os: macOS-latest, r: '4.4', bioc: '3.19'}
59-
- { os: windows-latest, r: '4.4', bioc: '3.19'}
57+
- { os: ubuntu-latest, r: '4.4.2', bioc: '3.20', cont: "bioconductor/bioconductor_docker:RELEASE_3_20", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
58+
- { os: macOS-latest, r: '4.4.2', bioc: '3.20'}
59+
## - { os: windows-latest, r: '4.4', bioc: '3.20'}
6060
## Check https://github.com/r-lib/actions/tree/master/examples
6161
## for examples using the http-user-agent
6262
env:
@@ -107,16 +107,16 @@ jobs:
107107
uses: actions/cache@v4
108108
with:
109109
path: ${{ env.R_LIBS_USER }}
110-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.3-${{ hashFiles('.github/depends.Rds') }}
111-
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.3-
110+
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.4.2-${{ hashFiles('.github/depends.Rds') }}
111+
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.4.2-
112112

113113
- name: Cache R packages on Linux
114114
if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' "
115115
uses: actions/cache@v4
116116
with:
117117
path: /home/runner/work/_temp/Library
118-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-${{ hashFiles('.github/depends.Rds') }}
119-
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-
118+
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4.2-${{ hashFiles('.github/depends.Rds') }}
119+
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4.2-
120120

121121
- name: Install Linux system dependencies
122122
if: runner.os == 'Linux'
@@ -339,7 +339,7 @@ jobs:
339339
if: failure()
340340
uses: actions/upload-artifact@v4
341341
with:
342-
name: ${{ runner.os }}-biocversion-RELEASE-r-4.4-results
342+
name: ${{ runner.os }}-biocversion-RELEASE-r-4.4.2-results
343343
path: check
344344

345345
- uses: docker/build-push-action@v1

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: bambu
22
Type: Package
33
Title: Context-Aware Transcript Quantification from Long Read RNA-Seq data
4-
Version: 3.5.1
4+
Version: 3.11.1
55
Authors@R: c(person("Ying", "Chen", role = c("cre","aut"),
66
email = "chen_ying@gis.a-star.edu.sg"),
77
person("Andre", "Sim", role = "aut",

R/bambu-extendAnnotations-utilityCombine.R

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ isore.combineTranscriptCandidates <- function(readClassList,
1717
combinedSplicedTranscripts <-
1818
combineSplicedTranscriptModels(readClassList, bpParameters,
1919
min.readCount, min.readFractionByGene,
20-
min.txScore.multiExon, min.txScore.singleExon, verbose) %>% data.table()
20+
min.txScore.multiExon, min.txScore.singleExon, verbose)
2121
combinedSplicedTranscripts[,confidenceType := "highConfidenceJunctionReads"]
2222
# when single exon min score is greater than 1, skip unspliced transcripts combination
2323
# this is a very customized config, useful when data is very big
@@ -92,40 +92,34 @@ sequentialCombineFeatureTibble <- function(readClassList,
9292

9393
#' @noRd
9494
updateStartEndReadCount <- function(combinedFeatureTibble){
95-
combinedFeatureTibble <- combinedFeatureTibble %>%
96-
mutate(rowID = row_number())
97-
98-
startEndCountTibble <- combinedFeatureTibble %>%
99-
select(rowID, starts_with("start"),starts_with("end"),
100-
starts_with("readCount")) %>%
101-
tidyr::pivot_longer(c(starts_with("start"),starts_with("end"),
102-
starts_with("readCount")), names_to = c(".value","set"),
103-
names_pattern = "(.*)\\.(.)") %>%
104-
group_by(rowID) %>%
105-
mutate(sumReadCount = sum(readCount,na.rm = TRUE))
95+
setDT(combinedFeatureTibble)
96+
combinedFeatureTibble[, rowID := .I]
10697

107-
startTibble <- select(startEndCountTibble, rowID, start, readCount,
108-
sumReadCount) %>%
109-
arrange(start) %>%
110-
filter(cumsum(readCount)/sumReadCount>=0.5) %>%
111-
filter(row_number()==1)
112-
endTibble <- select(startEndCountTibble, rowID, end, readCount,
113-
sumReadCount) %>%
114-
arrange(end) %>%
115-
filter(cumsum(readCount)/sumReadCount>=0.5) %>%
116-
filter(row_number()==1)
98+
colNames <- colnames(combinedFeatureTibble)
99+
readCountCols <- sort(colNames[grep("^readCount", colNames)]) # to make sure it's ordered by sample name
100+
startCols <- sort(colNames[grep("^start", colNames)])
101+
endCols <- sort(colNames[grep("^end", colNames)])
117102

118-
combinedFeatureTibble <- combinedFeatureTibble %>%
119-
dplyr::select(intronStarts, intronEnds, chr, strand, maxTxScore,
120-
maxTxScore.noFit, NSampleReadCount, NSampleReadProp,
121-
NSampleTxScore, rowID) %>%
122-
full_join(select(startTibble, rowID, start), by = "rowID") %>%
123-
full_join(select(endTibble, rowID, end, readCount=sumReadCount),
124-
by = "rowID") %>%
125-
select(-rowID)
103+
startEndDt <- combinedFeatureTibble[,
104+
.(start = readCountWeightedMedian(.SD,x,y),
105+
end = readCountWeightedMedian(.SD,z,y),
106+
readCount = sum(.SD[,y], na.rm = TRUE)),
107+
by = rowID, env = I(list(x = startCols, y = readCountCols,z = endCols))]
108+
109+
combinedFeatureTibble <- startEndDt[combinedFeatureTibble[,.(intronStarts, intronEnds, chr, strand, maxTxScore,
110+
maxTxScore.noFit, NSampleReadCount, NSampleReadProp,
111+
NSampleTxScore, rowID)], on = "rowID"]
112+
combinedFeatureTibble[, rowID := NULL]
126113
return(combinedFeatureTibble)
127114
}
128115

116+
#' Function to get median value without interpolation using certain column names
117+
#' @noRd
118+
readCountWeightedMedian <- function(dt, valuevar, timesvar){
119+
sortVector <- rep(na.omit(unlist(dt[,..valuevar])),
120+
times = as.integer(na.omit(unlist(dt[,..timesvar]))))
121+
return(min(sortVector[sortVector>=quantile(sortVector, probs = 0.5)]))
122+
}
129123

130124

131125
#' Function to combine featureTibble and create the NSample variables

R/prepareAnnotations_utilityFunctions.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ prepareAnnotationsFromGTF <- function(file) {
2525
"score", "strand", "frame", "attribute")
2626
data <- data[data$type == "exon", ]
2727
data$strand[data$strand == "."] <- "*"
28-
data$GENEID <- gsub("gene_id (.*?);.*", "\\1", data$attribute)
28+
data$GENEID <- gsub(".*gene_id (.*?);.*", "\\1", data$attribute)
2929
data$TXNAME <- gsub(".*transcript_id (.*?);.*", "\\1", data$attribute)
3030
data$NDR <- NULL
3131
data$maxTxScore <- NULL

README.md

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,16 @@ By default bambu will write four .gtf files
216216
```rscript
217217
writeBambuOutput(se, path = "./bambu/")
218218
```
219-
If you would like to avoid outputting any of the above .gtf for space concerns, each can be toggled off with the below arguments.
219+
220+
If you are only interested in the novel transcripts, you can first filter the 'se' object to remove the reference annotations.
220221
```rscript
221-
writeBambuOutput(se.novel, path = "./bambu/", outputExtendedAnno = FALSE, outputAll = FALSE, outputBambuModels = FALSE, outputNovelOnly = FALSE)
222+
se.novel = se[mcols(se)$novelTranscript,]
223+
writeBambuOutput(se.novel, path = "./bambu/")
222224
```
225+
If you are only interested in novel full-length transcripts that were detected by Bambu in at least 1 sample, you can filter the 'se' object as follows:
226+
```rscript
227+
se.novel = se[mcols(se)$novelTranscript&(apply(assays(se)$fullLengthCounts >= 1,1,sum)>=1),]
228+
writeBambuOutput(se.novel, path = "./bambu/")
223229

224230
If quant is set to FALSE, i.e. only transcript discovery is performed, only the rowRanges output of the extended annotations is returned (a GRangesList object). The equivalent rowData can be accessed with mcols().
225231
These annotations can be written to a .gtf file using writeAnnotationsToGTF(GRangesList_object, output_path).
@@ -679,31 +685,16 @@ metadata(rowRanges(se))$warnings
679685

680686
### Release History
681687

682-
**bambu v3.9.0**
683-
684-
Release date: 2025-xxx-xx
685-
686-
- Subset transcripts and those above the NDR threshold are placed into the metadata of the annotations in $subsetTranscripts and $lowConfidenceTranscripts respectively (when filtered out by default).
687-
- adds the setNDR function
688-
- outputs the NDR, txScore and txScore.noFit as attributes to the gtf file and these are also read in with prepareAnnotations.
689-
- Added setNDR as part of quant, which means that users can provide their extendedAnnotations alongside an NDR threshold when running bambu and it will automatically adjust the NDR used for quant. This means users do not need to manually filter the NDR value themselves.
690-
- NDR and other stats are now copied over to equal transcripts even if above the NDR threshold (previously only happened for those below the NDR threshold)
691-
- Read class to transcript assignment is now its own step instead of being done with quant. This is turned on and off with assignDist.
692-
- Added demultiplexed argument
693-
- Added spatial argument
694-
- Added sampleNames argument
695-
- Added cleanReads argument
696-
- Added dedupUMI argument
697-
- Added clusters argument
698-
- Deprecated lowMemory - This has been replaced by processByChromosome
699-
- Added processByChromosome (replaces the old lowMemory argument)
700-
- Added processByBam argument
701-
- Added importBambuResults()
702-
- writeBambuOutput now outputs all information needed to import Bambu results from text files
703-
- Count outputs are all now in sparse matrix format
688+
689+
**bambu v3.8.2**
690+
691+
Release date: 2025-02-06
704692

705693
Minor changes:
706-
- Warnings will no longer occur if there are seqlevels in the readGrgList that are not in the annotations or genome. This was done by setting seqlevels of the reads to only those in the reads. The warning was constantly occurring because all the scaffolds used in alignment were in the bam files, even if no reads from these scaffolds existed.
694+
695+
- Fix large number of samples [issue](https://github.com/GoekeLab/bambu/issues/450)
696+
- Fix denovo bug issue
697+
707698

708699
**bambu v3.2.6**
709700

Binary file not shown.
Binary file not shown.

tests/testthat/test_isore.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ test_that("isore.combineTranscriptCandidates completes successfully", {
7171

7272
expect_equal(seIsoReCombined, seIsoReCombinedExpected)
7373
expect_named(seIsoReCombined,
74-
c('intronStarts', 'intronEnds', 'chr', 'strand', 'maxTxScore', 'maxTxScore.noFit',
75-
'NSampleReadCount', 'NSampleReadProp', 'NSampleTxScore', 'start', 'end', 'readCount', 'confidenceType')
74+
c('start', 'end', 'readCount','intronStarts', 'intronEnds', 'chr', 'strand', 'maxTxScore', 'maxTxScore.noFit',
75+
'NSampleReadCount', 'NSampleReadProp', 'NSampleTxScore', 'confidenceType')
7676
)
7777
})
7878

0 commit comments

Comments
 (0)