diff --git a/DESCRIPTION b/DESCRIPTION index 298d987..7dc3289 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: immunedeconv Title: Methods for immune cell deconvolution Description: Collection of methods for immune cell deconvolution from bulk RNA-seq samples. -Version: 2.1.0 +Version: 2.1.4 Authors@R: c( person("Lorenzo", "Merotto", role=c("aut", "cre"), email="lorenzo.merotto@uibk.ac.at"), person("Gregor", "Sturm", role=c("aut"), email="mail@gregor-sturm.de") @@ -66,4 +66,4 @@ Roxygen: list(markdown = TRUE) LazyData: true URL: https:/omnideconv.org/immunedeconv, https://github.com/omnideconv/immunedeconv BugReports: https://github.com/omnideconv/immunedeconv/issues -RoxygenNote: 7.3.3 +Config/roxygen2/version: 8.0.0 diff --git a/R/cell_type_mapping.R b/R/cell_type_mapping.R index 3bc21c8..9c6c6ae 100644 --- a/R/cell_type_mapping.R +++ b/R/cell_type_mapping.R @@ -134,7 +134,7 @@ summarise_children <- function(df) { df[set_to_zero] <- 0 # finally, sum up the children. - df_sum <- summarise_all(df, funs(sum)) + df_sum <- summarise(df, across(everything(), sum)) df_sum } diff --git a/R/immune_deconvolution_methods.R b/R/immune_deconvolution_methods.R index 4c773a7..551f268 100644 --- a/R/immune_deconvolution_methods.R +++ b/R/immune_deconvolution_methods.R @@ -159,12 +159,43 @@ deconvolute_xcell <- function(gene_expression_matrix, arrays, expected_cell_type #' @param gene_expression_matrix a m x n matrix with m genes and n samples #' @param feature_types type of identifiers used for expression features. May be #' one of `"affy133P2_probesets","HUGO_symbols","ENTREZ_ID"` +#' @param log_transform Controls whether the expression matrix is log2-transformed before +#' running MCP-counter. MCP-counter expects log-transformed data. One of `NULL` (default), +#' `TRUE`, or `FALSE`. +#' \itemize{ +#' \item `NULL` – auto-detect: if `max(gene_expression_matrix) > 50` the data are assumed +#' to be in linear (TPM) scale and will be log2(x + 1)-transformed. +#' \item `TRUE` – always apply log2(x + 1) transformation. +#' \item `FALSE` – assume data are already log-transformed; no transformation is applied. +#' } #' @param ... passed through to original MCP-counter function. A native argument takes precedence #' over an immunedeconv argument (e.g. `featureType` takes precedence over `feature_types`) #' See [MCPcounter.estimate](https://github.com/ebecht/MCPcounter/blob/master/Source/R/MCPcounter.R#L19). #' #' @export -deconvolute_mcp_counter <- function(gene_expression_matrix, feature_types = "HUGO_symbols", ...) { +deconvolute_mcp_counter <- function(gene_expression_matrix, feature_types = "HUGO_symbols", + log_transform = NULL, ...) { + if (is.null(log_transform)) { + if (max(gene_expression_matrix, na.rm = TRUE) > 50) { + message( + "MCP-counter expects log-transformed expression data. ", + "The input data appears to be in linear (TPM) scale (max value > 50) and will be ", + "log2(x + 1)-transformed automatically. Set `log_transform = TRUE` or ", + "`log_transform = FALSE` to force or disable this behaviour." + ) + gene_expression_matrix <- log2(as.matrix(gene_expression_matrix) + 1) + } else { + message( + "MCP-counter expects log-transformed expression data. ", + "The input data appears to be already log-transformed (max value <= 50). ", + "No transformation applied. Set `log_transform = TRUE` or ", + "`log_transform = FALSE` to override this automatic detection." + ) + } + } else if (isTRUE(log_transform)) { + gene_expression_matrix <- log2(as.matrix(gene_expression_matrix) + 1) + } + arguments <- dots_list(gene_expression_matrix, featuresType = feature_types, ..., .homonyms = "last") call <- rlang::call2(MCPcounter::MCPcounter.estimate, !!!arguments) eval(call) diff --git a/man/available_datasets.Rd b/man/available_datasets.Rd index 986f39c..30bb778 100644 --- a/man/available_datasets.Rd +++ b/man/available_datasets.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cell_type_mapping.R -\docType{data} \name{available_datasets} \alias{available_datasets} \title{Available methods and datasets.} -\format{ -An object of class \code{character} of length 20. -} \usage{ available_datasets } @@ -14,4 +10,3 @@ available_datasets A list of all methods (e.g. \code{cibersort}) and datasets (e.g. \code{schelker_ovarian}) for that the cell types are mapped to the controlled vocabulary. } -\keyword{datasets} diff --git a/man/cell_type_list.Rd b/man/cell_type_list.Rd index 1fbb0e7..4910410 100644 --- a/man/cell_type_list.Rd +++ b/man/cell_type_list.Rd @@ -1,16 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cell_type_mapping.R -\docType{data} \name{cell_type_list} \alias{cell_type_list} \title{List with controlled cell-type vocabulary} -\format{ -An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 76 rows and 3 columns. -} \usage{ cell_type_list } \description{ List with controlled cell-type vocabulary } -\keyword{datasets} diff --git a/man/cell_type_map.Rd b/man/cell_type_map.Rd index 00f7558..afa0139 100644 --- a/man/cell_type_map.Rd +++ b/man/cell_type_map.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cell_type_mapping.R -\docType{data} \name{cell_type_map} \alias{cell_type_map} \title{Table mapping the cell types from methods/datasets to a single, controlled vocabulary.} -\format{ -An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 258 rows and 3 columns. -} \usage{ cell_type_map } @@ -16,4 +12,3 @@ Columns: \code{method_dataset}, \code{method_cell_type}, \code{cell_type}. \details{ See \code{inst/extdata/cell_type_mapping.xlsx} for more details. } -\keyword{datasets} diff --git a/man/cell_type_tree.Rd b/man/cell_type_tree.Rd index 221cbc1..1f29947 100644 --- a/man/cell_type_tree.Rd +++ b/man/cell_type_tree.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cell_type_mapping.R -\docType{data} \name{cell_type_tree} \alias{cell_type_tree} \title{Available cell types in the controlled vocabulary organized as a lineage tree.} -\format{ -An object of class \code{Node} (inherits from \code{R6}) of length 41. -} \usage{ cell_type_tree } @@ -16,4 +12,3 @@ Available cell types in the controlled vocabulary organized as a lineage tree. \details{ a \code{data.tree} object } -\keyword{datasets} diff --git a/man/custom_deconvolution_methods.Rd b/man/custom_deconvolution_methods.Rd index c8a1fbe..7393e48 100644 --- a/man/custom_deconvolution_methods.Rd +++ b/man/custom_deconvolution_methods.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/custom_deconvolution_methods.R -\docType{data} \name{custom_deconvolution_methods} \alias{custom_deconvolution_methods} \title{List of methods that support the use of a custom signature} -\format{ -An object of class \code{character} of length 4. -} \usage{ custom_deconvolution_methods } @@ -18,4 +14,3 @@ The available methods are The object is a named vector. The names correspond to the display name of the method, the values to the internal name. } -\keyword{datasets} diff --git a/man/deconvolute_mcp_counter.Rd b/man/deconvolute_mcp_counter.Rd index 71f6512..0a695a2 100644 --- a/man/deconvolute_mcp_counter.Rd +++ b/man/deconvolute_mcp_counter.Rd @@ -7,6 +7,7 @@ deconvolute_mcp_counter( gene_expression_matrix, feature_types = "HUGO_symbols", + log_transform = NULL, ... ) } @@ -16,6 +17,16 @@ deconvolute_mcp_counter( \item{feature_types}{type of identifiers used for expression features. May be one of \verb{"affy133P2_probesets","HUGO_symbols","ENTREZ_ID"}} +\item{log_transform}{Controls whether the expression matrix is log2-transformed before +running MCP-counter. MCP-counter expects log-transformed data. One of \code{NULL} (default), +\code{TRUE}, or \code{FALSE}. +\itemize{ +\item \code{NULL} – auto-detect: if \code{max(gene_expression_matrix) > 50} the data are assumed +to be in linear (TPM) scale and will be log2(x + 1)-transformed. +\item \code{TRUE} – always apply log2(x + 1) transformation. +\item \code{FALSE} – assume data are already log-transformed; no transformation is applied. +}} + \item{...}{passed through to original MCP-counter function. A native argument takes precedence over an immunedeconv argument (e.g. \code{featureType} takes precedence over \code{feature_types}) See \href{https://github.com/ebecht/MCPcounter/blob/master/Source/R/MCPcounter.R#L19}{MCPcounter.estimate}.} diff --git a/man/deconvolution_methods.Rd b/man/deconvolution_methods.Rd index 3796009..86e2b7f 100644 --- a/man/deconvolution_methods.Rd +++ b/man/deconvolution_methods.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/immune_deconvolution_methods.R -\docType{data} \name{deconvolution_methods} \alias{deconvolution_methods} \title{List of supported immune deconvolution methods} -\format{ -An object of class \code{character} of length 10. -} \usage{ deconvolution_methods } @@ -18,4 +14,3 @@ The methods currently supported are The object is a named vector. The names correspond to the display name of the method, the values to the internal name. } -\keyword{datasets} diff --git a/man/deconvolution_methods_mouse.Rd b/man/deconvolution_methods_mouse.Rd index 6754db1..b4a1196 100644 --- a/man/deconvolution_methods_mouse.Rd +++ b/man/deconvolution_methods_mouse.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mouse_deconvolution_methods.R -\docType{data} \name{deconvolution_methods_mouse} \alias{deconvolution_methods_mouse} \title{List of supported mouse deconvolution methods @@ -8,9 +7,6 @@ The methods currently supported are \code{mmcp_counter}, \code{seqimmucc}, \code{dcq}, \code{base} The object is a named vector. The names correspond to the display name of the method, the values to the internal name.} -\format{ -An object of class \code{character} of length 4. -} \usage{ deconvolution_methods_mouse } @@ -21,4 +17,3 @@ The methods currently supported are The object is a named vector. The names correspond to the display name of the method, the values to the internal name. } -\keyword{datasets} diff --git a/man/immunedeconv.Rd b/man/immunedeconv.Rd index ab241d1..737e10f 100644 --- a/man/immunedeconv.Rd +++ b/man/immunedeconv.Rd @@ -12,7 +12,7 @@ estimating immune cell fractions from bulk RNA sequencing data. \seealso{ Useful links: \itemize{ - \item \url{https:/omnideconv.org/immunedeconv} + \item https:/omnideconv.org/immunedeconv \item \url{https://github.com/omnideconv/immunedeconv} \item Report bugs at \url{https://github.com/omnideconv/immunedeconv/issues} } @@ -23,6 +23,7 @@ Useful links: Authors: \itemize{ + \item Lorenzo Merotto \email{lorenzo.merotto@uibk.ac.at} \item Gregor Sturm \email{mail@gregor-sturm.de} } diff --git a/man/node_by_name.Rd b/man/node_by_name.Rd index 04eccb6..93f8e96 100644 --- a/man/node_by_name.Rd +++ b/man/node_by_name.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cell_type_mapping.R -\docType{data} \name{node_by_name} \alias{node_by_name} \title{Lookup dictionary for cell-type nodes} -\format{ -An object of class \code{list} of length 77. -} \usage{ node_by_name } @@ -14,4 +10,3 @@ node_by_name Access nodes by name in O(1). Node names are unique in our tree. gets attached on .onLoad, see zzz.R } -\keyword{datasets} diff --git a/man/timer_available_cancers.Rd b/man/timer_available_cancers.Rd index b892445..8a9b5e4 100644 --- a/man/timer_available_cancers.Rd +++ b/man/timer_available_cancers.Rd @@ -1,16 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/timer.R -\docType{data} \name{timer_available_cancers} \alias{timer_available_cancers} \title{TIMER signatures are cancer specific. This is the list of available cancer types.} -\format{ -An object of class \code{character} of length 32. -} \usage{ timer_available_cancers } \description{ TIMER signatures are cancer specific. This is the list of available cancer types. } -\keyword{datasets} diff --git a/man/xCell.data.Rd b/man/xCell.data.Rd index ece30fc..50ba07f 100644 --- a/man/xCell.data.Rd +++ b/man/xCell.data.Rd @@ -1,12 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/immune_deconvolution_methods.R -\docType{data} \name{xCell.data} \alias{xCell.data} \title{Data object from xCell.} -\format{ -An object of class \code{list} of length 4. -} \usage{ xCell.data } @@ -14,4 +10,3 @@ xCell.data For some reason, this object is not properly exported from the xCell namespace. This is a workaround, that \code{xCellAnalysis} can be properly called from this package. } -\keyword{datasets} diff --git a/tests/testthat/test_deconvolution.R b/tests/testthat/test_deconvolution.R index 23905de..c264764 100644 --- a/tests/testthat/test_deconvolution.R +++ b/tests/testthat/test_deconvolution.R @@ -41,6 +41,45 @@ test_that("mcp_counter works", { assert("matrix dimensions consistent", ncol(res) == ncol(test_mat)) }) +test_that("mcp_counter log_transform auto-detects raw TPM data", { + # test_mat has max value > 50, so auto-detection should log-transform + expect_message( + res <- deconvolute_mcp_counter(test_mat), + "log2\\(x \\+ 1\\)-transformed automatically" + ) + assert("matrix dimensions consistent", ncol(res) == ncol(test_mat)) +}) + +test_that("mcp_counter log_transform auto-detects already log-transformed data", { + log_mat <- log2(test_mat + 1) + # max value should be <= 50, so auto-detection should not transform + expect_message( + res <- deconvolute_mcp_counter(log_mat), + "appears to be already log-transformed" + ) + assert("matrix dimensions consistent", ncol(res) == ncol(log_mat)) +}) + +test_that("mcp_counter log_transform NULL and TRUE give same result for raw TPM", { + # test_mat is raw TPM (max > 50), so NULL should auto-transform just like TRUE + suppressMessages(res_null <- deconvolute_mcp_counter(test_mat, log_transform = NULL)) + res_true <- deconvolute_mcp_counter(test_mat, log_transform = TRUE) + expect_equal(res_null, res_true) +}) + +test_that("mcp_counter log_transform NULL and FALSE give same result for log-transformed data", { + log_mat <- log2(test_mat + 1) + # log_mat is already log-transformed (max <= 50), so NULL should skip like FALSE + suppressMessages(res_null <- deconvolute_mcp_counter(log_mat, log_transform = NULL)) + res_false <- deconvolute_mcp_counter(log_mat, log_transform = FALSE) + expect_equal(res_null, res_false) +}) + +test_that("mcp_counter log_transform can be passed via deconvolute", { + res <- deconvolute(test_mat, "mcp_counter", log_transform = FALSE) + assert("matrix dimensions consistent", ncol(res) == ncol(test_mat) + 1) +}) + test_that("epic works", { res <- deconvolute_epic(test_mat, tumor = TRUE, scale_mrna = TRUE) assert("matrix dimensions consistent", ncol(res) == ncol(test_mat))