diff --git a/R/utilities.R b/R/utilities.R index a377514..db8c8d5 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -592,11 +592,25 @@ get_x_y_annotation_columns = function(.data, .column, .row, .abundance){ select_if(negate(is.list)) %>% ungroup() %>% { - # Rows + # Get column, row, and abundance column names + col_col <- quo_name(.column) + row_col <- quo_name(.row) + abundance_col <- quo_name(.abundance) + + # Columns for column orientation + col_orient <- colnames(subset(., !!.column)) + # Columns for row orientation + row_orient <- colnames(subset(., !!.row)) + + # Always include abundance column in both orientations + col_orient <- unique(c(col_orient, abundance_col)) + row_orient <- unique(c(row_orient, abundance_col)) + bind_rows( - (.) %>% subset(!!.column) %>% colnames %>% as_tibble %>% rename(column = value) %>% gather(orientation, col_name), - (.) %>% subset(!!.row) %>% colnames %>% as_tibble %>% rename(row = value) %>% gather(orientation, col_name) - ) + tibble(orientation = "column", col_name = col_orient), + tibble(orientation = "row", col_name = row_orient) + ) %>% + distinct() } } diff --git a/tests/testthat/test-get_x_y_annotation_columns.R b/tests/testthat/test-get_x_y_annotation_columns.R new file mode 100644 index 0000000..1361b4c --- /dev/null +++ b/tests/testthat/test-get_x_y_annotation_columns.R @@ -0,0 +1,319 @@ +context("tidyHeatmap:::get_x_y_annotation_columns tests") + +# Load required packages +library(dplyr) +library(tidyr) +library(tibble) +# Line removed as `rlang` is not used in this test file. + +# Create test data sets for different scenarios +create_basic_test_data <- function() { + # Create rectangular data: all combinations of 2 samples and 2 genes + tidyr::expand_grid( + sample = c("S1", "S2"), + gene = c("G1", "G2") + ) %>% + dplyr::mutate( + count = c(10, 20, 30, 40), + sample_type = rep(c("A", "B"), each = 2), # annotation for samples + gene_pathway = rep(c("P1", "P2"), times = 2), # annotation for genes + batch = rep(c("B1", "B2"), each = 2), # could be either sample or gene annotation + irrelevant_col = rep(c("X", "Y"), each = 2) # not related to either dimension + ) +} + +create_complex_test_data <- function() { + # Create rectangular data: all combinations of 3 patients and 4 biomarkers + tidyr::expand_grid( + patient_id = c("P1", "P2", "P3"), + biomarker = c("BM1", "BM2", "BM3", "BM4") + ) %>% + dplyr::mutate( + expression = runif(12), + age = rep(c(25, 35, 45), each = 4), # patient annotation + gender = rep(c("M", "F", "M"), each = 4), # patient annotation + pathway = rep(c("Path1", "Path2", "Path1", "Path2"), times = 3), # biomarker annotation + category = rep(c("Cat1", "Cat2", "Cat1", "Cat2"), times = 3), # biomarker annotation + treatment = rep(c("T1", "T2", "T3"), each = 4), # patient annotation + tissue_type = rep(c("Normal", "Tumor"), each = 6) # could be either + ) +} + +# Test basic functionality +test_that("tidyHeatmap:::get_x_y_annotation_columns works with basic data", { + test_data <- create_basic_test_data() + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + # Should be a tibble with orientation and col_name columns + expect_s3_class(result, "tbl_df") + expect_equal(names(result), c("orientation", "col_name")) + + # Should have entries for both orientations + expect_true("column" %in% result$orientation) + expect_true("row" %in% result$orientation) + + # Check that sample-related columns are marked as column orientation + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + expect_true("sample" %in% column_cols) + expect_true("sample_type" %in% column_cols) + + # Check that gene-related columns are marked as row orientation + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + expect_true("gene" %in% row_cols) + expect_true("gene_pathway" %in% row_cols) + + # Count should be in both orientations (as it's the abundance measure) + expect_true("count" %in% column_cols) + expect_true("count" %in% row_cols) +}) + +# Test with more complex data structure +test_that("tidyHeatmap:::get_x_y_annotation_columns handles complex data correctly", { + test_data <- create_complex_test_data() + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, patient_id, biomarker, expression) + + # Should identify patient-specific annotations + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + expect_true("patient_id" %in% column_cols) + expect_true("age" %in% column_cols) + expect_true("gender" %in% column_cols) + expect_true("treatment" %in% column_cols) + + # Should identify biomarker-specific annotations + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + expect_true("biomarker" %in% row_cols) + expect_true("pathway" %in% row_cols) + expect_true("category" %in% row_cols) +}) + +# Test with empty data +test_that("tidyHeatmap:::get_x_y_annotation_columns handles empty data", { + empty_data <- tibble( + sample = character(0), + gene = character(0), + count = numeric(0) + ) + + result <- tidyHeatmap:::get_x_y_annotation_columns(empty_data, sample, gene, count) + + expect_s3_class(result, "tbl_df") + expect_equal(names(result), c("orientation", "col_name")) + # With empty data, we should still get the basic structure + expect_true(nrow(result) >= 0) +}) + +# Test with single row/column +test_that("tidyHeatmap:::get_x_y_annotation_columns handles single row/column data", { + # Single sample and single gene (still rectangular) + single_data <- tibble( + sample = "S1", + gene = "G1", + count = 100, + sample_annotation = "A", + gene_annotation = "X" + ) + + result <- tidyHeatmap:::get_x_y_annotation_columns(single_data, sample, gene, count) + + expect_s3_class(result, "tbl_df") + expect_equal(names(result), c("orientation", "col_name")) + + # Should still categorize annotations correctly + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + + expect_true("sample" %in% column_cols) + expect_true("gene" %in% row_cols) +}) + +# Test with grouped data +test_that("tidyHeatmap:::get_x_y_annotation_columns works with grouped data", { + test_data <- create_basic_test_data() %>% + group_by(sample_type) + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + # Should still work correctly and ungroup the data + expect_s3_class(result, "tbl_df") + expect_equal(names(result), c("orientation", "col_name")) + + # Should still categorize correctly + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + + expect_true("sample" %in% column_cols) + expect_true("gene" %in% row_cols) +}) + +# Test with list columns (should be filtered out) +test_that("tidyHeatmap:::get_x_y_annotation_columns filters out list columns", { + test_data <- create_basic_test_data() %>% + mutate(list_col = list(c(1, 2), c(3, 4), c(5, 6), c(7, 8))) + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + # list_col should not appear in the result + all_cols <- result %>% pull(col_name) + expect_false("list_col" %in% all_cols) + + # Other columns should still be present + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + + expect_true("sample" %in% column_cols) + expect_true("gene" %in% row_cols) +}) + +# Test with factor columns +test_that("tidyHeatmap:::get_x_y_annotation_columns handles factor columns", { + test_data <- create_basic_test_data() %>% + mutate( + sample_type = factor(sample_type), + gene_pathway = factor(gene_pathway) + ) + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + # Should handle factors correctly + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + + expect_true("sample_type" %in% column_cols) + expect_true("gene_pathway" %in% row_cols) +}) + +# Test with numeric annotation columns +test_that("tidyHeatmap:::get_x_y_annotation_columns handles numeric annotations", { + # Create rectangular data: all combinations of 2 samples and 2 genes + test_data <- tidyr::expand_grid( + sample = c("S1", "S2"), + gene = c("G1", "G2") + ) %>% + dplyr::mutate( + count = c(10, 20, 30, 40), + sample_score = rep(c(1.5, 2.5), each = 2), # numeric annotation for samples + gene_weight = rep(c(10.1, 20.2), times = 2) # numeric annotation for genes + ) + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + + expect_true("sample_score" %in% column_cols) + expect_true("gene_weight" %in% row_cols) +}) + +# Test with real-world-like data (using structure similar to N52) +test_that("tidyHeatmap:::get_x_y_annotation_columns works with N52-like structure", { + # Create rectangular data: all combinations of 4 samples and 3 genes + n52_like_data <- tidyr::expand_grid( + symbol_ct = c("G1", "G2", "G3"), + UBR = c("S1", "S2", "S3", "S4") + ) %>% + dplyr::mutate( + `read count normalised log` = runif(12), + Category = rep(c("Cat1", "Cat2", "Cat1"), each = 4), # gene annotation + `Cell type` = rep(c("TypeA", "TypeB", "TypeA", "TypeB"), times = 3), # sample annotation + CAPRA_TOTAL = rep(c(1, 2, 3, 4), times = 3), # sample annotation + inflection = rep(c(10, 20, 30), each = 4) # gene annotation + ) + + result <- tidyHeatmap:::get_x_y_annotation_columns( + n52_like_data, + UBR, + symbol_ct, + `read count normalised log` + ) + + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + + # UBR should be in column orientation + expect_true("UBR" %in% column_cols) + # symbol_ct should be in row orientation + expect_true("symbol_ct" %in% row_cols) + + # Cell type and CAPRA_TOTAL should be sample (column) annotations + expect_true("Cell type" %in% column_cols) + expect_true("CAPRA_TOTAL" %in% column_cols) + + # Category and inflection should be gene (row) annotations + expect_true("Category" %in% row_cols) + expect_true("inflection" %in% row_cols) + + # count (abundance) should be in both orientations + expect_true("count" %in% column_cols) + expect_true("count" %in% row_cols) +}) + +# Test with missing abundance column specified +# Removed due to redundancy with the N52-like structure test. +# Test result structure and types +test_that("tidyHeatmap:::get_x_y_annotation_columns returns correct structure", { + test_data <- create_basic_test_data() + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + # Check return type and structure + expect_s3_class(result, "tbl_df") + expect_equal(ncol(result), 2) + expect_equal(names(result), c("orientation", "col_name")) + + # Check column types + expect_type(result$orientation, "character") + expect_type(result$col_name, "character") + + # Check that orientation only contains expected values + unique_orientations <- unique(result$orientation) + expect_true(all(unique_orientations %in% c("column", "row"))) + + # Check that col_name contains actual column names from the data + expect_true(all(result$col_name %in% names(test_data))) +}) + +# Test with duplicate column names handling +test_that("tidyHeatmap:::get_x_y_annotation_columns handles edge cases", { + # Test with minimal rectangular data + minimal_data <- tidyr::expand_grid( + x = c("A", "B"), + y = c("1", "2") + ) %>% + dplyr::mutate(z = c(100, 200, 300, 400)) + + result <- tidyHeatmap:::get_x_y_annotation_columns(minimal_data, x, y, z) + + expect_s3_class(result, "tbl_df") + expect_equal(names(result), c("orientation", "col_name")) + + # All columns should appear in the result + all_cols <- result %>% pull(col_name) + expect_true("x" %in% all_cols) + expect_true("y" %in% all_cols) + expect_true("z" %in% all_cols) +}) + +# Test with boolean column specific to row values in a rectangular matrix + +test_that("tidyHeatmap:::get_x_y_annotation_columns handles boolean row-specific annotation in rectangular data", { + # Create all combinations of 2 samples and 2 genes (rectangular) + test_data <- tidyr::expand_grid( + sample = c("S1", "S2"), + gene = c("G1", "G2") + ) %>% + dplyr::mutate( + count = c(10, 20, 30, 40), + is_special_gene = gene == "G2" # TRUE only for G2 rows + ) + + result <- tidyHeatmap:::get_x_y_annotation_columns(test_data, sample, gene, count) + + # Should classify is_special_gene as a row annotation + row_cols <- result %>% filter(orientation == "row") %>% pull(col_name) + column_cols <- result %>% filter(orientation == "column") %>% pull(col_name) + + expect_true("is_special_gene" %in% row_cols) + expect_false("is_special_gene" %in% column_cols) +}) \ No newline at end of file