Skip to content

Commit 425238a

Browse files
WT: Bug fixes and updates for Seurat V5
1 parent bcbe2a2 commit 425238a

1 file changed

Lines changed: 33 additions & 26 deletions

File tree

R/downstream_analysis.R

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -113,31 +113,36 @@ RunDE_RNA <- function(sc_obj, metadata_df, output_dir = getwd(), log_flag = FALS
113113
# Run DESeq2 for pseudobulk filtering
114114
Seurat::DefaultAssay(sc_obj_cell_type_subset) <- "RNA"
115115
pseudobulk_counts <- create_pseudobulk_counts(sc_obj_cell_type_subset, log_flag)
116-
pseudobulk_metadata <- metadata_df
117-
pseudobulk_metadata$aliquots <- rownames(pseudobulk_metadata)
118-
pseudobulk_metadata <- pseudobulk_metadata[match(colnames(pseudobulk_counts), pseudobulk_metadata$aliquots),]
119-
# Dummy declaration to avoid check() complaining
120-
aliquots <- NULL
121-
pseudobulk_metadata <- subset(pseudobulk_metadata, select = -c(aliquots))
122-
pseudobulk_analysis <- DESeq2::DESeqDataSetFromMatrix(countData = pseudobulk_counts, colData = pseudobulk_metadata, design = stats::formula(paste("~",metadata_attribute)))
123-
pseudobulk_analysis <- DESeq2::DESeq(pseudobulk_analysis)
124-
pseudobulk_analysis_results_contrast <- utils::tail(DESeq2::resultsNames(pseudobulk_analysis), n=1)
125-
pseudobulk_analysis_results <- DESeq2::results(pseudobulk_analysis, name=pseudobulk_analysis_results_contrast)
126-
pseudobulk_analysis_results <- pseudobulk_analysis_results[rowSums(is.na(pseudobulk_analysis_results)) == 0, ] # Remove NAs
127-
pseudobulk_analysis_results <- pseudobulk_analysis_results[pseudobulk_analysis_results$pvalue < 0.05,]
128-
pseudobulk_analysis_results <- pseudobulk_analysis_results[pseudobulk_analysis_results$log2FoldChange < -0.3 | pseudobulk_analysis_results$log2FoldChange > 0.3,]
129-
# Filter genes from single cell based on DESeq2 pseudobulk results
130-
final_genes <- intersect(rownames(current_de), rownames(pseudobulk_analysis_results))
131-
# Record information about remaining genes in final_current_de
132-
for(current_gene in final_genes) {
133-
current_sc_pval_adj <- current_de[rownames(current_de) == current_gene,]$p_val_adj
134-
current_sc_log2FC <- current_de[rownames(current_de) == current_gene,]$avg_log2FC
135-
current_pseudo_bulk_pval <- pseudobulk_analysis_results[rownames(pseudobulk_analysis_results) == current_gene,]$pvalue
136-
current_pseudo_bulk_log2FC <- pseudobulk_analysis_results[rownames(pseudobulk_analysis_results) == current_gene,]$log2FoldChange
137-
current_pseudo_bulk_log2FC <- current_pseudo_bulk_log2FC * -1 # Pseudobulk test has condition 1 / condition 2 flipped relative to single cell test, so we just multiply by -1 so FC is in consistent direction
138-
current_row <- data.frame(current_cell_type, current_gene, current_sc_pval_adj, current_sc_log2FC, current_pseudo_bulk_pval, current_pseudo_bulk_log2FC)
139-
names(current_row) <- c("Cell_Type", "Gene_Name", "sc_pval_adj", "sc_log2FC", "pseudo_bulk_pval", "pseudo_bulk_log2FC")
140-
final_current_de <- rbind(final_current_de, current_row)
116+
if(all(apply(pseudobulk_counts, 1, function(row) any(row == 0)))) {
117+
print_SPEEDI("Warning: At least one zero found for each gene in pseudobulk counts, so can't run DESeq2.", log_flag)
118+
print_SPEEDI("Skipping remaining differential expression analysis for metadata attribute", log_flag)
119+
} else {
120+
pseudobulk_metadata <- metadata_df
121+
pseudobulk_metadata$aliquots <- rownames(pseudobulk_metadata)
122+
pseudobulk_metadata <- pseudobulk_metadata[match(colnames(pseudobulk_counts), pseudobulk_metadata$aliquots),]
123+
# Dummy declaration to avoid check() complaining
124+
aliquots <- NULL
125+
pseudobulk_metadata <- subset(pseudobulk_metadata, select = -c(aliquots))
126+
pseudobulk_analysis <- DESeq2::DESeqDataSetFromMatrix(countData = pseudobulk_counts, colData = pseudobulk_metadata, design = stats::formula(paste("~",metadata_attribute)))
127+
pseudobulk_analysis <- DESeq2::DESeq(pseudobulk_analysis)
128+
pseudobulk_analysis_results_contrast <- utils::tail(DESeq2::resultsNames(pseudobulk_analysis), n=1)
129+
pseudobulk_analysis_results <- DESeq2::results(pseudobulk_analysis, name=pseudobulk_analysis_results_contrast)
130+
pseudobulk_analysis_results <- pseudobulk_analysis_results[rowSums(is.na(pseudobulk_analysis_results)) == 0, ] # Remove NAs
131+
pseudobulk_analysis_results <- pseudobulk_analysis_results[pseudobulk_analysis_results$pvalue < 0.05,]
132+
pseudobulk_analysis_results <- pseudobulk_analysis_results[pseudobulk_analysis_results$log2FoldChange < -0.3 | pseudobulk_analysis_results$log2FoldChange > 0.3,]
133+
# Filter genes from single cell based on DESeq2 pseudobulk results
134+
final_genes <- intersect(rownames(current_de), rownames(pseudobulk_analysis_results))
135+
# Record information about remaining genes in final_current_de
136+
for(current_gene in final_genes) {
137+
current_sc_pval_adj <- current_de[rownames(current_de) == current_gene,]$p_val_adj
138+
current_sc_log2FC <- current_de[rownames(current_de) == current_gene,]$avg_log2FC
139+
current_pseudo_bulk_pval <- pseudobulk_analysis_results[rownames(pseudobulk_analysis_results) == current_gene,]$pvalue
140+
current_pseudo_bulk_log2FC <- pseudobulk_analysis_results[rownames(pseudobulk_analysis_results) == current_gene,]$log2FoldChange
141+
current_pseudo_bulk_log2FC <- current_pseudo_bulk_log2FC * -1 # Pseudobulk test has condition 1 / condition 2 flipped relative to single cell test, so we just multiply by -1 so FC is in consistent direction
142+
current_row <- data.frame(current_cell_type, current_gene, current_sc_pval_adj, current_sc_log2FC, current_pseudo_bulk_pval, current_pseudo_bulk_log2FC)
143+
names(current_row) <- c("Cell_Type", "Gene_Name", "sc_pval_adj", "sc_log2FC", "pseudo_bulk_pval", "pseudo_bulk_log2FC")
144+
final_current_de <- rbind(final_current_de, current_row)
145+
}
141146
}
142147
}
143148
print_SPEEDI(paste0("Writing results for metadata attribute ", metadata_attribute, "to file"), log_flag)
@@ -326,7 +331,9 @@ create_pseudobulk_counts <- function(sc_obj, log_flag = FALSE) {
326331
cells_pseudobulk <- list()
327332
for (sample_name in unique(sc_obj$sample)) {
328333
idxMatch <- which(stringr::str_detect(as.character(sc_obj$sample), sample_name))
329-
if(length(idxMatch)>=1) {
334+
# Note - ideally, this should be >= 1, but there's a bug with Seurat V5 where data from objects with 1 cell cannot be
335+
# sampled correctly. Thus, in this edge case, we assume object has 0 cells
336+
if(length(idxMatch)>1) {
330337
samples_subset <- subset(x = sc_obj, subset = sample %in% sample_name)
331338
samples_data <- samples_subset@assays$RNA$counts
332339
samples_data <- rowSums(as.matrix(samples_data))

0 commit comments

Comments
 (0)