@@ -113,31 +113,36 @@ RunDE_RNA <- function(sc_obj, metadata_df, output_dir = getwd(), log_flag = FALS
113113 # Run DESeq2 for pseudobulk filtering
114114 Seurat :: DefaultAssay(sc_obj_cell_type_subset ) <- " RNA"
115115 pseudobulk_counts <- create_pseudobulk_counts(sc_obj_cell_type_subset , log_flag )
116- pseudobulk_metadata <- metadata_df
117- pseudobulk_metadata $ aliquots <- rownames(pseudobulk_metadata )
118- pseudobulk_metadata <- pseudobulk_metadata [match(colnames(pseudobulk_counts ), pseudobulk_metadata $ aliquots ),]
119- # Dummy declaration to avoid check() complaining
120- aliquots <- NULL
121- pseudobulk_metadata <- subset(pseudobulk_metadata , select = - c(aliquots ))
122- pseudobulk_analysis <- DESeq2 :: DESeqDataSetFromMatrix(countData = pseudobulk_counts , colData = pseudobulk_metadata , design = stats :: formula(paste(" ~" ,metadata_attribute )))
123- pseudobulk_analysis <- DESeq2 :: DESeq(pseudobulk_analysis )
124- pseudobulk_analysis_results_contrast <- utils :: tail(DESeq2 :: resultsNames(pseudobulk_analysis ), n = 1 )
125- pseudobulk_analysis_results <- DESeq2 :: results(pseudobulk_analysis , name = pseudobulk_analysis_results_contrast )
126- pseudobulk_analysis_results <- pseudobulk_analysis_results [rowSums(is.na(pseudobulk_analysis_results )) == 0 , ] # Remove NAs
127- pseudobulk_analysis_results <- pseudobulk_analysis_results [pseudobulk_analysis_results $ pvalue < 0.05 ,]
128- pseudobulk_analysis_results <- pseudobulk_analysis_results [pseudobulk_analysis_results $ log2FoldChange < - 0.3 | pseudobulk_analysis_results $ log2FoldChange > 0.3 ,]
129- # Filter genes from single cell based on DESeq2 pseudobulk results
130- final_genes <- intersect(rownames(current_de ), rownames(pseudobulk_analysis_results ))
131- # Record information about remaining genes in final_current_de
132- for (current_gene in final_genes ) {
133- current_sc_pval_adj <- current_de [rownames(current_de ) == current_gene ,]$ p_val_adj
134- current_sc_log2FC <- current_de [rownames(current_de ) == current_gene ,]$ avg_log2FC
135- current_pseudo_bulk_pval <- pseudobulk_analysis_results [rownames(pseudobulk_analysis_results ) == current_gene ,]$ pvalue
136- current_pseudo_bulk_log2FC <- pseudobulk_analysis_results [rownames(pseudobulk_analysis_results ) == current_gene ,]$ log2FoldChange
137- current_pseudo_bulk_log2FC <- current_pseudo_bulk_log2FC * - 1 # Pseudobulk test has condition 1 / condition 2 flipped relative to single cell test, so we just multiply by -1 so FC is in consistent direction
138- current_row <- data.frame (current_cell_type , current_gene , current_sc_pval_adj , current_sc_log2FC , current_pseudo_bulk_pval , current_pseudo_bulk_log2FC )
139- names(current_row ) <- c(" Cell_Type" , " Gene_Name" , " sc_pval_adj" , " sc_log2FC" , " pseudo_bulk_pval" , " pseudo_bulk_log2FC" )
140- final_current_de <- rbind(final_current_de , current_row )
116+ if (all(apply(pseudobulk_counts , 1 , function (row ) any(row == 0 )))) {
117+ print_SPEEDI(" Warning: At least one zero found for each gene in pseudobulk counts, so can't run DESeq2." , log_flag )
118+ print_SPEEDI(" Skipping remaining differential expression analysis for metadata attribute" , log_flag )
119+ } else {
120+ pseudobulk_metadata <- metadata_df
121+ pseudobulk_metadata $ aliquots <- rownames(pseudobulk_metadata )
122+ pseudobulk_metadata <- pseudobulk_metadata [match(colnames(pseudobulk_counts ), pseudobulk_metadata $ aliquots ),]
123+ # Dummy declaration to avoid check() complaining
124+ aliquots <- NULL
125+ pseudobulk_metadata <- subset(pseudobulk_metadata , select = - c(aliquots ))
126+ pseudobulk_analysis <- DESeq2 :: DESeqDataSetFromMatrix(countData = pseudobulk_counts , colData = pseudobulk_metadata , design = stats :: formula(paste(" ~" ,metadata_attribute )))
127+ pseudobulk_analysis <- DESeq2 :: DESeq(pseudobulk_analysis )
128+ pseudobulk_analysis_results_contrast <- utils :: tail(DESeq2 :: resultsNames(pseudobulk_analysis ), n = 1 )
129+ pseudobulk_analysis_results <- DESeq2 :: results(pseudobulk_analysis , name = pseudobulk_analysis_results_contrast )
130+ pseudobulk_analysis_results <- pseudobulk_analysis_results [rowSums(is.na(pseudobulk_analysis_results )) == 0 , ] # Remove NAs
131+ pseudobulk_analysis_results <- pseudobulk_analysis_results [pseudobulk_analysis_results $ pvalue < 0.05 ,]
132+ pseudobulk_analysis_results <- pseudobulk_analysis_results [pseudobulk_analysis_results $ log2FoldChange < - 0.3 | pseudobulk_analysis_results $ log2FoldChange > 0.3 ,]
133+ # Filter genes from single cell based on DESeq2 pseudobulk results
134+ final_genes <- intersect(rownames(current_de ), rownames(pseudobulk_analysis_results ))
135+ # Record information about remaining genes in final_current_de
136+ for (current_gene in final_genes ) {
137+ current_sc_pval_adj <- current_de [rownames(current_de ) == current_gene ,]$ p_val_adj
138+ current_sc_log2FC <- current_de [rownames(current_de ) == current_gene ,]$ avg_log2FC
139+ current_pseudo_bulk_pval <- pseudobulk_analysis_results [rownames(pseudobulk_analysis_results ) == current_gene ,]$ pvalue
140+ current_pseudo_bulk_log2FC <- pseudobulk_analysis_results [rownames(pseudobulk_analysis_results ) == current_gene ,]$ log2FoldChange
141+ current_pseudo_bulk_log2FC <- current_pseudo_bulk_log2FC * - 1 # Pseudobulk test has condition 1 / condition 2 flipped relative to single cell test, so we just multiply by -1 so FC is in consistent direction
142+ current_row <- data.frame (current_cell_type , current_gene , current_sc_pval_adj , current_sc_log2FC , current_pseudo_bulk_pval , current_pseudo_bulk_log2FC )
143+ names(current_row ) <- c(" Cell_Type" , " Gene_Name" , " sc_pval_adj" , " sc_log2FC" , " pseudo_bulk_pval" , " pseudo_bulk_log2FC" )
144+ final_current_de <- rbind(final_current_de , current_row )
145+ }
141146 }
142147 }
143148 print_SPEEDI(paste0(" Writing results for metadata attribute " , metadata_attribute , " to file" ), log_flag )
@@ -326,7 +331,9 @@ create_pseudobulk_counts <- function(sc_obj, log_flag = FALSE) {
326331 cells_pseudobulk <- list ()
327332 for (sample_name in unique(sc_obj $ sample )) {
328333 idxMatch <- which(stringr :: str_detect(as.character(sc_obj $ sample ), sample_name ))
329- if (length(idxMatch )> = 1 ) {
334+ # Note - ideally, this should be >= 1, but there's a bug with Seurat V5 where data from objects with 1 cell cannot be
335+ # sampled correctly. Thus, in this edge case, we assume object has 0 cells
336+ if (length(idxMatch )> 1 ) {
330337 samples_subset <- subset(x = sc_obj , subset = sample %in% sample_name )
331338 samples_data <- samples_subset @ assays $ RNA $ counts
332339 samples_data <- rowSums(as.matrix(samples_data ))
0 commit comments