11# ' @title Transform a Seurat objects / FindAllMarkers result into a ClusterSet.
22# ' @description Transform a Seurat objects into a ClusterSet.
33# ' @param object A Seurat object.
4- # ' @param markers A Seurat::FindAllMarkers() result or a named vector (clusters with gene_names as named).
4+ # ' @param markers A Seurat::FindAllMarkers() result or a named vector (clusters with gene_names as named).
5+ # ' Named vectors can contains several position with the same gene name (overlapping clusters).
56# ' @param layer One of 'data', 'counts' or 'scale.data'. The slot to extract from the seurat object to perform clustering analysis.
67# ' SCT is the recommended method from Seurat package when working with spatial transcriptomics data.
78# ' @param assay The type of assay (e.g. "RNA", "Spatial", "Sketch", "SCT"...).
9+ # ' @param p_val_adj If markers is the output from Seurat::FindAllMarkers(), the adjusted p-value threshold.
810# ' @importFrom SeuratObject LayerData
911# ' @importFrom Matrix Matrix
12+ # ' @importFrom Seurat Assays
1013# ' @examples
1114# ' ## From a scRNA-seq/Seurat object
1215# ' library(SeuratObject)
1316# ' library(Seurat)
1417# ' data("pbmc_small", package="SeuratObject")
15- # ' cs <- cluster_set_from_seurat(pbmc_small, Seurat::FindAllMarkers(pbmc_small))
18+ # ' markers <- Seurat::FindAllMarkers(pbmc_small, only.pos = TRUE)
19+ # ' cs <- cluster_set_from_seurat(pbmc_small, markers)
20+ # ' cs <- top_genes(cs)
1621# ' plot_heatmap(cs)
1722# ' plot_heatmap(cs)
18- # ' plot_heatmap(cs[1, ])
23+ # ' plot_heatmap(cs[2, ])
1924# ' plot_heatmap(cs, cell_clusters = Seurat::Idents(pbmc_small))
2025# ' plot_heatmap(cs[1,Idents(pbmc_small) == "0"],
21- # ' cell_clusters = Seurat::Idents(pbmc_small), label_size = 6)
26+ # ' cell_clusters = Seurat::Idents(pbmc_small), label_size = 6)
2227# ' plot_profiles(cs, ident = Seurat::Idents(pbmc_small))
2328# ' @export
2429cluster_set_from_seurat <- function (object = NULL ,
2530 markers = NULL ,
2631 layer = c(' data' , ' counts' , ' scale.data' ),
27- assay = ' RNA' ){
32+ assay = ' RNA' ,
33+ p_val_adj = 0.001 ){
2834
35+ print_msg(" Converting a Seurat object from cluster_set..." , msg_type = " DEBUG" )
36+
37+ if (is.null(object ) | ! inherits(object , " Seurat" ))
38+ print_msg(" The 'object' argument should be a Seurat object." , msg_type = " STOP" )
39+
40+ if (is.null(markers ))
41+ print_msg(" Please provide a set of markers/clusters..." , msg_type = " STOP" )
42+
43+ if (is.factor(markers ))
44+ markers <- as.character(markers )
45+
46+ print_msg(paste0(" This seurat object contains the following assay : " ,
47+ Seurat :: Assays(object ), " ." ) , msg_type = " INFO" )
48+
49+ if (! assay %in% Seurat :: Assays(object )){
50+ print_msg(" The selected assay is not available. See 'assay' argument..." , msg_type = " STOP" )
51+ }
52+
2953 layer <- match.arg(layer )
3054
3155 object <- SeuratObject :: LayerData(object , assay = assay , layer = layer )
3256
3357 if (inherits(markers , " data.frame" )){
3458
35- gn <- markers $ gene
36- clusters <- markers $ cluster
59+ print_msg(" Selecting markers based on p_val_adj..." , msg_type = " DEBUG" )
60+ markers <- markers [markers $ p_val_adj < = p_val_adj , , drop = FALSE ]
61+ object <- object [markers $ gene , , drop = FALSE ]
62+ print_msg(" Disambiguating gene duplicates using '~' separator" , msg_type = " DEBUG" )
63+ gn <- make.unique(markers $ gene , sep = " ~" )
64+
65+ clusters <- as.character(markers $ cluster )
3766 names(clusters ) <- gn
38- object <- object [markers $ gene , ]
39-
67+
4068 }else if (is.vector(markers )){
4169 if (is.null(names(markers )))
4270 print_msg(" The 'markers' argument should be a named vector." ,
4371 msg_type = " STOP" )
72+ object <- object [names(markers ), , drop = FALSE ]
4473 clusters <- markers
74+ names(clusters ) <- make.unique(names(clusters ), sep = " ~" )
75+ gn <- names(clusters )
76+
4577 }else {
4678 print_msg(" The 'markers' argument should be a data.frame or named vector." ,
4779 msg_type = " STOP" )
4880 }
81+
82+ if (length(grep(" ~" , gn ))){
83+ print_msg(" There are duplicated gene names, handling them..." , msg_type = " DEBUG" )
84+ gn_non_dup <- gn [- grep(" ~[0-9]+$" , gn )]
85+
86+ tmp_mat <- object [gn_non_dup , ]
87+
88+ gn_dup <- gn [grep(" ~[0-9]+$" , gn )]
89+ max_dup <- max(as.numeric(sub(" .*~([0-9]+)$" , " \\ 1" , gn_dup )))
90+
91+ print_msg(paste0(" The maximum number of duplicates for one gene is : " , max_dup , " ." ), msg_type = " DEBUG" )
92+
93+ for (i in 1 : max_dup ){
94+ print_msg(paste0(" Looping though duplicate : " , i , " ." ), msg_type = " DEBUG" )
95+ gn_dup_i <- gn_dup [grep(paste0(" ~" , i , " $" ), gn_dup )]
96+
97+ if (length(gn_dup_i ) > 0 ){
98+ print_msg(paste0(" Adding " , length(gn_dup_i ), " genes to the matrix." ), msg_type = " DEBUG" )
99+ to_select <- gsub(gn_dup_i , pattern = " ~[0-9]+$" , replacement = " " )
100+ print_msg(" Subsetting..." , msg_type = " DEBUG" )
101+ sub_mat <- object [to_select , , drop = FALSE ]
102+ rownames(sub_mat ) <- gn_dup_i
103+ print_msg(" Binding..." , msg_type = " DEBUG" )
104+ tmp_mat <- rbind(tmp_mat , sub_mat )
105+ }
106+
107+ }
108+
109+ object <- tmp_mat
110+ }
49111
50112 gn <- split(names(clusters ), clusters )
51113
52114 obj_out <- new(Class = " ClusterSet" )
53115 obj_out @ gene_clusters <- gn
54- obj_out @ data <- Matrix :: Matrix(object , sparse = TRUE )
116+
117+ obj_out @ data <- object
55118 obj_out @ gene_clusters_metadata <- list (" cluster_id" = setNames(as.character(unique(clusters )),
56119 as.character(unique(clusters ))),
57120 " number" = length(table(clusters )),
@@ -72,6 +135,8 @@ cluster_set_from_seurat <- function(object=NULL,
72135 " all_gene_expression_matrix" = vector(),
73136 " all_neighbor_distances" = vector())
74137
138+ obj_out <- compute_centers(obj_out )
139+
75140 return (obj_out )
76141}
77142
@@ -100,8 +165,7 @@ cluster_set_from_matrix <- function(object=NULL,
100165 print_msg(" The 'object' argument should be a data.frame or matrix." ,
101166 msg_type = " STOP" )
102167 }
103-
104-
168+
105169 if (! inherits(markers , " list" )){
106170 print_msg(" The 'marker' argument should be a list." ,
107171 msg_type = " STOP" )
@@ -114,7 +178,7 @@ cluster_set_from_matrix <- function(object=NULL,
114178 print_msg(" No marker were found in the matrix." )
115179 return (new(Class = " ClusterSet" ))
116180 }
117-
181+
118182 print_msg(" Subsetting object." , msg_type = " INFO" )
119183 object <- object [marker_found , ]
120184
0 commit comments