diff --git a/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.Rmd b/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.Rmd new file mode 100644 index 0000000..a9bd22e --- /dev/null +++ b/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.Rmd @@ -0,0 +1,141 @@ +--- +title: "Dasen lab, Pbx-mutant RNAseq, brachial" +author: "Lisa Cohen" +output: html_document +--- + +# Brachial-mutant vs. Control + +Filenames containing raw transcript counts from htseq-count: +```{r,echo=FALSE, message=FALSE, warning=FALSE} +library(DESeq2) +library("genefilter") +library(gplots) +library(RColorBrewer) +library(biomaRt) +library("genefilter") +library("lattice") +source('~/Documents/scripts/plotPCAWithSampleNames.R') +setwd("~/Documents/NYUMC/Dasen/brachial/htseq_counts") +mypath<-"~/Documents/NYUMC/Dasen/brachial/htseq_counts" +filenames<-list.files(path=mypath, pattern= "_counts.txt", full.names=FALSE) +datalist <-lapply(filenames, function(x){read.table(x,header=FALSE, sep="\t")}) +for (i in 1:length(filenames)) +{ + colnames(datalist[[i]])<-c("ID",filenames[[i]]) +} +mergeddata <- Reduce(function(x,y) {merge(x,y, by="ID")}, datalist) +new_data_merge<-mergeddata[-1:-5,] +#write.csv(new_data_merge,file="Dasen_thoracic_count_data_Ensembl.csv") +rown<-new_data_merge$ID +rownames(new_data_merge)<-rown +new_data_merge<-new_data_merge[,-1] +data<-new_data_merge +colnames(data) +col.names<-c("BR-A-Control","BR-A-Mutant","BR-B-Control","BR-B-Mutant","BR-C-Control","BR-C-Mutant") +colnames(data)<-col.names +ExpDesign <- data.frame(row.names=colnames(data), condition = c("Control","Mutant","Control","Mutant","Control","Mutant")) +cds<-DESeqDataSetFromMatrix(countData=data, colData=ExpDesign,design=~condition) +cds$condition <- relevel(cds$condition, "Control") +cds<-DESeq(cds,betaPrior=FALSE) + +# get norm counts +norm_counts<-counts(cds,normalized=TRUE) +norm_counts_data<-as.data.frame(norm_counts) +ensembl_id<-rownames(norm_counts) +norm_counts_data<-cbind(ensembl_id,norm_counts_data) +filtered_norm_counts<-norm_counts_data[!rowSums(norm_counts_data[,2:7]==0)>=1, ] +``` + +The size of the table with all transcripts is: +```{r,echo=FALSE, message=FALSE, warning=FALSE} +# get gene name from Ensembl gene ID +ensembl=useMart("ensembl") +ensembl = useDataset("mmusculus_gene_ensembl",mart=ensembl) +data_table<-filtered_norm_counts + +query<-getBM(attributes=c('ensembl_gene_id','external_gene_name','gene_biotype'), filters = 'ensembl_gene_id', values = ensembl_id, mart=ensembl) +col.names<-c("ensembl_id","external_gene_id","gene_biotype") +colnames(query)<-col.names +merge_biomart_res_counts <- merge(data_table,query,by="ensembl_id") +temp_data_merged_counts<-merge_biomart_res_counts + +## +res<-results(cds,contrast=c("condition","Mutant","Control")) +res_ordered<-res[order(res$padj),] +ensembl_id<-rownames(res_ordered) +res_ordered<-as.data.frame(res_ordered) +res_ordered<-cbind(res_ordered,ensembl_id) +merge_biomart_res_counts <- merge(temp_data_merged_counts,res_ordered,by="ensembl_id") +merge_biomart_res_all<-subset(merge_biomart_res_counts,merge_biomart_res_counts$padj!="NA") +merge_biomart_res_all<-merge_biomart_res_all[order(merge_biomart_res_all$padj),] +dim(merge_biomart_res_all) +write.csv(new_data_merge,file="Dasen_brachial_Mutant_v_Control_CPM_all.csv") +``` + +The size of the table with only significant transcripts, padj<0.05 is: +```{r,echo=FALSE, message=FALSE, warning=FALSE} +res_merged_cutoff<-subset(merge_biomart_res_all,merge_biomart_res_all$padj<0.05) +dim(res_merged_cutoff) +plot(log2(res$baseMean), res$log2FoldChange, col=ifelse(res$padj < 0.05, "red","gray67"),main="(DESeq2) Brachial Mutant vs. Control (padj<0.05)",xlim=c(1,15),pch=20,cex=1,ylim=c(-5,5)) +abline(h=c(-1,1), col="blue") +write.csv(new_data_merge,file="Dasen_brachial_Mutant_v_Control_CPM_padj0.05.csv") +``` + + +# Heatmap + +All genes padj<0.05, regardless of FC (all red points from MA plot above) + +```{r,echo=FALSE, message=FALSE, warning=FALSE} +#up_down_1FC<-subset(res_merged_cutoff,res_merged_cutoff$log2FoldChange>1 | res_merged_cutoff$log2FoldChange< -1) +#d<-as.matrix(up_down_1FC[,c(2:7)]) +#rownames(d) <- up_down_1FC[,8] +dim(res_merged_cutoff) +d<-as.matrix(res_merged_cutoff[,c(2:7)]) +rownames(d)<-res_merged_cutoff[,8] +d<-na.omit(d) +d<-d[,c(1,3,5,2,4,6)] +colnames(d)<-c("BR-A-Control","BR-B-Control","BR-C-Control","BR-A-Mutant","BR-B-Mutant","BR-C-Mutant") +hr <- hclust(as.dist(1-cor(t(d), method="pearson")), method="complete") +mycl <- cutree(hr, h=max(hr$height/1.5)) +clusterCols <- rainbow(length(unique(mycl))) +myClusterSideBar <- clusterCols[mycl] +myheatcol <- greenred(75) +#tiff("Brachial_heatmap.tiff", width = 1000,height = 1000,units="px",res = NA,pointsize=12) +heatmap.2(d, + Rowv=as.dendrogram(hr), + cexRow=0.8,cexCol=0.8,srtCol= 90, + adjCol = c(NA,0),offsetCol=2.5,offsetRow=0.001, + Colv=NA, dendrogram="row", + scale="row", col=myheatcol, + density.info="none", + trace="none") +#dev.off() +### + +``` + +Versions: +```{r,} +sessionInfo() +``` + +### Sequencing and bioinformatics analysis by: + +NYU Langone Medical Center +Bioinformatics Core, Genome Technology Center, OCS +Email: Genomics@nyumc.org +Phone: 646-501-2834 +http://ocs.med.nyu.edu/bioinformatics-core +http://ocs.med.nyu.edu/genome-technology-center + + +# References + +M. I. Love, W. Huber, S. Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. +Genome Biology 2014, 15:550. http://dx.doi.org/10.1186/s13059-014-0550-8 + +R-Bioconductor: http://www.bioconductor.org/ + +DESeq2: http://www.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf \ No newline at end of file diff --git a/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.html b/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.html new file mode 100644 index 0000000..6f15954 --- /dev/null +++ b/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.html @@ -0,0 +1,223 @@ + + + + + + + + + + + + + + +Dasen lab, Pbx-mutant RNAseq, brachial + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+

Brachial-mutant vs. Control

+

Filenames containing raw transcript counts from htseq-count:

+
## [1] "BR-A-Control_counts.txt" "BR-A-Mutant_counts.txt" 
+## [3] "BR-B-Control_counts.txt" "BR-B-Mutant_counts.txt" 
+## [5] "BR-C-Control_counts.txt" "BR-C-Mutant_counts.txt"
+

The size of the table with all transcripts is:

+
## [1] 13918    15
+

The size of the table with only significant transcripts, padj<0.05 is:

+
## [1] 63 15
+

+
+
+

Heatmap

+

All genes padj<0.05, regardless of FC (all red points from MA plot above)

+
## [1] 63 15
+

+

Versions:

+
sessionInfo()
+
## R version 3.3.0 (2016-05-03)
+## Platform: x86_64-apple-darwin13.4.0 (64-bit)
+## Running under: OS X 10.9.5 (Mavericks)
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## attached base packages:
+## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
+## [8] methods   base     
+## 
+## other attached packages:
+##  [1] lattice_0.20-33            biomaRt_2.28.0            
+##  [3] RColorBrewer_1.1-2         gplots_3.0.1              
+##  [5] genefilter_1.54.2          DESeq2_1.12.3             
+##  [7] SummarizedExperiment_1.2.3 Biobase_2.32.0            
+##  [9] GenomicRanges_1.24.2       GenomeInfoDb_1.8.1        
+## [11] IRanges_2.6.1              S4Vectors_0.10.1          
+## [13] BiocGenerics_0.18.0       
+## 
+## loaded via a namespace (and not attached):
+##  [1] gtools_3.5.0         locfit_1.5-9.1       splines_3.3.0       
+##  [4] colorspace_1.2-6     htmltools_0.3.5      yaml_2.1.13         
+##  [7] chron_2.3-47         survival_2.39-5      XML_3.98-1.4        
+## [10] foreign_0.8-66       DBI_0.4-1            BiocParallel_1.6.2  
+## [13] plyr_1.8.4           stringr_1.0.0        zlibbioc_1.18.0     
+## [16] munsell_0.4.3        gtable_0.2.0         caTools_1.17.1      
+## [19] evaluate_0.9         latticeExtra_0.6-28  knitr_1.13          
+## [22] geneplotter_1.50.0   AnnotationDbi_1.34.3 Rcpp_0.12.5         
+## [25] acepack_1.3-3.3      KernSmooth_2.23-15   xtable_1.8-2        
+## [28] scales_0.4.0         formatR_1.4          gdata_2.17.0        
+## [31] Hmisc_3.17-4         annotate_1.50.0      XVector_0.12.0      
+## [34] gridExtra_2.2.1      ggplot2_2.1.0        digest_0.6.9        
+## [37] stringi_1.1.1        grid_3.3.0           tools_3.3.0         
+## [40] bitops_1.0-6         magrittr_1.5         RCurl_1.95-4.8      
+## [43] RSQLite_1.0.0        Formula_1.2-1        cluster_2.0.4       
+## [46] Matrix_1.2-6         data.table_1.9.6     rmarkdown_0.9.6     
+## [49] rpart_4.1-10         nnet_7.3-12
+
+

Sequencing and bioinformatics analysis by:

+

NYU Langone Medical Center
+Bioinformatics Core, Genome Technology Center, OCS
+Email: Genomics@nyumc.org
+Phone: 646-501-2834
+http://ocs.med.nyu.edu/bioinformatics-core
+http://ocs.med.nyu.edu/genome-technology-center

+
+
+
+

References

+

M. I. Love, W. Huber, S. Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology 2014, 15:550. http://dx.doi.org/10.1186/s13059-014-0550-8

+

R-Bioconductor: http://www.bioconductor.org/

+

DESeq2: http://www.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf

+
+ + + + +
+ + + + + + + + diff --git a/Dasen_RNAseq_report_thoracic_padj.Rmd b/Dasen_RNAseq_report_thoracic_padj.Rmd new file mode 100644 index 0000000..de5ad97 --- /dev/null +++ b/Dasen_RNAseq_report_thoracic_padj.Rmd @@ -0,0 +1,138 @@ +--- +title: "Dasen lab, Pbx-mutant RNAseq, thoracic" +author: "Lisa Cohen" +output: html_document +--- + +# Thoracic-mutant vs. Control + +Filenames containing raw transcript counts from htseq-count are as follows: +```{r,echo=FALSE, message=FALSE, warning=FALSE} +library(DESeq2) +library(gplots) +library(RColorBrewer) +library(biomaRt) +library("genefilter") +library("lattice") +setwd("~/Documents/NYUMC/Dasen/thoracic/htseq_counts") +mypath<-"~/Documents/NYUMC/Dasen/thoracic/htseq_counts" +filenames<-list.files(path=mypath, pattern= "_counts.txt", full.names=FALSE) +datalist <-lapply(filenames, function(x){read.table(x,header=FALSE, sep="\t")}) +for (i in 1:length(filenames)) +{ + colnames(datalist[[i]])<-c("ID",filenames[[i]]) +} +mergeddata <- Reduce(function(x,y) {merge(x,y, by="ID")}, datalist) +new_data_merge<-mergeddata[-1:-5,] +#write.csv(new_data_merge,file="Dasen_thoracic_count_data_Ensembl.csv") +rownames(new_data_merge)<-new_data_merge$ID +new_data_merge<-new_data_merge[,-1] +data<-new_data_merge +colnames(data) +col.names<-c("TH-A-Control","TH-A-Mutant","TH-B-Control","TH-B-Mutant","TH-C-Control","TH-C-Mutant") +colnames(data)<-col.names + +ExpDesign <- data.frame(row.names=colnames(data), condition = c("Control","Mutant","Control","Mutant","Control","Mutant")) +cds<-DESeqDataSetFromMatrix(countData=data, colData=ExpDesign,design=~condition) +cds$condition <- relevel(cds$condition, "Control") +cds<-DESeq(cds, betaPrior=FALSE) +# log2 transformation for PCA plot + +# get norm counts +norm_counts<-counts(cds,normalized=TRUE) +norm_counts_data<-as.data.frame(norm_counts) +ensembl_id<-rownames(norm_counts) +norm_counts_data<-cbind(ensembl_id,norm_counts_data) +filtered_norm_counts<-norm_counts_data[!rowSums(norm_counts_data[,2:7]==0)>=1, ] +``` + +The size of the table with all transcripts is: +```{r,echo=FALSE, message=FALSE, warning=FALSE} +# get gene name from Ensembl gene ID +ensembl=useMart("ensembl") +ensembl = useDataset("mmusculus_gene_ensembl",mart=ensembl) +data_table<-filtered_norm_counts + +query<-getBM(attributes=c('ensembl_gene_id','external_gene_name','gene_biotype'), filters = 'ensembl_gene_id', values = ensembl_id, mart=ensembl) +col.names<-c("ensembl_id","external_gene_id","gene_biotype") +colnames(query)<-col.names +merge_biomart_res_counts <- merge(data_table,query,by="ensembl_id") +temp_data_merged_counts<-merge_biomart_res_counts + +## +res<-results(cds,contrast=c("condition","Mutant","Control")) +res_ordered<-res[order(res$padj),] +ensembl_id<-rownames(res_ordered) +res_ordered<-as.data.frame(res_ordered) +res_ordered<-cbind(res_ordered,ensembl_id) +merge_biomart_res_counts <- merge(temp_data_merged_counts,res_ordered,by="ensembl_id") +merge_biomart_res_all<-subset(merge_biomart_res_counts,merge_biomart_res_counts$padj!="NA") +merge_biomart_res_all<-merge_biomart_res_all[order(merge_biomart_res_all$padj),] +dim(merge_biomart_res_all) +write.csv(merge_biomart_res_all,"Dasen_thoracic_Mutant_Control_CPM_all.csv") +``` + +The size of the table with only significant transcripts, padj<0.05 is: +```{r,echo=FALSE, message=FALSE, warning=FALSE} +#res_merged_cutoff<-subset(merge_biomart_res_all,merge_biomart_res_all$padj<0.1) +res_merged_cutoff<-subset(merge_biomart_res_all,merge_biomart_res_all$padj<0.05) +dim(res_merged_cutoff) +write.csv(res_merged_cutoff,"Dasen_thoracic_Mutant_Control_padj0.05.csv") +plot(log2(res$baseMean), res$log2FoldChange, col=ifelse(res$padj < 0.05, "red","gray67"),main="(DESeq2) Thoracic Mutant vs. Control (padj<0.05)",xlim=c(1,15),pch=20,cex=1) +abline(h=c(-1,1), col="blue") +``` + + +# Heatmap + +All genes padj<0.05, Log2FC>1, Log2FC<-0.5 + +```{r,echo=FALSE, message=FALSE, warning=FALSE} +up_down_FC<-subset(res_merged_cutoff,res_merged_cutoff$log2FoldChange>1 | res_merged_cutoff$log2FoldChange< -0.5) +#d<-up_down_1FC +d<-as.matrix(res_merged_cutoff[,c(2:7)]) +rownames(d)<-res_merged_cutoff[,8] +d<-na.omit(d) +d<-d[,c(1,3,5,2,4,6)] +colnames(d)<-c("TH-A-Control","TH-B-Control","TH-C-Control","TH-A-Mutant","TH-B-Mutant","TH-C-Mutant") +hr <- hclust(as.dist(1-cor(t(d), method="pearson")), method="complete") +mycl <- cutree(hr, h=max(hr$height/1.5)) +clusterCols <- rainbow(length(unique(mycl))) +myClusterSideBar <- clusterCols[mycl] +myheatcol <- greenred(75) +#tiff("Thoracic_heatmap.tiff", width = 1000,height = 1000,units="px",res = NA,pointsize=12) +heatmap.2(d, + Rowv=as.dendrogram(hr), + cexRow=0.8,cexCol=0.8,srtCol= 90, + adjCol = c(NA,0),offsetCol=2.5, offsetRow=0.01, + Colv=NA, dendrogram="row", + scale="row", col=myheatcol, + density.info="none", + trace="none") +#dev.off() +### + +``` + +Versions: +```{r,} +sessionInfo() +``` + +### Sequencing and bioinformatics analysis by: + +NYU Langone Medical Center +Bioinformatics Core, Genome Technology Center, OCS +Email: Genomics@nyumc.org +Phone: 646-501-2834 +http://ocs.med.nyu.edu/bioinformatics-core +http://ocs.med.nyu.edu/genome-technology-center + +# References + +M. I. Love, W. Huber, S. Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. +Genome Biology 2014, 15:550. http://dx.doi.org/10.1186/s13059-014-0550-8 + +R-Bioconductor: http://www.bioconductor.org/ + +DESeq2: http://www.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf \ No newline at end of file diff --git a/Dasen_RNAseq_report_thoracic_padj.html b/Dasen_RNAseq_report_thoracic_padj.html new file mode 100644 index 0000000..986a3e8 --- /dev/null +++ b/Dasen_RNAseq_report_thoracic_padj.html @@ -0,0 +1,222 @@ + + + + + + + + + + + + + + +Dasen lab, Pbx-mutant RNAseq, thoracic + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+

Thoracic-mutant vs. Control

+

Filenames containing raw transcript counts from htseq-count are as follows:

+
## [1] "TH-A-Control_counts.txt" "TH-A-Mutant_counts.txt" 
+## [3] "TH-B-Control_counts.txt" "TH-B-Mutant_counts.txt" 
+## [5] "TH-C-Control_counts.txt" "TH-C-Mutant_counts.txt"
+

The size of the table with all transcripts is:

+
## [1] 15791    15
+

The size of the table with only significant transcripts, padj<0.05 is:

+
## [1] 124  15
+

+
+
+

Heatmap

+

All genes padj<0.05, Log2FC>1, Log2FC<-0.5

+

+

Versions:

+
sessionInfo()
+
## R version 3.3.0 (2016-05-03)
+## Platform: x86_64-apple-darwin13.4.0 (64-bit)
+## Running under: OS X 10.9.5 (Mavericks)
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## attached base packages:
+## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
+## [8] methods   base     
+## 
+## other attached packages:
+##  [1] lattice_0.20-33            genefilter_1.54.2         
+##  [3] biomaRt_2.28.0             RColorBrewer_1.1-2        
+##  [5] gplots_3.0.1               DESeq2_1.12.3             
+##  [7] SummarizedExperiment_1.2.3 Biobase_2.32.0            
+##  [9] GenomicRanges_1.24.2       GenomeInfoDb_1.8.1        
+## [11] IRanges_2.6.1              S4Vectors_0.10.1          
+## [13] BiocGenerics_0.18.0       
+## 
+## loaded via a namespace (and not attached):
+##  [1] gtools_3.5.0         locfit_1.5-9.1       splines_3.3.0       
+##  [4] colorspace_1.2-6     htmltools_0.3.5      yaml_2.1.13         
+##  [7] chron_2.3-47         survival_2.39-5      XML_3.98-1.4        
+## [10] foreign_0.8-66       DBI_0.4-1            BiocParallel_1.6.2  
+## [13] plyr_1.8.4           stringr_1.0.0        zlibbioc_1.18.0     
+## [16] munsell_0.4.3        gtable_0.2.0         caTools_1.17.1      
+## [19] evaluate_0.9         latticeExtra_0.6-28  knitr_1.13          
+## [22] geneplotter_1.50.0   AnnotationDbi_1.34.3 Rcpp_0.12.5         
+## [25] acepack_1.3-3.3      KernSmooth_2.23-15   xtable_1.8-2        
+## [28] scales_0.4.0         formatR_1.4          gdata_2.17.0        
+## [31] Hmisc_3.17-4         annotate_1.50.0      XVector_0.12.0      
+## [34] gridExtra_2.2.1      ggplot2_2.1.0        digest_0.6.9        
+## [37] stringi_1.1.1        grid_3.3.0           tools_3.3.0         
+## [40] bitops_1.0-6         magrittr_1.5         RCurl_1.95-4.8      
+## [43] RSQLite_1.0.0        Formula_1.2-1        cluster_2.0.4       
+## [46] Matrix_1.2-6         data.table_1.9.6     rmarkdown_0.9.6     
+## [49] rpart_4.1-10         nnet_7.3-12
+
+

Sequencing and bioinformatics analysis by:

+

NYU Langone Medical Center
+Bioinformatics Core, Genome Technology Center, OCS
+Email: Genomics@nyumc.org
+Phone: 646-501-2834
+http://ocs.med.nyu.edu/bioinformatics-core
+http://ocs.med.nyu.edu/genome-technology-center

+
+
+
+

References

+

M. I. Love, W. Huber, S. Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology 2014, 15:550. http://dx.doi.org/10.1186/s13059-014-0550-8

+

R-Bioconductor: http://www.bioconductor.org/

+

DESeq2: http://www.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf

+
+ + + + +
+ + + + + + + + diff --git a/README.md b/README.md index bc4c633..420bd96 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,11 @@ # Pbx-Neuron-Paper RNA-seq Analysis + + +* [Brachial](http://htmlpreview.github.io/?https://github.com/ljcohen/Pbx-Neuron-Paper/blob/master/Dasen_RNAseq_report_brachial_padj_nologFCcutoff.html) + +* [Thoracic](http://htmlpreview.github.io/?https://github.com/ljcohen/Pbx-Neuron-Paper/blob/master/Dasen_RNAseq_report_thoracic_padj.html) + +* [Brachial/Thoracic overlap](http://htmlpreview.github.io/?https://github.com/ljcohen/Pbx-Neuron-Paper/blob/master/Dasen_RNAseq_report_brachial_thoracic_overlap.html) +* [Controls](http://htmlpreview.github.io/?https://github.com/ljcohen/Pbx-Neuron-Paper/blob/master/Dasen_RNAseq_report_controls.html) +