@@ -34,7 +34,7 @@ filenames.list[["SOG_16s"]] <- setNames(object = c("NGS4-16Schord_S1_L001_ali_as
3434 , nm = c(" count" , " annot" ))
3535
3636
37- # ### 1. Import input data and merge #####
37+ # ### 1.0 Import input data and merge #####
3838paste(" You are analyzing " , datatype , sep = " " )
3939counts <- read.delim2(paste(" 04_samples/" , filenames.list [[datatype ]][1 ], sep = " " ))
4040annot <- read.delim2(paste(" 05_annotated/" , filenames.list [[datatype ]][2 ], sep = " " ), header = F
@@ -55,7 +55,7 @@ names(data)
5555# Count up the reads coming out of MEGAN
5656sum(data $ count )
5757
58- # #### Limit to amplicon size #####
58+ # #### 1.1 Limit to amplicon size #####
5959# Investigate which species that you will remove with a particular amplicon size filter
6060losing.species <- sort(unique(data [which(data $ seq_length > 250 ), " taxon" ]))
6161losing.species
@@ -85,15 +85,15 @@ sum(data$count)
8585data <- data2
8686str(data )
8787
88- # ##### Reduce columns within data ####
88+ # ##### 1.2 Reduce columns within data ####
8989data.df <- as.data.frame(data [, grepl( " sample\\ .|taxon" , names( data ))]) # keeps 'sample.' or 'taxon'
9090head(data.df )
9191
9292# View species that are present in dataset
9393unique(data.df $ taxon )
9494
9595
96- # ### 1.1 Set location information ####
96+ # ### 1.3 Set location information ####
9797locations <- list ()
9898locations [[" locations.C3" ]] <- c(" IleQuarry" , " Charlott" , " LouisbNS" , " TerraNova" ," RigolNL" ," RamahNL"
9999 , " PondInlet" , " ErebusNu" , " StRochNu" , " BathhurNu" , " PearceNT" , " NomeAK"
@@ -106,7 +106,7 @@ sample.locations <- locations[[location.type]]
106106sample.locations
107107
108108
109- # #### Explore unassigned or unannotated data #####
109+ # #### 1.4 Explore unassigned or unannotated data #####
110110head(data.df )
111111table.filename <- paste(" 05_annotated/" , datatype , " _unassigned_unknown_counts.csv" , sep = " " )
112112no.hits <- colSums(data.df [data.df $ taxon == " No hits" , 2 : length(colnames(data.df ))])
@@ -121,7 +121,7 @@ unannot.df <- round(x = unannot.df, digits = 2)
121121
122122colnames(unannot.df ) <- sample.locations
123123
124- write.csv(x = unannot.df , file = table.filename )
124+ # write.csv(x = unannot.df, file = table.filename)
125125
126126
127127# Set species to remove (e.g. humans)
@@ -226,7 +226,7 @@ head(counts.filtered.df)
226226counts.filtered.filename <- paste(" 05_annotated/" , datatype , " _count_by_taxa_filt_at_" , min.count , " .csv" , sep = " " )
227227# write.csv(x = counts.filtered.df, file = counts.filtered.filename)
228228
229- # #### 4. Prepare plotting ####
229+ # #### 4.0 Prepare plotting (colors) ####
230230# Prepare palette
231231# display.brewer.all() # see color options
232232cols <- brewer.pal(n = 9 , name = " Set1" )
@@ -265,9 +265,19 @@ if(length(index) > length(palette)){
265265}
266266
267267
268- # #### Create Legend ####
268+ # ### 4.1 Drop Mock Column ####
269+ # This will remove the mock column for purposes of plotting as it overwhelms all of the data (due to too much sequencing for this)
270+ if (" sample.Mock" %in% colnames(counts.df ) == T ){
271+ counts.df <- counts.df [,- (which(colnames(counts.df )== " sample.Mock" ))]
272+ prop.df <- prop.df [,- (which(colnames(prop.df )== " sample.Mock" ))]
273+ site.names <- site.names [- (which(site.names == " sample.Mock" ))]
274+ sample.reads <- sample.reads [- (which(names(sample.reads )== " sample.Mock" ))]
275+ }
276+
277+
278+ # #### 4.2 Create Legend ####
269279# Prepare legend size
270- legend.cex <- c(1 , 0.7 , 1 , 0.8 , 0.8 ) ; names(legend.cex ) <- c(" C3_16s" ," C3_COI" , " SOG_16s" , " C3_val" , " SOG_val" )
280+ legend.cex <- c(1 , 1 , 1 , 1 , 1 ) ; names(legend.cex ) <- c(" C3_16s" ," C3_COI" , " SOG_16s" , " C3_val" , " SOG_val" )
271281
272282# Create dataframe with the taxon and the color
273283color.index <- cbind(rownames(prop.df ), this.palette )
@@ -293,7 +303,6 @@ high.presence.taxa
293303legend.info <- color.index.df [color.index.df $ taxon %in% high.presence.taxa , ]
294304
295305
296-
297306# ### 5. Plot ####
298307filename <- paste(" 06_output_figures/" , datatype , " _read_count_and_prop_by_loc.pdf" , sep = " " )
299308
@@ -308,42 +317,30 @@ position.info <- barplot(as.matrix(counts.df)
308317 , ylab = " Reads" )
309318# axis(side = 1, at = position.info, labels = sample.locations, las = 3, cex.axis = 0.9)
310319
311- # unique to SOG data, graph it without the mock sample...
312- # pdf(file = "06_output_figures/C3_val_counts_by_loc_no_mock.pdf", width = 10, height = 8)
313- # position.info <- barplot(as.matrix(counts.df[,-(which(colnames(counts.df)=="sample.Mock"))]), col = this.palette, las = 2, xaxt = "n")
314- # axis(side = 1, at = position.info, labels = sample.locations[-(which(sample.locations=="sample.Mock"))], las = 3, cex.axis = 0.9)
315-
316- # legend("topright", legend = legend.info$taxon, fill = as.character(legend.info$color), cex = 0.8)
317-
318-
319320# Plot proportion data
320321position.info <- barplot(as.matrix(prop.df ), col = this.palette
321322 , xlim = c(0 , ncol(prop.df )+ 4 )
322323 , las = 1
323- # , cex.names = 0.9
324- # , cex.axis = 0.9
325324 , ylab = " Proportion (%)"
326325 , xaxt = " n" )
327326
328327axis(side = 1 , at = position.info ,
329328 labels = site.names , las = 3
330- # , cex.axis = 0.9
331329 )
332330
333331# Add information about read counts per sample
334332mtext(x = position.info , text = sample.reads
335333 , side = 3 , at = position.info , cex = 0.7 )
336334
337335
338- # blank second plot
336+ # Plot Legend, first make blank second plot
339337plot(1 , type = " n" , axes = F , xlab = " " , ylab = " " )
340338
341339# fix legend info to character text
342340legend(x = " center" , y = " center" , legend = legend.info $ taxon
343341 , fill = as.character(legend.info $ color ), cex = legend.cex [datatype ]
344342 , ncol = 4 )
345343
346-
347344dev.off()
348345#
349346# Save out as 10 x 8 in portrait
0 commit comments