Skip to content

Commit e8dcc93

Browse files
Merge pull request #4 from HudsonAlpha/revert-3-spyder_error_zero_reads
Revert "Spyder error zero reads"
2 parents dbbb94f + acb46f4 commit e8dcc93

10 files changed

Lines changed: 53 additions & 810 deletions

R/stats_algs.R

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,22 @@
44
# INPUT DATA
55
args <- commandArgs(trailingOnly = TRUE)
66
path <- args[1] # path to the project outputs/ folder
7-
alg <- args[2] # algorithm: star or htseq-gene or htseq-exon or kallisto
7+
alg <- args[2] # algorithm: star or htseq-gene or htseq-exon
88
# READS RPKM AND COUNT MATRICES
9-
if (alg != 'kallisto'){
10-
rpkms <- read.table(paste(path, alg, "_rpkms.txt", sep=""), sep = "\t", header = T, stringsAsFactors = F)
11-
snames1 <- as.character(read.table(paste(path, alg, "_rpkms.txt", sep=""), sep = "\t", header = F, stringsAsFactors = F, nrows = 1))
12-
counts <- read.table(paste(path, alg, "_counts.txt", sep=""), sep = "\t", header = T, stringsAsFactors = F)
13-
snames2 <- as.character(read.table(paste(path, alg, "_counts.txt", sep=""), sep = "\t", header = F, stringsAsFactors = F, nrows = 1))
14-
labs <- c("RPKM","COUNTS")
15-
} else{
16-
rpkms <- read.table(paste(path, alg, "_tpm.txt", sep=""), sep = "\t", header = T, stringsAsFactors = F, na.strings = '-nan')
17-
rpkms <- rpkms[,2:ncol(rpkms)]
18-
snames1 <- as.character(read.table(paste(path, alg, "_tpm.txt", sep=""), sep = "\t", header = F, stringsAsFactors = F, nrows = 1))
19-
snames1 <- snames1[2:length(snames1)]
20-
counts <- read.table(paste(path, alg, "_est_counts.txt", sep=""), sep = "\t", header = T, stringsAsFactors = F)
21-
counts <- counts[,2:ncol(counts)]
22-
snames2 <- as.character(read.table(paste(path, alg, "_est_counts.txt", sep=""), sep = "\t", header = F, stringsAsFactors = F, nrows = 1))
23-
snames2 <- snames2[2:length(snames2)]
24-
labs <- c("TPM","EST_COUNTS")
25-
}
9+
rpkms <- read.table(paste(path, alg, "_rpkms.txt", sep=""), sep = "\t", header = T, stringsAsFactors = F)
10+
snames1 <- as.character(read.table(paste(path, alg, "_rpkms.txt", sep=""), sep = "\t", header = F, stringsAsFactors = F, nrows = 1))
11+
counts <- read.table(paste(path, alg, "_counts.txt", sep=""), sep = "\t", header = T, stringsAsFactors = F)
12+
snames2 <- as.character(read.table(paste(path, alg, "_counts.txt", sep=""), sep = "\t", header = F, stringsAsFactors = F, nrows = 1))
2613
# ONLY USES FEATURES WITH AT LEAST TWO RAW COUNTS IN ONE LIBRARY
2714
inc <- which(rowSums(counts[2:ncol(counts)], na.rm = T) > 1)
2815
NT <- c()
2916
for (i in 1:2){
3017
N <- matrix(NA,nrow=0,ncol=0)
31-
if (i == 1) {G <- rpkms[inc,];lab <- labs[1]; snames <- snames1[2:length(snames1)]}
32-
if (i == 2) {G <- counts[inc,];lab <- labs[2]; snames <- snames2[2:length(snames2)]}
18+
if (i == 1) {G <- rpkms[inc,];lab <- "RPKM"; snames <- snames1[2:length(snames1)]}
19+
if (i == 2) {G <- counts[inc,];lab <- "COUNTS"; snames <- snames2[2:length(snames2)]}
3320
if (ncol(G)>2){ # at least two samples available
3421
lcounts <- log2(G[, 2:ncol(G)]+1)
35-
s_ok <- (which((colSums(G[, 2:ncol(G)]) > 10000)&(colSums(is.na(lcounts)) == 0))) # remove samples with NA (not processed yet)
22+
s_ok <- (which(colSums(is.na(lcounts)) == 0)) # remove samples with NA (not processed yet)
3623
if (length(s_ok) > 1){
3724
## QUANTILES
3825
tp <- round(t(apply(lcounts[, s_ok], 2, quantile, probs = c(0.05,0.25,0.5,0.75,0.95), na.rm = T)), 2)
@@ -62,4 +49,3 @@ for (i in 1:2){
6249
NT <- cbind(snames[s_ok], NT)
6350
write.table(NT, file = paste(path, alg, "_pca", ".txt", sep=""), quote = F, row.names = F, sep = "\t")
6451

65-

lib/html_lib.py

Lines changed: 18 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,18 @@ def get_menu(config, ns):
3333
menu.append('<h1>Alignment QC:</h1>')
3434
if enabled.has_key("picard"):
3535
menu.append('<h2><a #highlight="" href="./picard.html">- Picard</a></h2>')
36-
if enabled.has_key("picard_IS"):
37-
menu.append('<h2><a #highlight="" href="./picard-is.html">- Picard Insert Size</a></h2>')
3836
if enabled.has_key("star"):
3937
menu.append('<h2><a #highlight="" href="./star.html">- STAR</a></h2>')
40-
if enabled.has_key("kallisto"):
41-
menu.append('<h2><a #highlight="" href="./kallisto.html">- KALLISTO</a></h2>')
4238
if enabled.has_key("htseq-gene"):
4339
menu.append('<h2><a #highlight="" href="./htseq-gene.html">- HTseq-Gene</a></h2>')
4440
if enabled.has_key("htseq-exon"):
4541
menu.append('<h2><a #highlight="" href="./htseq-exon.html">- HTseq-Exon</a></h2>')
4642
if ns > 1:
47-
if enabled.has_key("star") or enabled.has_key("htseq-gene") or enabled.has_key("htseq-exon") or enabled.has_key("kallisto"):
43+
if enabled.has_key("star") or enabled.has_key("htseq-gene") or enabled.has_key("htseq-exon"):
4844
menu.append('<h1>Count statistics:</h1>')
4945
menu.append('<h2><a #highlight="" href="./downloads.html">- DOWNLOADS</a></h2>')
5046
if enabled.has_key("star"):
5147
menu.append('<h2><a #highlight="" href="./star2.html">- STAR</a></h2>')
52-
if enabled.has_key("kallisto"):
53-
menu.append('<h2><a #highlight="" href="./kallisto2.html">- KALLISTO</a></h2>')
5448
if enabled.has_key("htseq-gene"):
5549
menu.append('<h2><a href="./htseq-gene2.html">- HTseq-Gene</a></h2>')
5650
if enabled.has_key("htseq-exon"):
@@ -68,8 +62,8 @@ def get_menu(config, ns):
6862
return menu
6963

7064
def print_samples(path,config):
71-
analysis = ['trimgalore', 'fastqc', 'kallisto', 'star', 'star-fusion', 'picard', "htseq-gene", "htseq-exon", "picard_IS", "varscan", 'gatk']
72-
sta= {"trimgalore":"TrimGalore", "fastqc":"FastQC","star":"STAR","star-fusion":"STAR-Fusion","picard":"PicardQC","kallisto":"Kallisto","htseq-gene":"HTseq-gene","htseq-exon":"HTseq-exon", "picard_IS":"Picard-InsertSize", "varscan":"VARSCAN", "gatk":"GATK"}
65+
analysis = ['trimgalore', 'fastqc', 'kallisto', 'star', 'star-fusion', 'picard', "htseq-gene", "htseq-exon", "varscan", 'gatk']
66+
sta= {"trimgalore":"TrimGalore", "fastqc":"FastQC","star":"STAR","star-fusion":"STAR-Fusion","picard":"PicardQC","kallisto":"Kallisto","htseq-gene":"HTseq-gene","htseq-exon":"HTseq-exon", "varscan":"VARSCAN", "gatk":"GATK"}
7367
# SAMPLES LIST
7468
samples = dict()
7569
f = open(path + "/samples.list",'r')
@@ -154,16 +148,6 @@ def print_samples(path,config):
154148
else:
155149
res.append("FAIL")
156150
results["star-fusion"][x] = " / ".join(res)
157-
elif i=="picard_IS":
158-
for x, y in sorted(samples.iteritems()):
159-
res = []
160-
if sok.has_key(x):
161-
link = "../results_picard_IS/" + x + ".txt"
162-
link = '<a href="LINK" target="_blank">OK</a>'.replace("LINK", link)
163-
res.append(link)
164-
else:
165-
res.append("FAIL")
166-
results["picard_IS"][x] = " / ".join(res)
167151
elif i=="picard":
168152
for x, y in sorted(samples.iteritems()):
169153
res = []
@@ -346,7 +330,6 @@ def stats_picard(path,samples,config):
346330
if i+n[k] in files:
347331
f = open(path+"/results_picard"+"/"+i+n[k],'r')
348332
nx = 0
349-
kdiff0 = 0
350333
for ii in f:
351334
if ii.startswith("PF_BASES"):
352335
head = ii.rstrip().split("\t")
@@ -358,26 +341,17 @@ def stats_picard(path,samples,config):
358341
vals = ii.strip("\n").split("\t")
359342
for kk in range(len(head)):
360343
stats[k][head[kk]] = vals[kk]
361-
if vals[kk] != "":
362-
if float(vals[kk]) > 0:
363-
kdiff0 += 1
364344
nx = 0
365-
if kdiff0 == 0:
366-
ex = 1
367-
break
368345
f.close()
369346
else:
370347
ex = 1
371-
break
372348
if ex ==1:
373349
tr = "<td bgcolor='#CC3300'>"+i+"</td>"
374350
o = i
375351
for ii in range(18):
376352
tr += "<td bgcolor='#CC3300'>NA</td>"
377353
o += "\tNA"
378354
tr = "<tr>"+tr+"</tr>"
379-
table.append(tr)
380-
s = ["NA" for ii in range(10)]
381355
else:
382356
align = int(stats[0]["PF_ALIGNED_BASES"])
383357
cod = int(stats[0]["CODING_BASES"])
@@ -415,56 +389,27 @@ def stats_picard(path,samples,config):
415389
tr +="<td bgcolor='#99CC66'>0</td>"
416390
o += "\t0"
417391
tr = "<tr>"+tr+"</tr>"
418-
table.append(tr)
419-
o = o.split("\t")
420-
st= 0
421-
s = []
422-
for ind in [10, 11, 12, 5, 7, 3]:
423-
s.append(str(round(float(o[ind]) * float(o[2])/float(o[1]),3)))
424-
if ind != 3:
425-
st += float(o[ind]) * float(o[2])/float(o[1])
426-
for ind in [15,16,17,18]:
427-
if o[ind] != "0":
428-
s.append(str(round(float(o[ind]) * 100,3)))
429-
else:
430-
s.append("0")
431-
s[5] = str(round(100 - st,3))
392+
table.append(tr)
393+
o = o.split("\t")
394+
st= 0
395+
s = []
396+
for ind in [10, 11, 12, 5, 7, 3]:
397+
s.append(str(round(float(o[ind]) * float(o[2])/float(o[1]),3)))
398+
if ind != 3:
399+
st += float(o[ind]) * float(o[2])/float(o[1])
400+
for ind in [15,16,17,18]:
401+
if o[ind] != "0":
402+
s.append(str(round(float(o[ind]) * 100,3)))
403+
else:
404+
s.append("0")
405+
s[5] = str(round(100 - st,3))
432406
print >> out, i + "\t" + "\t".join(s)
433407
out.close()
434408
return "<table>"+"\n".join(table)+"</table>"
435409
else:
436410
return ""
437411

438412

439-
def stats_picard_2(path,samples,config):
440-
n = os.listdir(path)
441-
hh = "\t".join(['sample_id','MEDIAN_INSERT_SIZE','MEDIAN_ABSOLUTE_DEVIATION','MIN_INSERT_SIZE',
442-
'MAX_INSERT_SIZE','MEAN_INSERT_SIZE','STANDARD_DEVIATION','READ_PAIRS', 'LINK_TXT', 'LINK_PDF'])
443-
if config.has_key("picard_IS") and ("results_picard_IS" in n):
444-
files = os.listdir(path+"/results_picard_IS")
445-
out = open(path + "/outputs/stats_picard2.txt",'w')
446-
print >> out, hh
447-
data = list()
448-
for i in sorted(samples.keys()):
449-
if i + ".txt" in files:
450-
f = open(path+"/results_picard_IS"+"/"+i+".txt",'r')
451-
k = 0
452-
while (1):
453-
j = f.readline()
454-
if j.startswith("MEDIAN_INSERT_SIZE"):
455-
j = f.readline().strip("\n").split("\t")
456-
break
457-
k += 1
458-
if k > 10 or len(j) == 0:
459-
j = ['NA' for i in range(7)]
460-
break
461-
print >> out, "\t".join([i] + j + ['<a href="../results_picard_IS/' + i + '.txt" target="_blank">+</a>', '<a href="../results_picard_IS/' + i + '.pdf" target="_blank">+</a>'])
462-
f.close()
463-
else:
464-
print >> out, "\t".join([i] + ['NA' for i in range(9)])
465-
out.close()
466-
return 1
467-
468413
def skeleton(path, path2html):
469414
print "> Building HTML and OUTPUT folders skeletons..."
470415
print " - Path: " + path
@@ -562,7 +507,7 @@ def check_config(path):
562507
# Parses the configuration file
563508
print "> Parsing configuration file..."
564509
try:
565-
z = ["trimgalore", "fastqc", "star", "star-fusion", "picard", "htseq-gene", "htseq-exon", "kallisto", "picard_IS", "varscan", "gatk"]
510+
z = ["trimgalore", "fastqc", "star", "star-fusion", "picard", "htseq-gene", "htseq-exon", "kallisto", "varscan", "gatk"]
566511
f = open(path + "/config.txt", 'r')
567512
analysis = dict()
568513
analysis["cluster"] = dict()

lib/programs.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -366,28 +366,6 @@ def sam2sortbam(timestamp, path_base, folder, samples, nproc, wt, q):
366366
return submit_job_super("sam2sortbam", path_base + folder, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
367367

368368

369-
def picard_IS(timestamp, path_base, folder, samples, nproc, wt, q):
370-
output = "results_picard_IS"
371-
secure_mkdir(path_base + folder, output)
372-
print "## Picard-InsertSize"
373-
print "> Writing jobs for Picard InsertSize..."
374-
nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
375-
commands = list()
376-
ksamp = sortbysize(samples)
377-
proc_files = os.listdir(path_base + folder + "/results_sam2sortbam/")
378-
for sample in ksamp:
379-
in_file = path_base + folder + "/results_sam2sortbam/" + sample + ".sorted.bam"
380-
if sample + ".sorted.bam" in proc_files:
381-
for i in range(len(config.nannots)):
382-
out_file = in_file.replace("results_sam2sortbam/", "results_picard_IS/").replace(".sorted.bam", "")
383-
call = "java -jar " + config.path_picard + "/CollectInsertSizeMetrics.jar I="+in_file+" O="+out_file+".txt H="+out_file+".pdf"
384-
commands.append(call + sample_checker.replace("#FOLDER", path_base + folder + "/results_picard_IS").replace("#SAMPLE", sample))
385-
else:
386-
print "Warning: [Picard] Sorted BAM file not found -> " + in_file
387-
create_scripts(nchild, commands, path_base, folder, output)
388-
return submit_job_super("picard_IS", path_base + folder, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
389-
390-
391369
def varscan(timestamp, path_base, folder, samples, nproc, wt, q, genome_build, args):
392370
ref = config.path_fasta.replace("#LABEL",genome_build)
393371
output = "results_varscan"

lib/spider_stats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import sys
23
import config
34

45
head_star_log = ["Sample", "Links","Started job","Started mapping","Finished","Mapping speed [Mr/h]","Input reads [n]","Input read length (mean)","Uniquely mapped [n]","Uniquely mapped [%]","Mapped length","Splices [n]","Splices annotated [n]","Splices GT/AG [n]","Splices: GC/AG [n]","Splices: AT/AC [n]","Splices: Non-canonical [n]","Mismatch rate per base [%]","Deletion rate per base [%]","Deletion average length","Insertion rate per base [%]","Insertion average length","Multimapping reads [n]","Multimapping reads [%]","Multimapping reads (+) [n]","Multimapping reads (+) [%]","Unmapped reads: too many mismatches [%]","Unmapped reads: too short [%]","Unmapped reads: other [%]"]
@@ -522,7 +523,7 @@ def stats_star(path, samples):
522523
r = list()
523524
for sample in samples:
524525
if N[i].has_key(sample):
525-
if (len(N[i][sample]) > j) and (MT[sample] > 0):
526+
if len(N[i][sample]) > j:
526527
s = str(round(float(N[i][sample][j]) * 1000000000 / (float(L[ng[j]]) * MT[sample]),4))
527528
else:
528529
s = "NA"

lib/vcrparser.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,14 @@ def project_process(path_base, folder):
5555
for i in f:
5656
if not i.startswith("%"):
5757
i = i.strip("\n").split("\t")
58-
if i[0] in ["trimgalore", "fastqc", "kallisto", "star", "star-fusion", "picard", "htseq-gene", "htseq-exon", "picard_IS", "varscan", "gatk"]:
58+
if i[0] in ["trimgalore", "fastqc", "kallisto", "star", "star-fusion", "picard", "htseq-gene", "htseq-exon", "varscan", "gatk"]:
5959
i[1] = i[1].split("/")[0]
6060
if i[1] != "0":
6161
config[i[0]] = i[1]
62-
if config.has_key("varscan") or config.has_key("gatk") or config.has_key("picard_IS") :
62+
if config.has_key("varscan") or config.has_key("gatk"):
6363
config["sam2sortbam"] = 1
6464
if len(config) > 0:
65-
for pg in ["trimgalore", "fastqc", "kallisto", "star", "star-fusion", "picard", "htseq-gene", "htseq-exon", "sam2sortbam", "picard_IS", "varscan", "gatk"]:
65+
for pg in ["trimgalore", "fastqc", "kallisto", "star", "star-fusion", "picard", "htseq-gene", "htseq-exon", "sam2sortbam", "varscan", "gatk"]:
6666
if config.has_key(pg):
6767
print "Process: " + pg
6868
if not pids.has_key(pg):

0 commit comments

Comments
 (0)