Skip to content

Commit 5618e5f

Browse files
committed
keeping track of changes
1 parent 526b32d commit 5618e5f

2 files changed

Lines changed: 33 additions & 27 deletions

File tree

bin/RAILS

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,25 +18,25 @@
1818
#
1919
#
2020
# LICENSE
21-
# LINKS, RAILS and Cobbler Copyright (c) 2014-2016 Canada's Michael Smith Genome Science Centre. All rights reserved.
21+
# LINKS, RAILS and Cobbler Copyright (c) 2014-2017 Canada's Michael Smith Genome Science Centre. All rights reserved.
2222

2323
use strict;
2424
use Getopt::Std;
2525
use Net::SMTP;
2626
use vars qw($opt_f $opt_s $opt_d $opt_i $opt_e $opt_l $opt_a $opt_v $opt_b $opt_t $opt_p $opt_q);
2727
getopts('f:s:d:e:l:a:v:b:t:p:i:q:');
28-
my ($base_name,$frag_dist,$seqid,$insert_stdev,$min_links,$max_link_ratio,$verbose)=("",1000,0.9,1.0,1,0.0,0);
28+
my ($base_name,$frag_dist,$seqid,$insert_stdev,$min_links,$max_link_ratio,$verbose)=("",250,0.9,1.0,1,0.0,0);
2929

30-
my $version = "[v1.1]";
30+
my $version = "[v1.2]";
3131
my $dev = "rwarren\@bcgsc.ca";
32-
32+
my $SAMPATH = "/gsc/btl/linuxbrew/bin/samtools";
3333
#-------------------------------------------------
3434

3535
if(! $opt_f || ! $opt_s || ! $opt_q){
3636
print "Usage: $0 $version\n";
37-
print "-f Assembled Sequences to further scaffold (Multi-Fasta format, required)\n";
38-
print "-q Long Sequences queried (Multi-Fasta format, required)\n";
39-
print "-s SAM file\n";
37+
print "-f Assembled Sequences to further scaffold (Multi-FASTA format NO LINE BREAKS, required)\n";
38+
print "-q Long Sequences queried (Multi-FASTA format NO LINE BREAKS, required)\n";
39+
print "-s BAM file (use v1.1 for reading SAM files)\n";
4040
print "-d Anchoring bases on contig edges (ie. minimum required alignment size on contigs, default -d $frag_dist, optional)\n";
4141
print "-i Minimum sequence identity, default -i $seqid, optional\n";
4242
print "-t LIST of names/header, long sequences to avoid using for merging/gap-filling scaffolds (optional)\n";
@@ -74,7 +74,7 @@ if(! -e $longfile){
7474
### Naming output files
7575
if ($base_name eq ""){
7676

77-
$base_name = $file . ".scaff_s-" . $longfile . "_d" . $frag_dist . "_i" . $seqid . "_e" . $insert_stdev . "_l" . $min_links . "_a" . $max_link_ratio . "_t" . $listfile;
77+
$base_name = $file . ".scaff_s-" . $longfile . "_q-" . $queryfile . "_d" . $frag_dist . "_i" . $seqid . "_t" . $listfile;
7878

7979
my $pid_num = getpgrp(0);
8080
$base_name .= "_pid" . $pid_num;
@@ -165,11 +165,11 @@ my $numgaps = $#arrsg+1;
165165
printf $final_message, ($numgaps,$avg,$sd,$sum,$max,$min);
166166
printf LOG $final_message, ($numgaps,$avg,$sd,$sum,$max,$min);
167167

168-
$assemblyruninfo .= "done: $date\n\n--------------- $0 Summary ---------------\nNumber of gaps patched : $numgaps\nAverage length (bp) : $avg\nLength st.dev +/- : $sd\nTotal bases added : $sum\nLargest gap resolved (bp) : $max\nShortest gap resolved (bp) : $min\n---------------------------------------------\n";
168+
$assemblyruninfo .= "done: $date\n\n--------------- $0 Summary ---------------\nNumber of merges : $numgaps\nAverage closed gap length (bp) : $avg\nClosed gap length st.dev +/- : $sd\nTotal bases added : $sum\nLargest gap resolved (bp) : $max\nShortest gap resolved (bp) : $min\n---------------------------------------------\n";
169169

170170
close LOG;
171171

172-
exit;
172+
#exit;
173173

174174
###for dev. test purposes
175175
eval{
@@ -292,7 +292,9 @@ sub readSam{
292292
my %rlength = ();
293293
my $min=1;
294294

295-
open(IN,$samfile) || die "Error reading $samfile -- fatal.\n";
295+
my $ERRLOG = $samfile.".bampreprocessor.err.log".$$.time();
296+
my $cmd = "$SAMPATH view $samfile 2>$ERRLOG|";
297+
open(IN,$cmd) || die "Error reading $samfile -- fatal.\n";
296298
while(<IN>){
297299

298300
chomp;

bin/cobbler.pl

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -18,25 +18,26 @@
1818
#
1919

2020
#LICENSE
21-
# LINKS, RAILS and Cobbler Copyright (c) 2014-2016 Canada's Michael Smith Genome Science Centre. All rights reserved.
21+
# LINKS, RAILS and Cobbler Copyright (c) 2014-2017 Canada's Michael Smith Genome Science Centre. All rights reserved.
2222

2323
use strict;
2424
use Getopt::Std;
2525
use Net::SMTP;
2626
use vars qw($opt_f $opt_s $opt_d $opt_i $opt_v $opt_b $opt_t $opt_q);
2727
getopts('f:s:d:v:b:t:i:q:');
28-
my ($base_name,$frag_dist,$seqid,$verbose)=("",1000,0.9,0);
28+
my ($base_name,$frag_dist,$seqid,$verbose)=("",250,0.9,0);
2929

30-
my $version = "[v0.2]";
30+
my $version = "[v0.3]";
3131
my $dev = "rwarren\@bcgsc.ca";
32+
my $SAMPATH = "/gsc/btl/linuxbrew/bin/samtools";
3233

3334
#-------------------------------------------------
3435

3536
if(! $opt_f || ! $opt_s || ! $opt_q){
3637
print "Usage: $0 $version\n";
37-
print "-f Assembled Sequences to further scaffold (Multi-Fasta format, required)\n";
38-
print "-q Long Sequences queried (Multi-Fasta format, required)\n";
39-
print "-s SAM file\n";
38+
print "-f Assembled Sequences to further scaffold (Multi-FASTA format NO LINE BREAKS, required)\n";
39+
print "-q Long Sequences queried (Multi-FASTA format NO LINE BREAKS, required)\n";
40+
print "-s BAM file (use v0.2 for reading SAM files)\n";
4041
print "-d Anchoring bases on contig edges (ie. minimum required alignment size on contigs, default -d $frag_dist, optional)\n";
4142
print "-i Minimum sequence identity, default -i $seqid, optional\n";
4243
print "-t LIST of names/header, long sequences to avoid using for merging/gap-filling scaffolds (optional)\n";
@@ -67,7 +68,7 @@
6768
### Naming output files
6869
if ($base_name eq ""){
6970

70-
$base_name = $file . ".scaff_s-" . $longfile . "_d" . $frag_dist . "_i" . $seqid . "_t" . $listfile;
71+
$base_name = $file . ".scaff_s-" . $longfile . "_q-" . $queryfile . "_d" . $frag_dist . "_i" . $seqid . "_t" . $listfile;
7172

7273
my $pid_num = getpgrp(0);
7374
$base_name .= "_pid" . $pid_num;
@@ -113,21 +114,22 @@
113114
print $patchmsg;
114115
print LOG $patchmsg;
115116
$assemblyruninfo.=$patchmsg;
116-
my $gsl = &patchGaps($file,$tigpair,$newassemblyfile,$tsvfile);
117+
my ($gsl,$totalgap) = &patchGaps($file,$tigpair,$newassemblyfile,$tsvfile);
117118

118119
my $date = `date`;
119120
chomp($date);
120121
my ($avg,$sum,$max,$min) = &average($gsl);
121122
my $sd = &stdev($gsl);
122-
my $final_message = "done: $date\n\n--------------- $0 Summary ---------------\nNumber of gaps patched : %i\nAverage length (bp) : %.2f\nLength st.dev +/- : %.2f\nTotal bases added : %i\nLargest gap resolved (bp) : %i\nShortest gap resolved (bp) : %i\n---------------------------------------------\n";
123+
my $final_message = "done: $date\n\n--------------- $0 Summary ---------------\nNumber of gaps patched : %i out of %i (%.2f %%)\nAverage length (bp) : %.2f\nLength st.dev +/- : %.2f\nTotal bases added : %i\nLargest gap resolved (bp) : %i\nShortest gap resolved (bp) : %i\n---------------------------------------------\n";
123124
my @arrsg=@$gsl;
124125
my $numgaps = $#arrsg+1;
125-
printf $final_message, ($numgaps,$avg,$sd,$sum,$max,$min);
126-
printf LOG $final_message, ($numgaps,$avg,$sd,$sum,$max,$min);
126+
my $percentclosed = $numgaps / $totalgap *100;
127+
printf $final_message, ($numgaps,$totalgap,$percentclosed,$avg,$sd,$sum,$max,$min);
128+
printf LOG $final_message, ($numgaps,$totalgap,$percentclosed,$avg,$sd,$sum,$max,$min);
127129

128-
$assemblyruninfo .= "done: $date\n\n--------------- $0 Summary ---------------\nNumber of gaps patched : $numgaps\nAverage length (bp) : $avg\nLength st.dev +/- : $sd\nTotal bases added : $sum\nLargest gap resolved (bp) : $max\nShortest gap resolved (bp) : $min\n---------------------------------------------\n";
130+
$assemblyruninfo .= "done: $date\n\n--------------- $0 Summary ---------------\nNumber of gaps patched : $numgaps out of $totalgap ($percentclosed %) \nAverage length (bp) : $avg\nLength st.dev +/- : $sd\nTotal bases added : $sum\nLargest gap resolved (bp) : $max\nShortest gap resolved (bp) : $min\n---------------------------------------------\n";
129131

130-
exit;
132+
#exit;
131133

132134
###for dev. test purposes
133135
eval{
@@ -150,7 +152,7 @@
150152
#-----------------
151153
sub readSeqMemory{
152154

153-
my $file = shift;
155+
my $file = shift;
154156

155157
my $fh;
156158
my $prev="NA";
@@ -252,7 +254,7 @@ sub patchGaps{
252254
print "$endmessage";
253255
$assemblyruninfo .= $endmessage;
254256

255-
return \@gapspatched;
257+
return \@gapspatched,$totalgap;
256258
}
257259

258260
#---------------
@@ -299,7 +301,9 @@ sub readSam{
299301
my %rlength = ();
300302
my $min=1;
301303

302-
open(IN,$samfile) || die "Error reading $samfile -- fatal.\n";
304+
my $ERRLOG = $samfile.".bampreprocessor.err.log".$$.time();
305+
my $cmd = "$SAMPATH view $samfile 2>$ERRLOG|";
306+
open(IN,$cmd) || die "Error reading $samfile -- fatal.\n";
303307
while(<IN>){
304308

305309
chomp;

0 commit comments

Comments
 (0)