Skip to content

Commit 4acfdac

Browse files
committed
Merge branch 'release/v2.5.0'
2 parents 5b9262e + 3fd0314 commit 4acfdac

7 files changed

Lines changed: 230 additions & 28 deletions

File tree

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ bin/bam_to_sra_sub.pl
33
bin/bamToBw.pl
44
bin/bwa_aln.pl
55
bin/bwa_mem.pl
6+
bin/detectExtremeDepth.pl
67
bin/diff_bams.pl
78
bin/gnos_pull.pl
89
bin/monitor.pl

Makefile.PL

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
##########LICENCE##########
44
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
5-
# Copyright (C) 2014 ICGC PanCancer Project
5+
# Copyright (C) 2014-2016 ICGC PanCancer Project
66
#
77
# This program is free software; you can redistribute it and/or
88
# modify it under the terms of the GNU General Public License
@@ -33,6 +33,7 @@ WriteMakefile(
3333
bin/bwa_aln.pl
3434
bin/bwa_mem.pl
3535
bin/bam_stats.pl
36+
bin/detectExtremeDepth.pl
3637
bin/diff_bams.pl
3738
bin/monitor.pl
3839
bin/xml_to_bas.pl
@@ -55,8 +56,6 @@ WriteMakefile(
5556
'Proc::ProcessTable' => 0.50,
5657
'Data::UUID' => 1.219,
5758
'Test::Fatal' => 0.013,
58-
'GD' => 2.52,
59-
'Math::Gradient' => 0.04,
6059
'Devel::Cover' => 1.09,
6160
'Pod::Coverage' => 0.23,
6261
'Term::UI' => 0.42, # currently in core but scheduled for removal 5.17, no alternative recommended

bin/detectExtremeDepth.pl

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
#!/usr/bin/perl
2+
3+
##########LICENCE##########
4+
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
5+
# Copyright (C) 2014-2016 ICGC PanCancer Project
6+
#
7+
# This program is free software; you can redistribute it and/or
8+
# modify it under the terms of the GNU General Public License
9+
# as published by the Free Software Foundation; either version 2
10+
# of the License, or (at your option) any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this program; if not see:
19+
# http://www.gnu.org/licenses/gpl-2.0.html
20+
##########LICENCE##########
21+
22+
use Cwd qw(abs_path);
23+
use strict;
24+
use English qw( -no_match_vars );
25+
use warnings FATAL => 'all';
26+
27+
use File::Basename;
28+
use Carp;
29+
use Getopt::Long;
30+
use Pod::Usage;
31+
32+
use PCAP;
33+
34+
use Bio::DB::BigWig 'binMean','binStdev';
35+
36+
my %chr_stats;
37+
my @chr_order;
38+
39+
{
40+
my $options = option_builder();
41+
42+
my $wig = Bio::DB::BigWig->new(-bigwig=>$options->{'b'});
43+
my @chroms = $wig->features(-type=>'summary');
44+
45+
for my $c (@chroms) {
46+
my $seqid = $c->seq_id;
47+
next if(defined $options->{'r'} && $options->{'r'} ne $seqid && 'chr'.$options->{'r'} ne $seqid);
48+
my $start = $c->start;
49+
50+
my $stats = $c->statistical_summary(1);
51+
my $bin_width = $c->length/@$stats;
52+
53+
my $s = shift @{$stats};
54+
55+
my $mean = binMean($s);
56+
my $stdev = binStdev($s);
57+
my $end = $start + $bin_width-1;
58+
59+
push @chr_order, $seqid;
60+
$chr_stats{$seqid} = {'mean' => binMean($s),
61+
'stdev' => binStdev($s)};
62+
warn sprintf "%s: mean %.2f, stdev %.2f\n", $seqid, $chr_stats{$seqid}{'mean'}, $chr_stats{$seqid}{'stdev'};
63+
}
64+
65+
open my $OFH, '>', $options->{'o'} or die "Failed to create $options->{o}: $!\n";
66+
for my $chr(@chr_order) {
67+
my $max_val = $chr_stats{$chr}{'mean'} + ($chr_stats{$chr}{'stdev'} * $options->{'s'});
68+
warn sprintf "%s: Max depth permitted = %d\n", $chr, $max_val;
69+
my $iterator = $wig->get_seq_stream(-seq_id=> $chr);
70+
while (my $p = $iterator->next_seq) {
71+
next if($p->score <= $max_val);
72+
printf $OFH "%s\t%d\t%d\t%d\n", $chr, $p->start-1, $p->end, $p->score;
73+
}
74+
}
75+
close $OFH;
76+
}
77+
78+
sub option_builder {
79+
my ($factory) = @_;
80+
81+
my %opts;
82+
83+
&GetOptions (
84+
'h|help' => \$opts{'h'},
85+
'b|bigwig=s' => \$opts{'b'},
86+
'o|output=s' => \$opts{'o'},
87+
'r|ref=s' => \$opts{'r'},
88+
'd|decode=s@' => \$opts{'d'},
89+
's|sd=n' => \$opts{'s'},
90+
'v|version' => \$opts{'v'},
91+
);
92+
93+
if(defined $opts{'v'}) {
94+
print PCAP->VERSION,"\n";
95+
exit 0;
96+
}
97+
98+
pod2usage(0) if($opts{'h'});
99+
100+
pod2usage(1) if(!$opts{'b'} || !$opts{'o'});
101+
102+
croak $opts{'b'}.' was not found or is empty' if(!-e $opts{'b'} || !-s $opts{'b'});
103+
104+
if($opts{'d'}) {
105+
if(!$opts{'r'}) {
106+
croak '-d should not be defined without -r';
107+
}
108+
my %decode;
109+
foreach my $d_str(@{$opts{'d'}}) {
110+
if($d_str =~ m/^(\d+)\:(.*)$/) {
111+
my $num = $1;
112+
my $chr = $2;
113+
$decode{$num} = $chr;
114+
}
115+
else {
116+
croak "Decode string of $d_str is invalid see --help";
117+
}
118+
}
119+
if(defined $decode{$opts{'r'}}) {
120+
$opts{'r'} = $decode{$opts{'r'}};
121+
}
122+
}
123+
124+
my $fn = fileparse($opts{'b'});
125+
$fn =~ s/\.bw$//;
126+
$opts{'o'} .= '/' if($opts{'o'} !~ m/\/$/);
127+
$opts{'o'} .= $fn;
128+
if($opts{'r'}) {
129+
$opts{'o'} .= '.'.$opts{'r'};
130+
}
131+
$opts{'o'} .= '.bed';
132+
133+
if(!$opts{'s'}) {
134+
$opts{'s'} = 12;
135+
}
136+
137+
return \%opts;
138+
}
139+
140+
__END__
141+
142+
=head1 NAME
143+
144+
detectExtremeDepth.pl - Generate profile of BigWig file and identify regions outside the normal range
145+
146+
=head1 SYNOPSIS
147+
148+
General Options (list OR project must be defined):
149+
150+
--bigwig (-b) FILE BigWig file path
151+
--output (-o) DIR Folder to send output to
152+
- named as input file with '.tab' extension
153+
- if '-r' defined '.{val}' will prefix '.bed'
154+
155+
Optional:
156+
--ref (-r) STR Restrict to this reference (mainly for testing)
157+
- without 'chr' prefix, will test with and without the 'chr' for you.
158+
--decode (-d) STR Decode -r to chromosome names (do not include 'chr')
159+
e.g. -d 23:X -d 24:Y -d 25:MT
160+
--sd (-s) INT Number of standard deviations above mean for group to be included [12]
161+
--help (-h) This message
162+
--version (-v) Version
163+
164+
Examples:
165+
perl ~/detectExtremeDepth.pl -o someplace -b sample.bw
166+
167+
=cut

docs.tar.gz

1.75 KB
Binary file not shown.

lib/PCAP.pm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ const my %UPGRADE_PATH => ( # all earlier versions need full upgrade
8080
'2.2.1' => '',
8181
'2.3.0' => '',
8282
'2.4.0' => '',
83+
'2.5.0' => '',
8384
);
8485

8586
sub license {

lib/PCAP/Bam/Stats.pm

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,18 @@ use Const::Fast qw( const );
3131
use Try::Tiny;
3232
use File::Basename;
3333

34-
use Math::Gradient;
3534
use List::Util qw(sum sum0 first);
3635
use Bio::DB::HTS;
37-
use GD::Image;
3836
use JSON;
3937

38+
my $plots_available = 0;
39+
eval {
40+
require Math::Gradient;
41+
require GD::Image;
42+
$plots_available = 1;
43+
1;
44+
};
45+
4046
const my $PAIRED => 1; # not needed
4147
const my $PROPER => 2;
4248
const my $UNMAPPED => 4;
@@ -75,7 +81,11 @@ sub init{
7581

7682
my $path = $args{-path};
7783
my $sam;
78-
my $q_scoring = $args{-qscoring};
84+
my $q_scoring = $args{-qscoring} ? 1 : 0;
85+
if($q_scoring && $plots_available == 0) {
86+
$q_scoring = 0;
87+
warn "WARN: quality plots have been disabled as Math::Gradient and GD::Image were not found\n";
88+
}
7989

8090
unless ($sam && ref $sam eq 'Bio::DB::HTS'){
8191
$sam = Bio::DB::HTS->new(-bam => $path);
@@ -89,10 +99,10 @@ sub init{
8999
}
90100

91101
my $groups = _parse_header($sam);
92-
_process_reads($groups,$sam,$q_scoring, $mod, $rem) unless(defined $args{-no_proc});
93102
$self->{_file_path} = $path;
94-
$self->{_qualiy_scoring} = $q_scoring ? 1 : 0;
103+
$self->{_qualiy_scoring} = $q_scoring;
95104
$self->{_groups} = $groups;
105+
_process_reads($groups,$sam,$q_scoring, $mod, $rem) unless(defined $args{-no_proc});
96106
}
97107

98108
sub merge_json_stats {
@@ -147,7 +157,6 @@ sub _parse_header {
147157

148158
sub _process_reads {
149159
my ($groups, $sam, $qualiy_scoring, $mod, $rem) = @_;
150-
151160
my $bam = $sam->hts_file;
152161
my $header = $bam->header_read;
153162
my $processed_x = 0;
@@ -675,8 +684,13 @@ sub fqplots {
675684
for my $rg(keys %{$groups}) {
676685
for my $read(1..2) {
677686
my $plot_vals = $groups->{$rg}->{'fqp_'.$read};
678-
down_pop_quals($plot_vals); # adds mem bloat as undef values are all filled
679-
fastq2image($output_dir_path, $plot_vals, $rg, $read, $groups->{$rg}->{'length_'.$read}, $groups->{$rg}->{'count_'.$read});
687+
if($plot_vals) {
688+
down_pop_quals($plot_vals); # adds mem bloat as undef values are all filled
689+
fastq2image($output_dir_path, $plot_vals, $rg, $read, $groups->{$rg}->{'length_'.$read}, $groups->{$rg}->{'count_'.$read});
690+
}
691+
elsif($rg ne q{.}) {
692+
warn "WARN: No plot_vals found for RG '$rg'\n";
693+
}
680694
delete $groups->{$rg}->{'fqp_'.$read};
681695
}
682696
}
@@ -754,7 +768,7 @@ sub fastq2image {
754768
my $value;
755769
while (scalar @{$values}) {
756770
if ($cycle_count == 1 && ($quality == 1 || ($quality > 1 && $quality % 5 == 0))) {
757-
$im->string(GD::gdSmallFont, 25, $y1-5, $quality, $black);
771+
$im->string(GD::Font->Small, 25, $y1-5, $quality, $black);
758772
}
759773
$y2 = $y1 + $shift;
760774
$im->filledRectangle($x1,$y1,$x2,$y2, $colours->{int ((pop @{$values}) / $read_pct)});
@@ -763,7 +777,7 @@ sub fastq2image {
763777
}
764778

765779
if ($cycle_count == 1 || $cycle_count % 5 == 0) {
766-
$im->string(GD::gdSmallFont, $x1, $y1+5, $cycle_count, $black);
780+
$im->string(GD::Font->Small, $x1, $y1+5, $cycle_count, $black);
767781
}
768782
$x1 = $x2;
769783
}
@@ -775,9 +789,9 @@ sub fastq2image {
775789

776790
my $start_xaxis_label = (int $num_cycles*$shift/2) - 40;
777791
if ($start_xaxis_label < 0) { $start_xaxis_label = 0; }
778-
$im->string(GD::gdSmallFont, $start_xaxis_label, $y1+20, $xaxis_label, $black);
792+
$im->string(GD::Font->Small, $start_xaxis_label, $y1+20, $xaxis_label, $black);
779793

780-
$im->stringUp(GD::gdSmallFont, 5, $height/2, q[Quality], $black);
794+
$im->stringUp(GD::Font->Small, 5, $height/2, q[Quality], $black);
781795

782796
open my $PNG, '>', $out_file;
783797
binmode($PNG);

setup.sh

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ SOURCE_HTSLIB="https://github.com/samtools/htslib/archive/1.3.1.tar.gz"
99

1010
# for bigwig
1111
SOURCE_JKENT_BIN="https://github.com/ENCODE-DCC/kentUtils/raw/master/bin/linux.x86_64"
12+
# for Bio::DB::BigWig
13+
SOURCE_KENTSRC="http://hgdownload.cse.ucsc.edu/admin/jksrc.zip"
14+
# for fast merging of per-chr BW files
1215
SOURCE_LIB_BW="https://github.com/dpryan79/libBigWig/archive/0.1.6.tar.gz"
1316

1417
# for biobambam
@@ -191,21 +194,21 @@ else
191194
fi
192195

193196
if [[ ",$COMPILE," == *,biobambam,* ]] ; then
194-
echo -n "Building biobambam ..."
195-
if [ -e $SETUP_DIR/biobambam.success ]; then
196-
echo " previously installed ..."
197+
echo -n "Building biobambam2 ..."
198+
if [ -e $SETUP_DIR/biobambam2.success ]; then
199+
echo " previously installed2 ..."
197200
else
198201
cd $SETUP_DIR
199-
get_distro "biobambam" $SOURCE_BBB_BIN_DIST
200-
mkdir -p biobambam
201-
tar --strip-components 1 -C biobambam -zxf biobambam.tar.gz
202+
get_distro "biobambam2" $SOURCE_BBB_BIN_DIST
203+
mkdir -p biobambam2
204+
tar --strip-components 1 -C biobambam2 -zxf biobambam2.tar.gz
202205
mkdir -p $INST_PATH/bin $INST_PATH/etc $INST_PATH/lib $INST_PATH/share
203-
rm -f biobambam/bin/curl # don't let this file in SSL doesn't work
204-
cp -r biobambam/bin/* $INST_PATH/bin/.
205-
cp -r biobambam/etc/* $INST_PATH/etc/.
206-
cp -r biobambam/lib/* $INST_PATH/lib/.
207-
cp -r biobambam/share/* $INST_PATH/share/.
208-
touch $SETUP_DIR/biobambam.success
206+
rm -f biobambam2/bin/curl # don't let this file in SSL doesn't work
207+
cp -r biobambam2/bin/* $INST_PATH/bin/.
208+
cp -r biobambam2/etc/* $INST_PATH/etc/.
209+
cp -r biobambam2/lib/* $INST_PATH/lib/.
210+
cp -r biobambam2/share/* $INST_PATH/share/.
211+
touch $SETUP_DIR/biobambam2.success
209212
echo
210213
fi
211214
else
@@ -239,7 +242,7 @@ cd $INIT_DIR
239242
if [[ ",$COMPILE," == *,samtools,* ]] ; then
240243
echo -n "Building Bio::DB::HTS ..."
241244
if [ -e $SETUP_DIR/biohts.success ]; then
242-
echo -n " previously installed ...";
245+
echo " previously installed ...";
243246
else
244247
cd $SETUP_DIR
245248
$CPANM --mirror http://cpan.metacpan.org --notest -l $INST_PATH Module::Build Bio::Root::Version
@@ -254,6 +257,23 @@ else
254257
echo "Bio::DB::HTS - No change between PCAP versions" # based on samtools tag
255258
fi
256259

260+
echo -n "Building kentsrc + Bio::DB::BigFile ..."
261+
if [ -e $SETUP_DIR/kentsrc.success ]; then
262+
echo " previously installed ...";
263+
else
264+
cd $SETUP_DIR
265+
get_distro "kentsrc" $SOURCE_KENTSRC
266+
unzip -q kentsrc.zip
267+
perl -pi -e 's/(\s+CFLAGS=)$/${1}-fPIC/' kent/src/inc/common.mk
268+
cd kent/src/lib
269+
export MACHTYPE=i686 # for a 64-bit system
270+
make
271+
cd ../
272+
export KENT_SRC=`pwd`
273+
cd $SETUP_DIR
274+
$CPANM --mirror http://cpan.metacpan.org -l $INST_PATH Bio::DB::BigFile
275+
fi
276+
257277
cd $INIT_DIR
258278

259279
echo -n "Installing Perl prerequisites ..."

0 commit comments

Comments
 (0)